Kumi
c436885cbc
Added initial setup for "Wikimore", a simple frontend for Wikimedia projects using Flask. The app includes the following features: - Multi-language and multi-project support - Search functionality with results displayed - Proxy support for Wikimedia images - Basic structure and templates (home, article, search results) Configured appropriate .gitignore and .vscode settings for development. Licensed under MIT License.
119 lines
3.8 KiB
Python
119 lines
3.8 KiB
Python
from flask import Flask, render_template, request, redirect, url_for
|
|
import urllib.request
|
|
from urllib.parse import urlencode
|
|
from html import escape
|
|
import json
|
|
from bs4 import BeautifulSoup
|
|
|
|
app = Flask(__name__)
|
|
|
|
WIKIMEDIA_PROJECTS = {
|
|
"wikipedia": "wikipedia.org",
|
|
"wiktionary": "wiktionary.org",
|
|
# TODO: Add more Wikimedia projects
|
|
}
|
|
|
|
|
|
def get_proxy_url(url):
|
|
if url.startswith("//"):
|
|
url = "https:" + url
|
|
|
|
if not url.startswith("https://upload.wikimedia.org/"):
|
|
return url
|
|
|
|
return f"/proxy?{urlencode({'url': url})}"
|
|
|
|
|
|
@app.route("/proxy")
|
|
def proxy():
|
|
url = request.args.get("url")
|
|
with urllib.request.urlopen(url) as response:
|
|
data = response.read()
|
|
return data
|
|
|
|
|
|
@app.route("/")
|
|
def home():
|
|
return render_template("home.html")
|
|
|
|
|
|
@app.route("/search", methods=["GET", "POST"])
|
|
def search():
|
|
if request.method == "POST":
|
|
query = request.form["query"]
|
|
lang = request.form["lang"]
|
|
project = request.form["project"]
|
|
return redirect(
|
|
url_for("search_results", project=project, lang=lang, query=query)
|
|
)
|
|
return render_template("search.html")
|
|
|
|
|
|
@app.route("/<project>/<lang>/wiki/<title>")
|
|
def wiki_article(project, lang, title):
|
|
base_url = WIKIMEDIA_PROJECTS.get(project, "wikipedia.org")
|
|
url = f"https://{lang}.{base_url}/w/api.php?action=query&format=json&titles={escape(title.replace(" ", "_"), True)}&prop=revisions&rvprop=content&rvparse=1"
|
|
with urllib.request.urlopen(url) as response:
|
|
data = json.loads(response.read().decode())
|
|
pages = data["query"]["pages"]
|
|
article_html = next(iter(pages.values()))["revisions"][0]["*"]
|
|
|
|
soup = BeautifulSoup(article_html, "html.parser")
|
|
for a in soup.find_all("a", href=True):
|
|
href = a["href"]
|
|
if href.startswith("/wiki/"):
|
|
a["href"] = f"/{project}/{lang}{href}"
|
|
|
|
elif href.startswith("//") or href.startswith("https://"):
|
|
parts = href.split("/")
|
|
if len(parts) > 4:
|
|
target_project = ".".join(parts[2].split(".")[1:])
|
|
target_lang = parts[2].split(".")[0]
|
|
target_title = "/".join(parts[4:])
|
|
if target_project in WIKIMEDIA_PROJECTS.values():
|
|
target_project = list(WIKIMEDIA_PROJECTS.keys())[
|
|
list(WIKIMEDIA_PROJECTS.values()).index(target_project)
|
|
]
|
|
a["href"] = f"/{target_project}/{target_lang}/wiki/{target_title}"
|
|
|
|
for span in soup.find_all("span", class_="mw-editsection"):
|
|
span.decompose()
|
|
|
|
for style in soup.find_all("style"):
|
|
style.decompose()
|
|
|
|
for img in soup.find_all("img"):
|
|
img["src"] = get_proxy_url(img["src"])
|
|
|
|
for li in soup.find_all("li"):
|
|
# If "nv-view", "nv-talk", "nv-edit" classes are on the li element, remove it
|
|
if any(cls in li.get("class", []) for cls in ["nv-view", "nv-talk", "nv-edit"]):
|
|
li.decompose()
|
|
|
|
processed_html = str(soup)
|
|
return render_template("article.html", title=title, content=processed_html)
|
|
|
|
|
|
@app.route("/<project>/<lang>/search/<query>")
|
|
def search_results(project, lang, query):
|
|
base_url = WIKIMEDIA_PROJECTS.get(project, "wikipedia.org")
|
|
url = f"https://{lang}.{base_url}/w/api.php?action=query&format=json&list=search&srsearch={query}"
|
|
with urllib.request.urlopen(url) as response:
|
|
data = json.loads(response.read().decode())
|
|
search_results = data["query"]["search"]
|
|
return render_template(
|
|
"search_results.html",
|
|
query=query,
|
|
search_results=search_results,
|
|
project=project,
|
|
lang=lang,
|
|
)
|
|
|
|
|
|
@app.route("/<project>/<lang>/wiki/Special:Search/<query>")
|
|
def search_redirect(project, lang, query):
|
|
return redirect(url_for("search_results", project=project, lang=lang, query=query))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
app.run(debug=True)
|