From c436885cbc460efeb24a18c26639d1155ca0c448 Mon Sep 17 00:00:00 2001 From: Kumi Date: Thu, 11 Jul 2024 12:25:19 +0200 Subject: [PATCH] feat: initial setup of Wikimore Flask app with basic features Added initial setup for "Wikimore", a simple frontend for Wikimedia projects using Flask. The app includes the following features: - Multi-language and multi-project support - Search functionality with results displayed - Proxy support for Wikimedia images - Basic structure and templates (home, article, search results) Configured appropriate .gitignore and .vscode settings for development. Licensed under MIT License. --- .gitignore | 4 ++ .vscode/launch.json | 26 ++++++++ .vscode/settings.json | 5 ++ LICENSE | 19 ++++++ README.md | 27 ++++++++ app.py | 119 ++++++++++++++++++++++++++++++++++ requirements.txt | 2 + templates/article.html | 6 ++ templates/base.html | 110 +++++++++++++++++++++++++++++++ templates/home.html | 6 ++ templates/search_results.html | 13 ++++ 11 files changed, 337 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 LICENSE create mode 100644 README.md create mode 100644 app.py create mode 100644 requirements.txt create mode 100644 templates/article.html create mode 100644 templates/base.html create mode 100644 templates/home.html create mode 100644 templates/search_results.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..22ebee5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +venv/ +.venv/ +__pycache__/ +*.pyc \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..9a3dfce --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,26 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Flask", + "type": "debugpy", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "app.py", + "FLASK_DEBUG": "1", + }, + "args": [ + "run", + "--no-debugger", + "--no-reload", + "--port=8109" + ], + "jinja": true, + "autoStartBrowser": false + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..85b4294 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "*.html": "jinja-html" + } +} \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3788438 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 Private.coffee Team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..3d5cba4 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# Wikimore - A simple frontend for Wikimedia projects + +Wikimore is a simple frontend for Wikimedia projects. It uses the MediaWiki API to fetch data from Wikimedia projects and display it in a user-friendly way. It is built using Flask. + +This project is still in development and more features will be added in the future. It is useful for anyone who wants to access Wikimedia projects with a more basic frontend, or to provide access to Wikimedia projects to users who cannot access them directly, for example due to state censorship. + +## Features + +- Multi-language support (currently English and German, more can and will be added) +- Multi-project support (currently Wikipedia and Wiktionary, more can and will be added) +- Search functionality +- Proxy support for Wikimedia images + +## Installation + +1. Clone the repository +2. Install the required packages using `pip install -r requirements.txt` +3. Run the app using `python app.py` + +## Usage + +1. Open your browser and navigate to `http://localhost:5000` +2. Use the search bar to search for articles on a given Wikimedia project, in a given language + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
# Patch metadata carried over verbatim from the flattened source:
# diff --git a/app.py b/app.py new file mode 100644 index 0000000..7e3642a --- /dev/null +++ b/app.py @@ -0,0 +1,119 @@
"""Wikimore: a small Flask frontend for Wikimedia projects.

Articles and search results are fetched from the MediaWiki API of the
requested project/language, article HTML is rewritten so that links stay
inside this app, and images hosted on upload.wikimedia.org are served
through a local ``/proxy`` endpoint.
"""
import json
import re
import urllib.error
import urllib.request
from html import escape
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from flask import (
    Flask,
    Response,
    abort,
    redirect,
    render_template,
    request,
    url_for,
)

app = Flask(__name__)

WIKIMEDIA_PROJECTS = {
    "wikipedia": "wikipedia.org",
    "wiktionary": "wiktionary.org",
    # TODO: Add more Wikimedia projects
}

# Reverse lookup (domain -> project key), built once instead of per link.
_PROJECT_BY_DOMAIN = {
    domain: name for name, domain in WIKIMEDIA_PROJECTS.items()
}

# Only images below this prefix are routed through (and accepted by) /proxy.
_UPLOAD_PREFIX = "https://upload.wikimedia.org/"

# ``lang`` becomes part of a hostname, so it must be a plain DNS label.
_LANG_RE = re.compile(r"[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*")

# CSS classes of the view/talk/edit items inside navbox templates.
_NAVBAR_CLASSES = {"nv-view", "nv-talk", "nv-edit"}

# Seconds to wait for an upstream Wikimedia response.
_TIMEOUT = 10


def _api_url(project, lang, params):
    """Build a MediaWiki API URL for *project* in language *lang*.

    Unknown projects fall back to wikipedia.org. Aborts with 404 when
    *lang* is not a valid hostname label, because it would otherwise let
    the caller point the request at an arbitrary host.
    """
    if not _LANG_RE.fullmatch(lang):
        abort(404)
    base_url = WIKIMEDIA_PROJECTS.get(project, "wikipedia.org")
    # urlencode (not html.escape) is the correct quoting for query values.
    return f"https://{lang}.{base_url}/w/api.php?{urlencode(params)}"


def _fetch_json(url):
    """Fetch *url* and return the decoded JSON body; abort with 502 on failure."""
    try:
        with urllib.request.urlopen(url, timeout=_TIMEOUT) as response:
            return json.loads(response.read().decode())
    except (urllib.error.URLError, ValueError):
        abort(502)


def get_proxy_url(url):
    """Return the local proxy URL for a Wikimedia upload, else *url* itself.

    Protocol-relative URLs (``//host/...``) are upgraded to https first.
    """
    if url.startswith("//"):
        url = "https:" + url

    if not url.startswith(_UPLOAD_PREFIX):
        return url

    return f"/proxy?{urlencode({'url': url})}"


@app.route("/proxy")
def proxy():
    """Relay a file from upload.wikimedia.org to the client.

    The ``url`` query argument must point below upload.wikimedia.org;
    anything else is rejected with 400 so the endpoint cannot be used to
    make the server fetch arbitrary (including ``file://``) URLs.
    """
    url = request.args.get("url", "")
    if not url.startswith(_UPLOAD_PREFIX):
        abort(400)

    try:
        with urllib.request.urlopen(url, timeout=_TIMEOUT) as response:
            data = response.read()
            content_type = response.headers.get(
                "Content-Type", "application/octet-stream"
            )
    except urllib.error.HTTPError as err:
        abort(404 if err.code == 404 else 502)
    except urllib.error.URLError:
        abort(502)

    # Pass the upstream media type through; a bare bytes return value would
    # be labelled text/html by Flask.
    return Response(data, content_type=content_type)


@app.route("/")
def home():
    """Render the landing page."""
    return render_template("home.html")


@app.route("/search", methods=["GET", "POST"])
def search():
    """Turn a submitted search form into a redirect to the results page."""
    if request.method == "POST":
        query = request.form["query"]
        lang = request.form["lang"]
        project = request.form["project"]
        return redirect(
            url_for("search_results", project=project, lang=lang, query=query)
        )
    # The patch ships no "search.html" template, so a plain GET goes back to
    # the landing page instead of failing with TemplateNotFound.
    return redirect(url_for("home"))


def _rewrite_article_html(article_html, project, lang):
    """Return *article_html* adapted for display inside this app.

    Wiki links are pointed at the local article route, edit-section
    markers, inline styles and navbox view/talk/edit items are removed,
    and image sources are passed through ``get_proxy_url``.
    """
    soup = BeautifulSoup(article_html, "html.parser")

    for a in soup.find_all("a", href=True):
        href = a["href"]
        if href.startswith("/wiki/"):
            a["href"] = f"/{project}/{lang}{href}"
        elif href.startswith(("//", "https://")):
            # ['https:' or '', '', '<lang>.<domain>', 'wiki', '<title>', ...]
            parts = href.split("/")
            if len(parts) > 4 and parts[3] == "wiki":
                target_lang, _, target_domain = parts[2].partition(".")
                target_project = _PROJECT_BY_DOMAIN.get(target_domain)
                # Links to hosts outside the known projects stay untouched.
                if target_project is not None:
                    target_title = "/".join(parts[4:])
                    a["href"] = (
                        f"/{target_project}/{target_lang}/wiki/{target_title}"
                    )

    for span in soup.find_all("span", class_="mw-editsection"):
        span.decompose()

    for style in soup.find_all("style"):
        style.decompose()

    # src=True skips <img> tags without a src instead of raising KeyError.
    for img in soup.find_all("img", src=True):
        img["src"] = get_proxy_url(img["src"])

    for li in soup.find_all("li"):
        if _NAVBAR_CLASSES.intersection(li.get("class", [])):
            li.decompose()

    return str(soup)


# <path:title> because titles may contain slashes (the link rewriter above
# emits such URLs itself).
@app.route("/<project>/<lang>/wiki/<path:title>")
def wiki_article(project, lang, title):
    """Fetch the article *title* and render it; 404 if it does not exist."""
    url = _api_url(
        project,
        lang,
        {
            "action": "query",
            "format": "json",
            "titles": title.replace(" ", "_"),
            "prop": "revisions",
            "rvprop": "content",
            "rvparse": 1,
        },
    )
    data = _fetch_json(url)
    try:
        page = next(iter(data["query"]["pages"].values()))
        article_html = page["revisions"][0]["*"]
    except (KeyError, IndexError, StopIteration):
        # Missing pages come back without a "revisions" entry.
        abort(404)

    processed_html = _rewrite_article_html(article_html, project, lang)
    return render_template("article.html", title=title, content=processed_html)


@app.route("/<project>/<lang>/search/<query>")
def search_results(project, lang, query):
    """Run a full-text search for *query* and render the result list."""
    url = _api_url(
        project,
        lang,
        {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": query,
        },
    )
    data = _fetch_json(url)
    # An API error response has no "query" key; show an empty result list.
    results = data.get("query", {}).get("search", [])
    return render_template(
        "search_results.html",
        query=query,
        search_results=results,
        project=project,
        lang=lang,
    )


@app.route("/<project>/<lang>/wiki/Special:Search/<query>")
def search_redirect(project, lang, query):
    """Map MediaWiki's Special:Search URLs onto the local search route."""
    return redirect(url_for("search_results", project=project, lang=lang, query=query))


if __name__ == "__main__":
    app.run(debug=True)

# Patch metadata carried over verbatim from the flattened source:
# diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2581330 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +flask +bs4 \ No newline at end of file
# diff --git a/templates/article.html b/templates/article.html new file mode 100644 index 0000000..e485e1f ---
/dev/null +++ b/templates/article.html @@ -0,0 +1,6 @@ +{% extends "base.html" %} + +{% block content %} + <h1>{{ title }}</h1> + <p>{{ content|safe }}</p> +{% endblock %} \ No newline at end of file diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..268f451 --- /dev/null +++ b/templates/base.html @@ -0,0 +1,110 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>{{ title }}{% if title %} ‐ {% endif %}Wikimore + + + + +
+ {% block content %}{% endblock %} +
+ + + \ No newline at end of file diff --git a/templates/home.html b/templates/home.html new file mode 100644 index 0000000..6401233 --- /dev/null +++ b/templates/home.html @@ -0,0 +1,6 @@ +{% extends "base.html" %} + +{% block content %} +

<h1>Welcome to Wikimore</h1>
+    <p>Use the search form above to find articles on Wikipedia.</p>

+{% endblock %} \ No newline at end of file diff --git a/templates/search_results.html b/templates/search_results.html new file mode 100644 index 0000000..811fe8c --- /dev/null +++ b/templates/search_results.html @@ -0,0 +1,13 @@ +{% extends "base.html" %} + +{% block content %} +

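<h1>Search Results for "{{ query }}"</h1>
+    <ul>
+        {% for result in search_results %}
+            <li>
+                <a href="{{ url_for('wiki_article', project=project, lang=lang, title=result['title']) }}">{{ result['title'] }}</a>
+                <p>{{ result['snippet']|safe }}</p>
+            </li>
+        {% endfor %}
+    </ul>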
+{% endblock %} \ No newline at end of file