From cfc88a5c4e57dddcce6b3ed8b7083d06fa2cd234 Mon Sep 17 00:00:00 2001 From: Kumi Date: Thu, 5 Dec 2024 13:25:41 +0100 Subject: [PATCH] feat: Enhances language sorting and interwiki feature Adds functionality to sort languages by user activity with overriding via environment variables. Implements fetching and integration of interwiki links using the MediaWiki API. Introduces UI elements for language selection with new styling. Improves user experience by prioritizing more active languages and providing easy navigation via interwiki links. Fixes #45 --- src/wikimore/app.py | 144 ++++++++++++++++++++++++++-- src/wikimore/static/css/style.css | 87 +++++++++++++++++ src/wikimore/templates/article.html | 18 ++++ 3 files changed, 243 insertions(+), 6 deletions(-) diff --git a/src/wikimore/app.py b/src/wikimore/app.py index ca43457..b11eca5 100644 --- a/src/wikimore/app.py +++ b/src/wikimore/app.py @@ -123,6 +123,90 @@ logger.debug( ) +# Get number of active Wikipedia users for each language +def get_active_users() -> Dict[str, int]: + """Fetch the number of active Wikipedia users for each language. + + Returns: + list[tuple[str, int]]: `(language code, active user count)` pairs sorted by active user count, highest first (a list, not the dict that the annotation suggests); languages whose request failed are omitted. 
+ """ + path = "/w/api.php?action=query&format=json&meta=siteinfo&siprop=statistics" + + active_users = {} + + for lang, data in app.languages.items(): + try: + url = f"{data['projects']['wiki']}{path}" + with urllib.request.urlopen(url) as response: + data = json.loads(response.read().decode()) + active_users[lang] = data["query"]["statistics"]["activeusers"] + except Exception as e: + logger.error(f"Error fetching active users for {lang}: {e}") + + return sorted(active_users.items(), key=lambda x: x[1], reverse=True) + + +if os.environ.get("NO_LANGSORT", False): + LANGSORT = [] +elif os.environ.get("LANGSORT") == "auto": + LANGSORT = [lang for lang, _ in get_active_users()[:50]] +elif os.environ.get("LANGSORT"): + LANGSORT = os.environ["LANGSORT"].split(",") +else: + # Opinionated sorting of languages + LANGSORT = [ + "en", + "es", + "ja", + "de", + "fr", + "zh", + "ru", + "it", + "pt", + "pl", + "nl", + "ar", + ] + +def langsort(input: list[dict], key: str = "lang") -> list[dict]: + """Sorting of language data. + + Sorts a list of dictionaries by the language code stored under `key` ("lang" by default) so that the languages listed in `LANGSORT` come first, in that order. + + Allows specifying a custom order using the `LANGSORT` environment variable. + + Args: + input (list[dict]): A list of dictionaries that each contain the entry named by `key`. + + Returns: + list[dict]: The reordered list; items whose language is not in `LANGSORT` follow in their original order. 
+ """ + + if not LANGSORT: + return input + + output = [] + + for lang in LANGSORT: + for item in input: + if item[key] == lang: + output.append(item) + + for item in input: + if item[key] not in LANGSORT: + output.append(item) + + return output + +logger.debug("Initialized language sort order") + +app_languages = [{"lang": lang, "name": data["name"]} for lang, data in app.languages.items()] +app_languages = langsort(app_languages) + +app.languages = {lang: app.languages[lang] for lang in [lang["lang"] for lang in app_languages]} + + def render_template(*args, **kwargs) -> Text: """A wrapper around Flask's `render_template` that adds the `languages` and `wikimedia_projects` context variables. @@ -243,18 +327,16 @@ def inbound_redirect(domain: str, url: str) -> Union[Text, Response, Tuple[Text, Returns: Response: A redirect to the corresponding route """ + # TODO: Make this the default route scheme instead of a redirect + for language, language_projects in app.languages.items(): for project_name, project_url in language_projects["projects"].items(): if project_url == f"https://{domain}": - return redirect( - f"{url_for('home')}{project_name}/{language}/{url}" - ) + return redirect(f"{url_for('home')}{project_name}/{language}/{url}") for project_name, project_url in app.languages["special"]["projects"].items(): if project_url == f"https://{domain}": - return redirect( - f"{url_for('home')}/{project_name}/{language}/{url}" - ) + return redirect(f"{url_for('home')}/{project_name}/{language}/{url}") # TODO / IDEA: Handle non-Wikimedia Mediawiki projects here? 
@@ -267,6 +349,7 @@ def inbound_redirect(domain: str, url: str) -> Union[Text, Response, Tuple[Text, 404, ) + @app.route("///wiki/") def wiki_article( project: str, lang: str, title: str @@ -308,6 +391,54 @@ def wiki_article( headers=HEADERS, ) + logger.debug(f"Request URL: {api_request.full_url}") + + # Use the MediaWiki API to fetch interwiki links + api_request_interwiki = urllib.request.Request( + f"{base_url}/w/api.php?action=query&format=json&titles={escape(quote(title.replace(' ', '_')), True)}&prop=langlinks&lllimit=500", + headers=HEADERS, + ) + + with urllib.request.urlopen(api_request_interwiki) as response: + logger.debug( + f"Tried to fetch interwiki links from {api_request_interwiki.full_url}" + ) + data = json.loads(response.read().decode()) + langlinks = data["query"]["pages"].popitem()[1].get("langlinks", []) + + logger.debug(f"Original Interwiki links: {langlinks}") + + interwiki = [] + + # Translate the interwiki links to internal links where possible + for link in langlinks: + try: + interwiki_lang = link["lang"] + interwiki_title = link["*"] + + logger.debug( + f"Generating interwiki link for: {interwiki_lang}.{project}/{interwiki_title}" + ) + + interwiki_url = url_for( + "wiki_article", + project=project, + lang=interwiki_lang, + title=interwiki_title, + ) + link["url"] = interwiki_url + + link["langname"] = app.languages[interwiki_lang]["name"] + + interwiki.append(link) + + except KeyError as e: + logger.error( + f"Error processing interwiki link for title {title} in language {lang}: {e}" + ) + + interwiki = langsort(interwiki) + # Add the `variant` header if the `variant` query parameter is present # This is used to fetch articles in a specific script variant (https://www.mediawiki.org/wiki/Writing_systems/LanguageConverter) if request.args.get("variant", None): @@ -490,6 +621,7 @@ def wiki_article( project=project, rtl=rtl, license=license, + interwiki=interwiki, ) diff --git a/src/wikimore/static/css/style.css 
b/src/wikimore/static/css/style.css index f1a9b9e..1ab7b5b 100644 --- a/src/wikimore/static/css/style.css +++ b/src/wikimore/static/css/style.css @@ -1228,4 +1228,91 @@ Currently blocked by implementation of comments retrieval in the backend .side-box-text { font-size: 0.8em; } +} + +/* Language selector styling */ +.language-selector { + position: relative; + display: inline-block; +} + +.language-selector-toggle { + display: none; +} + +.language-selector-label { + cursor: pointer; + display: inline-block; + padding: 0.5em; + border: 1px solid #ccc; + border-radius: 0.25em; +} + +.language-selector-label-text { + display: inline-block; + margin-right: 0.5em; +} + +.language-selector-label-icon { + display: inline-block; + width: 0; + height: 0; + border-left: 0.25em solid transparent; + border-right: 0.25em solid transparent; + border-top: 0.25em solid #333; +} + +.language-selector-menu { + display: none; + position: absolute; + top: 100%; + left: 0; + z-index: 1; + background-color: #fff; + border: 1px solid #ccc; + border-radius: 0.25em; +} + +.language-selector-toggle:checked + .language-selector-label + .language-selector-menu { + display: block; +} + +.language-selector-list { + list-style-type: none; + margin: 0; + padding: 0; +} + +.language-selector-item { + border-top: 1px solid #ccc; +} + +.language-selector-link { + display: block; + padding: 0.5em; + text-decoration: none; + color: #333; +} + +.language-selector-link:hover { + background-color: #f0f0f0; +} + +.language-selector-link:active { + background-color: #e0e0e0; +} + +.language-selector-link:focus { + outline: 1px dotted #333; + outline: 5px auto -webkit-focus-ring-color; +} + +.language-selector-link:active, +.language-selector-link:focus { + outline: none; +} + +.language-selector-link:active, +.language-selector-link:focus { + outline: none; } \ No newline at end of file diff --git a/src/wikimore/templates/article.html b/src/wikimore/templates/article.html index cc3e581..48c43c3 100644 --- 
a/src/wikimore/templates/article.html +++ b/src/wikimore/templates/article.html @@ -2,6 +2,24 @@ {% block content %}

{{ title }}

+ {% if interwiki %} +
+ + +
+ +
+
+ {% endif %} {{ content|safe }} {% endblock %}