feat: Enhances language sorting and interwiki feature

Adds functionality to sort languages by user activity, with the sort order overridable via environment variables.
Implements fetching and integration of interwiki links using the MediaWiki API.
Introduces UI elements for language selection with new styling.

Improves user experience by prioritizing more active languages and providing easy navigation via interwiki links.

Fixes #45
This commit is contained in:
Kumi 2024-12-05 13:25:41 +01:00
parent 01b63f7a82
commit cfc88a5c4e
Signed by: kumi
GPG key ID: ECBCC9082395383F
3 changed files with 243 additions and 6 deletions

View file

@ -123,6 +123,90 @@ logger.debug(
)
# Get number of active Wikipedia users for each language
def get_active_users() -> list[tuple[str, int]]:
    """Fetch the number of active Wikipedia users for each language.

    Queries the MediaWiki ``siteinfo`` statistics endpoint of the ``wiki``
    project of every entry in ``app.languages``. Any language for which the
    lookup fails (missing project URL, network error, unexpected response) is
    logged and left out of the result.

    Returns:
        list[tuple[str, int]]: ``(language code, active users)`` pairs, sorted
        by the number of active users in descending order.
    """
    path = "/w/api.php?action=query&format=json&meta=siteinfo&siprop=statistics"
    active_users = {}

    for lang, data in app.languages.items():
        try:
            url = f"{data['projects']['wiki']}{path}"
            # Bound each request so a single unresponsive wiki cannot block
            # the whole loop indefinitely; a timeout is handled like any
            # other failure below.
            with urllib.request.urlopen(url, timeout=10) as response:
                stats = json.loads(response.read().decode())
            active_users[lang] = stats["query"]["statistics"]["activeusers"]
        except Exception as e:
            # Best effort: one failing language must not abort the others.
            logger.error(f"Error fetching active users for {lang}: {e}")

    return sorted(active_users.items(), key=lambda x: x[1], reverse=True)
def parse_langsort(raw: str) -> list[str]:
    """Split a comma-separated language list into clean language codes.

    Whitespace around each code is removed and empty entries (for example
    from a trailing comma) are dropped, so ``"en, de,"`` yields
    ``["en", "de"]``.

    Args:
        raw (str): The raw, comma-separated value.

    Returns:
        list[str]: The language codes in the order given.
    """
    return [code for code in (part.strip() for part in raw.split(",")) if code]


# Resolve the language sort order from the environment, in this precedence:
#   NO_LANGSORT=<any non-empty value>  -> no custom ordering at all
#   LANGSORT=auto                      -> the 50 languages with most active users
#   LANGSORT=<code>,<code>,...         -> explicit, comma-separated order
#   (neither set)                      -> the opinionated default below
if os.environ.get("NO_LANGSORT", False):
    LANGSORT = []
elif os.environ.get("LANGSORT") == "auto":
    LANGSORT = [lang for lang, _ in get_active_users()[:50]]
elif os.environ.get("LANGSORT"):
    LANGSORT = parse_langsort(os.environ["LANGSORT"])
else:
    # Opinionated sorting of languages
    LANGSORT = [
        "en",
        "es",
        "ja",
        "de",
        "fr",
        "zh",
        "ru",
        "it",
        "pt",
        "pl",
        "nl",
        "ar",
    ]
def langsort(
    input: list[dict], key: str = "lang", order: Union[list[str], None] = None
) -> list[dict]:
    """Sorting of language data.

    Sorts a list of dictionaries such that items whose language code appears
    in the sort order come first, in that order, followed by all remaining
    items in their original relative order. The default order is the
    module-level `LANGSORT`, which can be customised using the `LANGSORT`
    environment variable.

    Args:
        input (list[dict]): A list of dictionaries containing the `key` entry.
        key (str): Name of the dictionary entry holding the language code.
        order (list[str] | None): Language codes in the desired order. When
            omitted, the module-level `LANGSORT` is used.

    Returns:
        list[dict]: The sorted list of dictionaries. If the sort order is
        empty, `input` itself is returned unchanged.

    Raises:
        KeyError: If an item does not contain `key`.
    """
    if order is None:
        order = LANGSORT
    if not order:
        return input

    # First occurrence wins, so a code that is repeated in the sort order
    # cannot make the same item show up more than once in the result.
    rank: dict = {}
    for position, code in enumerate(order):
        rank.setdefault(code, position)

    # Unlisted languages share one rank behind every listed one. sorted() is
    # stable, so they (and items with equal codes) keep their input order.
    unlisted = len(rank)
    return sorted(input, key=lambda item: rank.get(item[key], unlisted))
logger.debug("Initialized language sort order")

# Rebuild app.languages so that iterating it follows the configured sort
# order: sort lightweight (code, name) records, then re-key the full data.
app_languages = langsort(
    [{"lang": code, "name": info["name"]} for code, info in app.languages.items()]
)
app.languages = {
    entry["lang"]: app.languages[entry["lang"]] for entry in app_languages
}
def render_template(*args, **kwargs) -> Text:
"""A wrapper around Flask's `render_template` that adds the `languages` and `wikimedia_projects` context variables.
@ -243,18 +327,16 @@ def inbound_redirect(domain: str, url: str) -> Union[Text, Response, Tuple[Text,
Returns:
Response: A redirect to the corresponding route
"""
# TODO: Make this the default route scheme instead of a redirect
for language, language_projects in app.languages.items():
for project_name, project_url in language_projects["projects"].items():
if project_url == f"https://{domain}":
return redirect(
f"{url_for('home')}{project_name}/{language}/{url}"
)
return redirect(f"{url_for('home')}{project_name}/{language}/{url}")
for project_name, project_url in app.languages["special"]["projects"].items():
if project_url == f"https://{domain}":
return redirect(
f"{url_for('home')}/{project_name}/{language}/{url}"
)
return redirect(f"{url_for('home')}/{project_name}/{language}/{url}")
# TODO / IDEA: Handle non-Wikimedia Mediawiki projects here?
@ -267,6 +349,7 @@ def inbound_redirect(domain: str, url: str) -> Union[Text, Response, Tuple[Text,
404,
)
@app.route("/<project>/<lang>/wiki/<path:title>")
def wiki_article(
project: str, lang: str, title: str
@ -308,6 +391,54 @@ def wiki_article(
headers=HEADERS,
)
logger.debug(f"Request URL: {api_request.full_url}")
# Use the MediaWiki API to fetch interwiki links
api_request_interwiki = urllib.request.Request(
f"{base_url}/w/api.php?action=query&format=json&titles={escape(quote(title.replace(' ', '_')), True)}&prop=langlinks&lllimit=500",
headers=HEADERS,
)
with urllib.request.urlopen(api_request_interwiki) as response:
logger.debug(
f"Tried to fetch interwiki links from {api_request_interwiki.full_url}"
)
data = json.loads(response.read().decode())
langlinks = data["query"]["pages"].popitem()[1].get("langlinks", [])
logger.debug(f"Original Interwiki links: {langlinks}")
interwiki = []
# Translate the interwiki links to internal links where possible
for link in langlinks:
try:
interwiki_lang = link["lang"]
interwiki_title = link["*"]
logger.debug(
f"Generating interwiki link for: {interwiki_lang}.{project}/{interwiki_title}"
)
interwiki_url = url_for(
"wiki_article",
project=project,
lang=interwiki_lang,
title=interwiki_title,
)
link["url"] = interwiki_url
link["langname"] = app.languages[interwiki_lang]["name"]
interwiki.append(link)
except KeyError as e:
logger.error(
f"Error processing interwiki link for title {title} in language {lang}: {e}"
)
interwiki = langsort(interwiki)
# Add the `variant` header if the `variant` query parameter is present
# This is used to fetch articles in a specific script variant (https://www.mediawiki.org/wiki/Writing_systems/LanguageConverter)
if request.args.get("variant", None):
@ -490,6 +621,7 @@ def wiki_article(
project=project,
rtl=rtl,
license=license,
interwiki=interwiki,
)

View file

@ -1228,4 +1228,91 @@ Currently blocked by implementation of comments retrieval in the backend
.side-box-text {
font-size: 0.8em;
}
}
/* Language selector styling */

/* Wrapper: positioning context for the absolutely positioned menu. */
.language-selector {
    position: relative;
    display: inline-block;
}

/* Hidden checkbox that holds the open/closed state of the menu. */
.language-selector-toggle {
    display: none;
}

/* Visible "button": clicking the label toggles the checkbox. */
.language-selector-label {
    cursor: pointer;
    display: inline-block;
    padding: 0.5em;
    border: 1px solid #ccc;
    border-radius: 0.25em;
}

.language-selector-label-text {
    display: inline-block;
    margin-right: 0.5em;
}

/* Downward-pointing triangle drawn with borders on a zero-size box. */
.language-selector-label-icon {
    display: inline-block;
    width: 0;
    height: 0;
    border-left: 0.25em solid transparent;
    border-right: 0.25em solid transparent;
    border-top: 0.25em solid #333;
}

/* Dropdown menu: hidden until the toggle is checked. */
.language-selector-menu {
    display: none;
    position: absolute;
    top: 100%;
    left: 0;
    z-index: 1;
    background-color: #fff;
    border: 1px solid #ccc;
    border-radius: 0.25em;
}

/* Relies on the DOM order toggle -> label -> menu as direct siblings. */
.language-selector-toggle:checked + .language-selector-label + .language-selector-menu {
    display: block;
}

.language-selector-list {
    list-style-type: none;
    margin: 0;
    padding: 0;
}

.language-selector-item {
    border-top: 1px solid #ccc;
}

.language-selector-link {
    display: block;
    padding: 0.5em;
    text-decoration: none;
    color: #333;
}

.language-selector-link:hover {
    background-color: #f0f0f0;
}

.language-selector-link:active {
    background-color: #e0e0e0;
}

/* Keep a visible focus indicator for keyboard navigation. The second
   declaration overrides the first where the WebKit focus ring is supported. */
.language-selector-link:focus-visible {
    outline: 1px dotted #333;
    outline: 5px auto -webkit-focus-ring-color;
}

/* Suppress the outline only for pointer interaction. The previous blanket
   `outline: none` on :focus cancelled the indicator above for everyone. */
.language-selector-link:active,
.language-selector-link:focus:not(:focus-visible) {
    outline: none;
}

View file

@ -2,6 +2,24 @@
{% block content %}
<h1 class="title{% if rtl %} title-rtl{% endif %}">{{ title }}</h1>
{# Language selector: a script-free dropdown listing this article in other
   languages. The checkbox holds the open/closed state and the stylesheet
   shows the menu while it is checked, so the input, label and menu must stay
   adjacent siblings in this order. #}
{% if interwiki %}
<div class="language-selector">
    <input type="checkbox" id="language-selector-toggle" class="language-selector-toggle">
    <label for="language-selector-toggle" class="language-selector-label">
        <span class="language-selector-label-text">Language</span>
        <span class="language-selector-label-icon"></span>
    </label>
    <div class="language-selector-menu">
        <ul class="language-selector-list">
            {% for lang in interwiki %}
            <li class="language-selector-item">
                {# hreflang declares the language of the linked article #}
                <a href="{{ lang.url }}" hreflang="{{ lang.lang }}" class="language-selector-link">{{ lang.langname }}</a>
            </li>
            {% endfor %}
        </ul>
    </div>
</div>
{% endif %}
{{ content|safe }}
{% endblock %}