feat: Enhances wiki article info fetching

Refactors article info fetching so that page info, badge properties, and interwiki links are retrieved in a single MediaWiki API request. Adds support for displaying category members and improves interwiki link processing by translating links to internal links where possible. Adds error handling for interwiki processing. Updates the HTML template to display category members.

Improves performance and reduces external requests, enhancing user navigation and data retrieval efficiency.

Fixes #19.
This commit is contained in:
Kumi 2024-12-06 08:35:28 +01:00
parent 4479a109b3
commit de15cbdb75
Signed by: kumi
GPG key ID: ECBCC9082395383F
2 changed files with 100 additions and 54 deletions

View file

@ -392,25 +392,55 @@ def wiki_article(
logger.debug(f"Fetching {title} from {base_url}") logger.debug(f"Fetching {title} from {base_url}")
api_request = urllib.request.Request( # Check if the article is something we need to handle differently
f"{base_url}/api/rest_v1/page/html/{escape(quote(title.replace(' ', '_')), True).replace('/', '%2F')}", info_api_request = urllib.request.Request(
headers=HEADERS, f"{base_url}/w/api.php?action=query&format=json&titles={escape(quote(title.replace(' ', '_')), True)}&prop=info|pageprops|categoryinfo|langlinks&lllimit=500",
)
logger.debug(f"Request URL: {api_request.full_url}")
# Use the MediaWiki API to fetch any badges for the article
api_request_badges = urllib.request.Request(
f"{base_url}/w/api.php?action=query&format=json&titles={escape(quote(title.replace(' ', '_')), True)}&prop=pageprops",
headers=HEADERS, headers=HEADERS,
) )
category_members = []
interwiki = []
badges = [] badges = []
with urllib.request.urlopen(api_request_badges) as response: with urllib.request.urlopen(info_api_request) as response:
logger.debug(f"Tried to fetch badges from {api_request_badges.full_url}") logger.debug(
f"Tried to fetch info for {title} from {info_api_request.full_url}"
)
data = json.loads(response.read().decode()) data = json.loads(response.read().decode())
page = data["query"]["pages"].popitem()[1] page = data["query"]["pages"].popitem()[1]
langlinks = page.get("langlinks", [])
logger.debug(f"Original Interwiki links for {title}: {langlinks}")
# Get interwiki links and translate them to internal links where possible
for link in langlinks:
try:
interwiki_lang = link["lang"]
interwiki_title = link["*"]
logger.debug(
f"Generating interwiki link for: {interwiki_lang}.{project}/{interwiki_title}"
)
interwiki_url = url_for(
"wiki_article",
project=project,
lang=interwiki_lang,
title=interwiki_title,
)
link["url"] = interwiki_url
link["langname"] = app.languages[interwiki_lang]["name"]
interwiki.append(link)
except KeyError as e:
logger.error(
f"Error processing interwiki link for title {title} in language {lang}: {e}"
)
# Get badges (e.g. "Good Article", "Featured Article")
props = page.get("pageprops", {}) props = page.get("pageprops", {})
for prop in props: for prop in props:
@ -418,6 +448,7 @@ def wiki_article(
try: try:
badge_id = prop.replace("wikibase-badge-", "") badge_id = prop.replace("wikibase-badge-", "")
# Fetch the badge data from Wikidata
badge_request = urllib.request.Request( badge_request = urllib.request.Request(
f"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids={badge_id}&languages={lang}", f"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids={badge_id}&languages={lang}",
headers=HEADERS, headers=HEADERS,
@ -449,52 +480,58 @@ def wiki_article(
except Exception as e: except Exception as e:
logger.error(f"Error fetching badge {prop}: {e}") logger.error(f"Error fetching badge {prop}: {e}")
# Use the MediaWiki API to fetch interwiki links # If the article is a category, fetch the category members
api_request_interwiki = urllib.request.Request( if "categoryinfo" in page:
f"{base_url}/w/api.php?action=query&format=json&titles={escape(quote(title.replace(' ', '_')), True)}&prop=langlinks&lllimit=500", category_api_url = f"{base_url}/w/api.php?action=query&format=json&list=categorymembers&cmtitle={escape(quote(title.replace(' ', '_')), True)}&cmlimit=500"
category_api_request = urllib.request.Request(
category_api_url,
headers=HEADERS, headers=HEADERS,
) )
with urllib.request.urlopen(api_request_interwiki) as response: all_members = []
with urllib.request.urlopen(category_api_request) as category_api_response:
logger.debug( logger.debug(
f"Tried to fetch interwiki links from {api_request_interwiki.full_url}" f"Tried to fetch category members for {title} from {category_api_request.full_url}"
) )
data = json.loads(response.read().decode()) data = json.loads(category_api_response.read().decode())
langlinks = data["query"]["pages"].popitem()[1].get("langlinks", []) category_members = data["query"]["categorymembers"]
all_members += category_members
logger.debug(f"Original Interwiki links: {langlinks}") if "continue" in data:
continue_params = f"&cmcontinue={data['continue']['cmcontinue']}"
interwiki = [] category_api_request = urllib.request.Request(
category_api_url + continue_params,
# Translate the interwiki links to internal links where possible headers=HEADERS,
for link in langlinks:
try:
interwiki_lang = link["lang"]
interwiki_title = link["*"]
logger.debug(
f"Generating interwiki link for: {interwiki_lang}.{project}/{interwiki_title}"
) )
interwiki_url = url_for( with urllib.request.urlopen(
category_api_request
) as category_api_response:
data = json.loads(category_api_response.read().decode())
all_members += data["query"]["categorymembers"]
category_members = all_members
for member in category_members:
member["url"] = url_for(
"wiki_article", "wiki_article",
project=project, project=project,
lang=interwiki_lang, lang=lang,
title=interwiki_title, title=member["title"],
)
link["url"] = interwiki_url
link["langname"] = app.languages[interwiki_lang]["name"]
interwiki.append(link)
except KeyError as e:
logger.error(
f"Error processing interwiki link for title {title} in language {lang}: {e}"
) )
interwiki = langsort(interwiki) interwiki = langsort(interwiki)
# Prepare the API request to fetch the article content
api_request = urllib.request.Request(
f"{base_url}/api/rest_v1/page/html/{escape(quote(title.replace(' ', '_')), True).replace('/', '%2F')}",
headers=HEADERS,
)
logger.debug(f"Article content URL: {api_request.full_url}")
# Add the `variant` header if the `variant` query parameter is present # Add the `variant` header if the `variant` query parameter is present
# This is used to fetch articles in a specific script variant (https://www.mediawiki.org/wiki/Writing_systems/LanguageConverter) # This is used to fetch articles in a specific script variant (https://www.mediawiki.org/wiki/Writing_systems/LanguageConverter)
if request.args.get("variant", None): if request.args.get("variant", None):
@ -679,6 +716,7 @@ def wiki_article(
license=license, license=license,
interwiki=interwiki, interwiki=interwiki,
badges=badges, badges=badges,
category_members=category_members,
) )

View file

@ -32,6 +32,14 @@
</div> </div>
{% endif %} {% endif %}
{{ content|safe }} {{ content|safe }}
{% if category_members %}
<h2>Pages in category "{{ title }}"</h2>
<ul>
{% for member in category_members %}
<li><a href="{{ member.url }}">{{ member.title }}</a></li>
{% endfor %}
</ul>
{% endif %}
{% endblock %} {% endblock %}
{% block license %} {% block license %}