forked from PrivateCoffee/wikimore
feat(app): enhance article rendering with project/lang params
Extended the home route to accept optional project and language parameters to dynamically render content based on these inputs. Improved handling within the article fetching function, including validating projects and languages, adding variant headers, and following redirects conditionally. Updated links to accommodate internal, external, and interwiki links, with structured checks for main page redirections. Removed redundant meta links and edit sections to clean up the rendered content. Added proxying for images and videos, converted category elements to links, and improved class assignments for reference links. Ensured right-to-left languages are handled appropriately. Fixes #29.
This commit is contained in:
parent
219a36cad8
commit
872157139a
1 changed files with 43 additions and 13 deletions
|
@ -192,13 +192,13 @@ def proxy() -> bytes:
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def home() -> Text:
|
def home(project=None, lang=None) -> Text:
|
||||||
"""Renders the home page.
|
"""Renders the home page.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Text: The rendered home page.
|
Text: The rendered home page.
|
||||||
"""
|
"""
|
||||||
return render_template("home.html")
|
return render_template("home.html", project=project, lang=lang)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/search", methods=["GET", "POST"])
|
@app.route("/search", methods=["GET", "POST"])
|
||||||
|
@ -247,6 +247,7 @@ def wiki_article(
|
||||||
Returns:
|
Returns:
|
||||||
str|Response|Tuple[str, int]: The rendered article, a redirect to another article, or an error message with a status code.
|
str|Response|Tuple[str, int]: The rendered article, a redirect to another article, or an error message with a status code.
|
||||||
"""
|
"""
|
||||||
|
# Check if the project and language are valid
|
||||||
language_projects = app.languages.get(lang, {}).get("projects", {})
|
language_projects = app.languages.get(lang, {}).get("projects", {})
|
||||||
base_url = language_projects.get(project)
|
base_url = language_projects.get(project)
|
||||||
|
|
||||||
|
@ -271,9 +272,12 @@ def wiki_article(
|
||||||
headers=HEADERS,
|
headers=HEADERS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add the `variant` header if the `variant` query parameter is present
|
||||||
|
# This is used to fetch articles in a specific script variant (https://www.mediawiki.org/wiki/Writing_systems/LanguageConverter)
|
||||||
if request.args.get("variant", None):
|
if request.args.get("variant", None):
|
||||||
api_request.add_header("Accept-Language", f"{request.args['variant']}")
|
api_request.add_header("Accept-Language", f"{request.args['variant']}")
|
||||||
|
|
||||||
|
# Fetch the article content
|
||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(api_request) as response:
|
with urllib.request.urlopen(api_request) as response:
|
||||||
article_html = response.read().decode()
|
article_html = response.read().decode()
|
||||||
|
@ -313,10 +317,11 @@ def wiki_article(
|
||||||
soup = BeautifulSoup(article_html, "html.parser")
|
soup = BeautifulSoup(article_html, "html.parser")
|
||||||
body = soup.find("div", class_="mw-body-content")
|
body = soup.find("div", class_="mw-body-content")
|
||||||
|
|
||||||
|
# Turn the body into a div
|
||||||
body.name = "div"
|
body.name = "div"
|
||||||
|
|
||||||
|
# If the article is a redirect, follow the redirect, unless the `redirect=no` query parameter is present
|
||||||
redirect_message = soup.find("div", class_="redirectMsg")
|
redirect_message = soup.find("div", class_="redirectMsg")
|
||||||
|
|
||||||
if redirect_message and not (request.args.get("redirect") == "no"):
|
if redirect_message and not (request.args.get("redirect") == "no"):
|
||||||
redirect_dest = redirect_message.find("a")["title"]
|
redirect_dest = redirect_message.find("a")["title"]
|
||||||
logger.debug(f"Redirecting to {redirect_dest}")
|
logger.debug(f"Redirecting to {redirect_dest}")
|
||||||
|
@ -326,44 +331,64 @@ def wiki_article(
|
||||||
logger.debug(f"Redirect URL: {destination}")
|
logger.debug(f"Redirect URL: {destination}")
|
||||||
return redirect(destination)
|
return redirect(destination)
|
||||||
|
|
||||||
|
# Update links to other articles
|
||||||
for a in soup.find_all("a", href=True) + soup.find_all("area", href=True):
|
for a in soup.find_all("a", href=True) + soup.find_all("area", href=True):
|
||||||
href = a["href"]
|
href = a["href"]
|
||||||
|
|
||||||
|
# Internal links
|
||||||
if href.startswith("/wiki/"):
|
if href.startswith("/wiki/"):
|
||||||
a["href"] = f"/{project}/{lang}{href}"
|
a["href"] = f"/{project}/{lang}{href}"
|
||||||
|
|
||||||
|
# External links
|
||||||
elif href.startswith("//") or href.startswith("https://"):
|
elif href.startswith("//") or href.startswith("https://"):
|
||||||
parts = urlparse(href)
|
parts = urlparse(href)
|
||||||
|
|
||||||
target_domain = f"https://{parts.netloc}"
|
target_domain = f"https://{parts.netloc}"
|
||||||
path_parts = parts.path.split("/")
|
path_parts = parts.path.split("/")
|
||||||
|
|
||||||
|
target_title = None
|
||||||
|
|
||||||
if len(path_parts) >= 3:
|
if len(path_parts) >= 3:
|
||||||
target_title = "/".join(path_parts[2:])
|
target_title = "/".join(path_parts[2:])
|
||||||
|
|
||||||
found = False
|
found = False
|
||||||
|
|
||||||
|
# Check if it is an interwiki link
|
||||||
for language, language_projects in app.languages.items():
|
for language, language_projects in app.languages.items():
|
||||||
for project_name, project_url in language_projects[
|
for project_name, project_url in language_projects["projects"].items():
|
||||||
"projects"
|
|
||||||
].items():
|
|
||||||
if project_url == target_domain:
|
if project_url == target_domain:
|
||||||
|
if target_title:
|
||||||
a["href"] = url_for(
|
a["href"] = url_for(
|
||||||
"wiki_article",
|
"wiki_article",
|
||||||
project=project_name,
|
project=project_name,
|
||||||
lang=language,
|
lang=language,
|
||||||
title=target_title,
|
title=target_title,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
a["href"] = url_for(
|
||||||
|
"index_php_redirect",
|
||||||
|
project=project_name,
|
||||||
|
lang=language,
|
||||||
|
)
|
||||||
found = True
|
found = True
|
||||||
break
|
|
||||||
|
# Try to check if it is a link to the "main page" of a project
|
||||||
|
elif (
|
||||||
|
language == "en"
|
||||||
|
and project_url.replace("en.", "www.") == target_domain
|
||||||
|
):
|
||||||
|
a["href"] = url_for("home", project=project_name, lang=language)
|
||||||
if found:
|
if found:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Remove edit sections and styles
|
||||||
for span in soup.find_all("span", class_="mw-editsection"):
|
for span in soup.find_all("span", class_="mw-editsection"):
|
||||||
span.decompose()
|
span.decompose()
|
||||||
|
|
||||||
for style in soup.find_all("style"):
|
for style in soup.find_all("style"):
|
||||||
style.decompose()
|
style.decompose()
|
||||||
|
|
||||||
|
# Proxy images and videos
|
||||||
for img in soup.find_all("img"):
|
for img in soup.find_all("img"):
|
||||||
img["src"] = get_proxy_url(img["src"])
|
img["src"] = get_proxy_url(img["src"])
|
||||||
|
|
||||||
|
@ -373,20 +398,24 @@ def wiki_article(
|
||||||
for video in soup.find_all("video"):
|
for video in soup.find_all("video"):
|
||||||
video["poster"] = get_proxy_url(video["poster"])
|
video["poster"] = get_proxy_url(video["poster"])
|
||||||
|
|
||||||
|
# Convert category elements to links
|
||||||
for link in soup.find_all("link", rel="mw:PageProp/Category"):
|
for link in soup.find_all("link", rel="mw:PageProp/Category"):
|
||||||
link.name = "a"
|
link.name = "a"
|
||||||
link.string = link["href"][2:].replace("_", " ")
|
link.string = link["href"][2:].replace("_", " ")
|
||||||
link["class"] = "category-link"
|
link["class"] = "category-link"
|
||||||
|
|
||||||
|
# Remove meta links
|
||||||
for li in soup.find_all("li"):
|
for li in soup.find_all("li"):
|
||||||
if any(cls in li.get("class", []) for cls in ["nv-view", "nv-talk", "nv-edit"]):
|
if any(cls in li.get("class", []) for cls in ["nv-view", "nv-talk", "nv-edit"]):
|
||||||
li.decompose()
|
li.decompose()
|
||||||
|
|
||||||
|
# Add classes to reference links
|
||||||
for span in soup.find_all(class_="mw-reflink-text"):
|
for span in soup.find_all(class_="mw-reflink-text"):
|
||||||
parent = span.parent
|
parent = span.parent
|
||||||
if parent.attrs.get("data-mw-group", None):
|
if parent.attrs.get("data-mw-group", None):
|
||||||
span["class"] = span.get("class", []) + [parent.attrs["data-mw-group"]]
|
span["class"] = span.get("class", []) + [parent.attrs["data-mw-group"]]
|
||||||
|
|
||||||
|
# Check if the article is in a right-to-left language
|
||||||
rtl = bool(soup.find("div", class_="mw-parser-output", dir="rtl"))
|
rtl = bool(soup.find("div", class_="mw-parser-output", dir="rtl"))
|
||||||
|
|
||||||
# Edge case: When passing the `ku-arab` variant, the article is in Arabic
|
# Edge case: When passing the `ku-arab` variant, the article is in Arabic
|
||||||
|
@ -411,6 +440,7 @@ def wiki_article(
|
||||||
logger.error(f"Error fetching license information: {e}")
|
logger.error(f"Error fetching license information: {e}")
|
||||||
license = None
|
license = None
|
||||||
|
|
||||||
|
# Render the article
|
||||||
return render_template(
|
return render_template(
|
||||||
"article.html",
|
"article.html",
|
||||||
title=title.replace("_", " "),
|
title=title.replace("_", " "),
|
||||||
|
|
Loading…
Reference in a new issue