forked from PrivateCoffee/wikimore
feat: update API endpoints and add custom headers
Replaced the outdated Wikimedia Action API endpoints (`/w/api.php`) with REST API endpoints (`/api/rest_v1/...`) for fetching articles and search results. Introduced custom request headers that include a User-Agent, which helps identify the source of the traffic. Reworked article fetching: the `body` element of the returned HTML is renamed to `div` before rendering, and an optional `variant` query parameter is forwarded as an `Accept-Language` header to request language variants. Ref: #16
This commit is contained in:
parent
a182e71661
commit
a3395e118e
1 changed file with 19 additions and 22 deletions
|
@ -22,6 +22,10 @@ logger.addHandler(handler)
|
||||||
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
handler.setFormatter(formatter)
|
handler.setFormatter(formatter)
|
||||||
|
|
||||||
|
HEADERS = {
|
||||||
|
"User-Agent": "Wikimore/dev (https://git.private.coffee/privatecoffee/wikimore)"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_wikimedia_projects():
|
def get_wikimedia_projects():
|
||||||
url = "https://meta.wikimedia.org/w/api.php?action=sitematrix&format=json"
|
url = "https://meta.wikimedia.org/w/api.php?action=sitematrix&format=json"
|
||||||
|
@ -119,27 +123,22 @@ def wiki_article(project, lang, title):
|
||||||
|
|
||||||
logger.debug(f"Fetching {title} from {base_url}")
|
logger.debug(f"Fetching {title} from {base_url}")
|
||||||
|
|
||||||
url = f"{base_url}/w/api.php?action=query&format=json&titles={quote(escape(title.replace(' ', '_'), False))}&prop=revisions&rvprop=content&rvparse=1"
|
api_request = urllib.request.Request(
|
||||||
with urllib.request.urlopen(url) as response:
|
f"{base_url}/api/rest_v1/page/html/{quote(escape(title.replace(' ', '_'), False))}",
|
||||||
data = json.loads(response.read().decode())
|
headers=HEADERS,
|
||||||
pages = data["query"]["pages"]
|
)
|
||||||
|
|
||||||
try:
|
if request.args.get("variant", None):
|
||||||
article_html = next(iter(pages.values()))["revisions"][0]["*"]
|
api_request.add_header("Accept-Language", f"{request.args['variant']}")
|
||||||
except KeyError:
|
|
||||||
return (
|
with urllib.request.urlopen(api_request) as response:
|
||||||
render_template(
|
article_html = response.read().decode()
|
||||||
"article.html",
|
|
||||||
title=title.replace("_", " "),
|
|
||||||
content="Article not found",
|
|
||||||
wikimedia_projects=app.wikimedia_projects,
|
|
||||||
languages=app.languages,
|
|
||||||
),
|
|
||||||
404,
|
|
||||||
)
|
|
||||||
|
|
||||||
soup = BeautifulSoup(article_html, "html.parser")
|
soup = BeautifulSoup(article_html, "html.parser")
|
||||||
|
|
||||||
|
body = soup.find("body")
|
||||||
|
body.name = "div"
|
||||||
|
|
||||||
redirect_message = soup.find("div", class_="redirectMsg")
|
redirect_message = soup.find("div", class_="redirectMsg")
|
||||||
|
|
||||||
if redirect_message and not (request.args.get("redirect") == "no"):
|
if redirect_message and not (request.args.get("redirect") == "no"):
|
||||||
|
@ -203,7 +202,7 @@ def wiki_article(project, lang, title):
|
||||||
if any(cls in li.get("class", []) for cls in ["nv-view", "nv-talk", "nv-edit"]):
|
if any(cls in li.get("class", []) for cls in ["nv-view", "nv-talk", "nv-edit"]):
|
||||||
li.decompose()
|
li.decompose()
|
||||||
|
|
||||||
processed_html = str(soup)
|
processed_html = str(body)
|
||||||
return render_template(
|
return render_template(
|
||||||
"article.html",
|
"article.html",
|
||||||
title=title.replace("_", " "),
|
title=title.replace("_", " "),
|
||||||
|
@ -226,12 +225,10 @@ def search_results(project, lang, query):
|
||||||
|
|
||||||
srquery = quote(escape(query.replace(" ", "_"), True))
|
srquery = quote(escape(query.replace(" ", "_"), True))
|
||||||
|
|
||||||
url = (
|
url = f"{base_url}/api/rest_v1/page/search/{srquery}"
|
||||||
f"{base_url}/w/api.php?action=query&format=json&list=search&srsearch={srquery}"
|
|
||||||
)
|
|
||||||
with urllib.request.urlopen(url) as response:
|
with urllib.request.urlopen(url) as response:
|
||||||
data = json.loads(response.read().decode())
|
data = json.loads(response.read().decode())
|
||||||
search_results = data["query"]["search"]
|
search_results = data["pages"]
|
||||||
return render_template(
|
return render_template(
|
||||||
"search_results.html",
|
"search_results.html",
|
||||||
query=query,
|
query=query,
|
||||||
|
|
Loading…
Reference in a new issue