fix: properly escape titles and queries in API requests

Updated URL construction to correct the escaping order of the title and query parameters: `escape` is now applied after `quote`, rather than before it. This ensures characters are encoded properly and prevents potential errors when fetching articles and search results from the API.

Additionally, added a debug log of the URL used to fetch search results, to aid in troubleshooting.

Fixes #36.
This commit is contained in:
Kumi 2024-08-30 17:43:06 +02:00
parent 87a4f07285
commit 219a36cad8
Signed by: kumi
GPG key ID: ECBCC9082395383F

View file

@ -267,7 +267,7 @@ def wiki_article(
logger.debug(f"Fetching {title} from {base_url}") logger.debug(f"Fetching {title} from {base_url}")
api_request = urllib.request.Request( api_request = urllib.request.Request(
f"{base_url}/api/rest_v1/page/html/{quote(escape(title.replace(' ', '_'), False)).replace('/', '%2F')}", f"{base_url}/api/rest_v1/page/html/{escape(quote(title.replace(" ", "_")), True).replace('/', '%2F')}",
headers=HEADERS, headers=HEADERS,
) )
@ -399,7 +399,7 @@ def wiki_article(
# Get license information from the article # Get license information from the article
mediawiki_api_request = urllib.request.Request( mediawiki_api_request = urllib.request.Request(
f"{base_url}/w/rest.php/v1/page/{quote(escape(title.replace(' ', '_'), False))}", f"{base_url}/w/rest.php/v1/page/{escape(quote(title.replace(" ", "_")), True)}",
headers=HEADERS, headers=HEADERS,
) )
@ -456,12 +456,14 @@ def search_results(
logger.debug(f"Searching {base_url} for {query}") logger.debug(f"Searching {base_url} for {query}")
srquery = quote(escape(query.replace(" ", "_"), True)) srquery = escape(quote(query.replace(" ", "_")), True)
url = ( url = (
f"{base_url}/w/api.php?action=query&format=json&list=search&srsearch={srquery}" f"{base_url}/w/api.php?action=query&format=json&list=search&srsearch={srquery}"
) )
logger.debug(f"Fetching search results from {url}")
try: try:
with urllib.request.urlopen(url) as response: with urllib.request.urlopen(url) as response:
data = json.loads(response.read().decode()) data = json.loads(response.read().decode())