From c633a3ec4767aa15d2c6c66ef2144955e1613670 Mon Sep 17 00:00:00 2001 From: Kumi Date: Wed, 2 Oct 2024 13:48:25 +0200 Subject: [PATCH] feat: switch to JSON API for fetching contest data Updated the contest data fetching logic to use the JSON API instead of scraping the HTML directly. This change improves the reliability and performance of data retrieval by utilizing a structured API endpoint. - Replaced HTML parsing with JSON decoding - Simplified the contest details extraction - Updated template to display new data structure Addresses issues with parsing inconsistencies and changes in HTML structure. --- src/structables/routes/contest.py | 81 +++++++------------------ src/structables/templates/contests.html | 75 ++++++++++++----------- 2 files changed, 61 insertions(+), 95 deletions(-) diff --git a/src/structables/routes/contest.py b/src/structables/routes/contest.py index 3c88e13..9e29e47 100644 --- a/src/structables/routes/contest.py +++ b/src/structables/routes/contest.py @@ -49,7 +49,7 @@ def init_contest_routes(app): "total_pages": total_pages, "has_prev": page > 1, "has_next": page < total_pages, - "limit": limit + "limit": limit, } return render_template( @@ -125,68 +125,33 @@ def init_contest_routes(app): @app.route("/contest/") def route_contests(): try: - data = urlopen("https://www.instructables.com/contest/") + # Fetch current contests from the JSON API + response = urlopen( + "https://www.instructables.com/json-api/getCurrentContests?limit=50&offset=0" + ) + data = json.loads(response.read().decode()) except HTTPError as e: abort(e.code) + except Exception as e: + abort(500) # Handle other exceptions such as JSON decode errors - soup = BeautifulSoup(data.read().decode(), "html.parser") - - contest_count = "0" - - contests = [] - for contest in soup.select("div#cur-contests div.row-fluid div.contest-banner"): - link = contest.select("div.contest-banner-inner a")[0].get("href") - img = proxy(contest.select("div.contest-banner-inner a img")[0].get("src")) - alt = contest.select("div.contest-banner-inner a img")[0].get("alt") - deadline = contest.select("span.contest-meta-deadline")[0].get( - "data-deadline" - ) - prizes = contest.select("span.contest-meta-count")[0].text - entries = contest.select("span.contest-meta-count")[1].text - - contests.append( - { - "link": link, - "img": img, - "alt": alt, - "deadline": deadline, - "prizes": prizes, - "entries": entries, - } - ) - - closed = [] - for display in soup.select("div.contest-winner-display"): - link = display.select("div.contest-banner-inner a")[0].get("href") - img = proxy(display.select("div.contest-banner-inner a img")[0].get("src")) - alt = display.select("div.contest-banner-inner a img")[0].get("alt") - featured_items = [] - for featured_item in display.select("ul.featured-items li"): - item_link = featured_item.select("div.ible-thumb a")[0].get("href") - item_img = proxy( - featured_item.select("div.ible-thumb a img")[0].get("src") - ) - item_title = featured_item.select("a.title")[0].text - item_author = featured_item.select("a.author")[0].text - item_author_link = featured_item.select("a.author")[0].get("href") - - featured_items.append( - { - "link": item_link, - "img": item_img, - "title": item_title, - "author": item_author, - "author_link": item_author_link, - } - ) - closed.append( - {"link": link, "img": img, "alt": alt, "featured_items": featured_items} - ) + contests = data.get("contests", []) + contest_list = [] + for contest in contests: + contest_details = { + "link": f"https://www.instructables.com/{contest['urlString']}", + "img": contest["bannerUrlMedium"], + "alt": contest["title"], + "title": contest["title"], + "deadline": contest["deadline"], + "prizes": contest["prizeCount"], + "entries": contest["numEntries"], + } + contest_list.append(contest_details) return render_template( "contests.html", title="Contests", - contest_count=contest_count, - contests=contests, - closed=closed, + contest_count=len(contest_list), + contests=contest_list, ) diff --git a/src/structables/templates/contests.html b/src/structables/templates/contests.html index ad30a4f..3c000de 100644 --- a/src/structables/templates/contests.html +++ b/src/structables/templates/contests.html @@ -1,40 +1,41 @@ {% extends "base.html" %} {% block content %} -
-

{{ title }}

- {{ contest_count|safe }} -
-
- {% for contest in contests %} -
- - {{ contest.alt }} - -

Closes {{ contest.deadline }}

-

{{ contest.prizes }} Prizes, {{ contest.entries }} Entries

-
- {% endfor %} -
-
-

Winner's Circle

- {% for closed in closed %} -
- {{ closed.alt }} - {% for featured_items in closed.featured_items %} - - {% endfor %} -
- {% endfor %} -
-
-{% endblock %} \ No newline at end of file +
+

{{ title }}

+

Total Running Contests: {{ contest_count }}

+
+
+ {% for contest in contests %} +
+ + {{ contest.alt }} + +

{{ contest.title }}

+

Closes: {{ contest.deadline }}

+

{{ contest.prizes }} Prizes, {{ contest.entries }} Entries

+
+ {% endfor %} +
+
+

Winner's Circle

+ {% for closed in closed %} +
+ {{ closed.alt }} + {% for featured_items in closed.featured_items %} + + {% endfor %} +
+ {% endfor %} +
+
+{% endblock %}