From bcdbf5ec69b77a2d89235b5868c254472df32843 Mon Sep 17 00:00:00 2001 From: Kumi Date: Wed, 31 Jan 2024 19:22:58 +0100 Subject: [PATCH] Enhanced search integration Refactored the project search function to dynamically construct query parameters, improving readability and maintainability. Introduced several new parameters to refine search results and included a timeout for reliability. Added logging for better debugging and transparency about the search operations. Updated category and search routes to use the new search function capabilities, ensuring relevant results and proper exception handling. Additionally, implemented pagination support for better user experience when browsing projects. To improve the UI, the category and project listing templates have been refreshed with a modern layout, including the use of Bootstrap classes and better organization of content. Resolves issues with inefficient search parameter construction and user interface inconsistencies. --- main.py | 258 +++++++++++++++++++++++++--------------- templates/category.html | 111 ++++++++++------- templates/projects.html | 12 ++ 3 files changed, 242 insertions(+), 139 deletions(-) diff --git a/main.py b/main.py index fe4055c..7c9604f 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ from flask import ( stream_with_context, ) -from urllib.parse import quote, unquote +from urllib.parse import quote, unquote, urlencode from urllib.request import Request, urlopen from urllib.error import HTTPError from traceback import print_exc @@ -61,9 +61,12 @@ def projects_search( query="*", category="", channel="", - filter_by="featureFlag:=true", + filter_by="", page=1, per_page=50, + query_by="title,stepBody,screenName", + sort_by="publishDate:desc", + timeout=5, ): if category: if filter_by: @@ -78,21 +81,36 @@ def projects_search( query = quote(query) filter_by = quote(filter_by) - logging.debug(f"Searching projects with query {query} and filter {filter_by}") + logging.debug( + f"Searching projects with query {query} and filter {filter_by}, page {page}" + ) projects_headers = {"x-typesense-api-key": TYPESENSE_API_KEY} + + request_args = { + "q": query, + "query_by": query_by, + "page": page, + "sort_by": sort_by, + "include_fields": "title,urlString,coverImageUrl,screenName,favorites,views,primaryClassification,featureFlag,prizeLevel,IMadeItCount", + "filter_by": filter_by, + "per_page": per_page, + } + + args_str = "&".join([f"{key}={value}" for key, value in request_args.items()]) + projects_request = Request( - f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search?q={query}&query_by=title,stepBody,screenName&page={page}&sort_by=publishDate:desc&include_fields=title,urlString,coverImageUrl,screenName,favorites,views,primaryClassification,featureFlag,prizeLevel,IMadeItCount&filter_by={filter_by}&per_page={per_page}", + f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search?{args_str}", headers=projects_headers, ) - projects_data = urlopen(projects_request) + projects_data = urlopen(projects_request, timeout=timeout) project_obj = json.loads(projects_data.read().decode()) project_ibles = project_obj["hits"] logging.debug(f"Got {len(project_ibles)} projects") - return project_ibles + return project_ibles, project_obj["out_of"] def update_data(): @@ -109,7 +127,7 @@ def update_data(): channels.append(group.select("h2 a")[0].text.lower()) global_ibles["/projects"] = [] - project_ibles = projects_search() + project_ibles, total = projects_search(filter_by="featureFlag:=true") while len(global_ibles["/projects"]) <= 0: for ible in project_ibles: @@ -308,47 +326,36 @@ def member_header(header): } -def category_page(path, name, teachers=False): - # TODO: Figure out why this doesn't work - probably using the search function would help... - - try: - data = urlopen("https://www.instructables.com" + path) - except HTTPError as e: - abort(e.code) - - soup = BeautifulSoup(data.read().decode(), "html.parser") - - channels = [] - for card in soup.select("div.scrollable-cards-inner div.scrollable-card"): - link = card.a["href"] - img = proxy( - card.select(f"a{' noscript' if teachers else ''} img")[0].get("src") - ) - title = card.select("a img")[0].get("alt") - - channels.append({"link": link, "title": title, "img": img}) +def category_page(name, teachers=False): + path = urlparse(request.path).path + page = request.args.get("page", 1, type=int) ibles = [] - for ible in soup.select( - "div.category-landing-projects-list div.category-landing-projects-ible" - ): - link = ible.a["href"] - img = proxy(ible.select("a noscript img")[0].get("src")) - info = ible.select("div.category-landing-projects-ible-info")[0] - title = info.select("a.ible-title")[0].text - author = info.select("span.ible-author a")[0].text - author_link = info.select("span.ible-author a")[0].get("href") - channel = info.select("span.ible-channel a")[0].text - channel_link = info.select("span.ible-channel a")[0].get("href") + channels = [] + contests = [] - stats = ible.select("span.ible-stats-right-col")[0] - views = 0 - if stats.select("span.ible-views") != []: - views = stats.select("span.ible-views")[0].text - favorites = 0 - if stats.select("span.ible-favorites") != []: - favorites = stats.select("span.ible-favorites")[0].text + for channel in global_ibles["/projects"]: + if channel["channel"].startswith(name.lower()) and channel["channel"] not in channels: + channels.append(channel["channel"]) + + category_ibles, total = projects_search( + category=name, page=page, filter_by="featureFlag:=true" + ) + + for ible in category_ibles: + link = f"/{ible['document']['urlString']}" + img = proxy(ible["document"]["coverImageUrl"]) + + title = ible["document"]["title"] + author = ible["document"]["screenName"] + author_link = f"/member/{author}" + + channel = ible["document"]["primaryClassification"] + channel_link = f"/channel/{channel}" + + views = ible["document"]["views"] + favorites = ible["document"]["favorites"] ibles.append( { @@ -364,19 +371,9 @@ def category_page(path, name, teachers=False): } ) - contests = [] - for contest in soup.select( - "div.category-landing-contests-list div.category-landing-contests-item" - ): - link = contest.a["href"] - img = proxy(contest.select("a noscript img")[0].get("src")) - title = contest.select("a img")[0].get("alt") - - contests.append({"link": link, "img": img, "title": title}) - return render_template( "category.html", - name=name, + title=name, channels=channels, ibles=ibles, contests=contests, @@ -384,27 +381,97 @@ def category_page(path, name, teachers=False): ) -def project_list(path, head, sort=""): +def get_pagination(request, total, per_page=1): + pagination = [] + + args = request.args.copy() + current = int(args.pop("page", 1)) + + query_string = urlencode(args) + + total_pages = int(total / per_page) + + if query_string: + query_string = "&" + query_string + + if current > 1: + pagination.append( + { + "link": f"?page={current - 1}{query_string}", + "text": "Previous", + "disabled": False, + "active": False, + } + ) + + for page in range(max(current - 5, 1), min(current + 5, total_pages)): + if page == current: + pagination.append( + { + "link": f"?page={page}{query_string}", + "text": page, + "disabled": False, + "active": True, + } + ) + else: + pagination.append( + { + "link": f"?page={page}{query_string}", + "text": page, + "disabled": False, + "active": False, + } + ) + + if current < total_pages: + pagination.append( + { + "link": f"?page={current + 1}{query_string}", + "text": "Next", + "disabled": False, + "active": False, + } + ) + + return pagination + + +def project_list(head, sort="", per_page=20): head = f"{head + ' ' if head != '' else ''}Projects" + sort - path = urlparse(path).path + path = urlparse(request.path).path + + page = request.args.get("page", 1, type=int) if path in ("/projects/", "/projects"): ibles = global_ibles["/projects"] else: - if not "projects" in path.split("/"): + if "projects" in path.split("/"): + ibles = [] + + parts = path.split("/") + category = parts[1] + channel = "" if parts[2] == "projects" else parts[2] + + project_ibles, total = projects_search( + category=category, channel=channel, per_page=per_page, page=page + ) + + elif "search" in path.split("/"): + ibles = [] + query = request.args.get("q") + + project_ibles, total = projects_search( + query=query, + filter_by="", + per_page=per_page, + page=page, + query_by="title,screenName", + ) + + else: abort(404) - ibles = [] - - parts = path.split("/") - - category = parts[1] - channel = "" if parts[2] == "projects" else parts[2] - - # TODO: Add pagination, popular, etc. - - project_ibles = projects_search(category=category, channel=channel) - for ible in project_ibles: link = f"/{ible['document']['urlString']}" img = proxy(ible["document"]["coverImageUrl"]) @@ -433,11 +500,13 @@ def project_list(path, head, sort=""): } ) - if len(ibles) >= 8: - break - - print(ibles) - return render_template("projects.html", title=head, ibles=ibles, path=path) + return render_template( + "projects.html", + title=head, + ibles=ibles, + path=path, + pagination=get_pagination(request, total, per_page), + ) @app.route("/sitemap/") @@ -654,13 +723,12 @@ def route_contests(): @app.route("///projects/") def route_channel_projects(category, channel): - return project_list(f"/{category}/{channel}/projects/", channel.title()) + return project_list(channel.title()) @app.route("///projects//") def route_channel_projects_sort(category, channel, sort): return project_list( - f"/{category}/{channel}/projects/{sort}", channel.title(), " Sorted by " + sort.title(), ) @@ -668,72 +736,62 @@ def route_channel_projects_sort(category, channel, sort): @app.route("//projects/") def route_category_projects(category): - return project_list(f"/{category}/projects/", category.title()) + return project_list(category.title()) @app.route("//projects//") def route_category_projects_sort(category, sort): - return project_list( - f"/{category}/projects/{sort}", category.title(), " Sorted by " + sort.title() - ) + return project_list(category.title(), " Sorted by " + sort.title()) @app.route("/projects/") def route_projects(): - return project_list("/projects/", "") + return project_list("") @app.route("/search") def route_search(): - # TODO: Fix this (using search function) - return project_list("/search/?q=" + request.args["q"] + "&projects=all", "Search") + return project_list("Search") @app.route("/projects//") def route_projects_sort(sort): - return project_list(f"/projects/{sort}", "", " Sorted by " + sort.title()) + return project_list("", " Sorted by " + sort.title()) @app.route("/circuits/") def route_circuits(): - return category_page("/circuits/", "Circuits") + return category_page("Circuits") @app.route("/workshop/") def route_workshop(): - return category_page("/workshop/", "Workshop") + return category_page("Workshop") @app.route("/craft/") def route_craft(): - return category_page("/craft/", "Craft") + return category_page("Craft") @app.route("/cooking/") def route_cooking(): - return category_page("/cooking/", "Cooking") + return category_page("Cooking") @app.route("/living/") def route_living(): - return category_page("/living/", "Living") + return category_page("Living") @app.route("/outside/") def route_outside(): - return category_page("/outside/", "Outside") + return category_page("Outside") @app.route("/teachers/") def route_teachers(): - return category_page("/teachers/", "Teachers", True) - - -@app.route("/sitemap/projects//") -def route_sitemap_circuits(category, subcategory): - return category_page( - "/" + category + "/" + subcategory, subcategory + " - " + category - ) + return category_page("Teachers", True) @app.route("/member//instructables/") @@ -1021,7 +1079,9 @@ def route_article(article): thumbnail_title = thumbnail["title"] thumbnail_author = thumbnail["author"]["screenName"] thumbnail_author_link = f"/member/{thumbnail_author}" - thumbnail_channel = thumbnail["classifications"][0]["channels"][0]["title"] + thumbnail_channel = thumbnail["classifications"][0]["channels"][0][ + "title" + ] thumbnail_category = thumbnail["classifications"][0]["title"] thumbnail_channel_link = f"/{thumbnail_category}/{thumbnail_channel}" diff --git a/templates/category.html b/templates/category.html index e8110e4..caac509 100644 --- a/templates/category.html +++ b/templates/category.html @@ -1,42 +1,73 @@ {% extends "base.html" %} - {% block content %} -
-

{{ title }}

-
- {% for channel in channels %} - - {% endfor %} -
-

Featured Projects

-
- {% for ible in ibles %} -
- - {{ ible.title }} -
- {{ ible.title }} -
- by {{ ible.author }} - in {{ ible.channel }} -
- {{ ible.views }} Views   - {{ ible.favorites }} Favorites -
- {% endfor %} -
-

Contests

-
- {% for contest in contests %} - - {{ contest.title }} - - {% endfor %} -
-
-{% endblock %} \ No newline at end of file +
+

{{ title }}

+
+ {% for channel in channels %} + + {% endfor %} +
+ +

+ Featured Projects +

+
+ {% for ible in ibles %} +
+
+ + {{ ible.title }} +
+
{{ ible.title }}
+

+ by + {{ ible.author }} +
+ in + {{ ible.channel }} +
+ {{ ible.views }} Views +
+ {{ ible.favorites }} Favorites +

+
+ +
+
+ {% endfor %} +
+ + +
+{% endblock %} diff --git a/templates/projects.html b/templates/projects.html index 2b4aab5..cb7751c 100644 --- a/templates/projects.html +++ b/templates/projects.html @@ -27,6 +27,18 @@

{{ ible.views }} Views, {{ ible.favorites }} Favorites

{% endfor %} + +