Enhanced search integration

Refactored the project search function to dynamically construct query parameters, improving readability and maintainability. Introduced several new parameters to refine search results and included a timeout for reliability. Added logging for better debugging and transparency about the search operations.

Updated category and search routes to use the new search function capabilities, ensuring relevant results and proper exception handling. Additionally, implemented pagination support for better user experience when browsing projects.

To improve the UI, the category and project listing templates have been refreshed with a modern layout, including the use of Bootstrap classes and better organization of content.

Resolves issues with inefficient search parameter construction and user interface inconsistencies.
This commit is contained in:
Kumi 2024-01-31 19:22:58 +01:00
parent c1f4e157a4
commit bcdbf5ec69
Signed by: kumi
GPG key ID: ECBCC9082395383F
3 changed files with 242 additions and 139 deletions

258
main.py
View file

@ -9,7 +9,7 @@ from flask import (
stream_with_context, stream_with_context,
) )
from urllib.parse import quote, unquote from urllib.parse import quote, unquote, urlencode
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.error import HTTPError from urllib.error import HTTPError
from traceback import print_exc from traceback import print_exc
@ -61,9 +61,12 @@ def projects_search(
query="*", query="*",
category="", category="",
channel="", channel="",
filter_by="featureFlag:=true", filter_by="",
page=1, page=1,
per_page=50, per_page=50,
query_by="title,stepBody,screenName",
sort_by="publishDate:desc",
timeout=5,
): ):
if category: if category:
if filter_by: if filter_by:
@ -78,21 +81,36 @@ def projects_search(
query = quote(query) query = quote(query)
filter_by = quote(filter_by) filter_by = quote(filter_by)
logging.debug(f"Searching projects with query {query} and filter {filter_by}") logging.debug(
f"Searching projects with query {query} and filter {filter_by}, page {page}"
)
projects_headers = {"x-typesense-api-key": TYPESENSE_API_KEY} projects_headers = {"x-typesense-api-key": TYPESENSE_API_KEY}
request_args = {
"q": query,
"query_by": query_by,
"page": page,
"sort_by": sort_by,
"include_fields": "title,urlString,coverImageUrl,screenName,favorites,views,primaryClassification,featureFlag,prizeLevel,IMadeItCount",
"filter_by": filter_by,
"per_page": per_page,
}
args_str = "&".join([f"{key}={value}" for key, value in request_args.items()])
projects_request = Request( projects_request = Request(
f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search?q={query}&query_by=title,stepBody,screenName&page={page}&sort_by=publishDate:desc&include_fields=title,urlString,coverImageUrl,screenName,favorites,views,primaryClassification,featureFlag,prizeLevel,IMadeItCount&filter_by={filter_by}&per_page={per_page}", f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search?{args_str}",
headers=projects_headers, headers=projects_headers,
) )
projects_data = urlopen(projects_request) projects_data = urlopen(projects_request, timeout=timeout)
project_obj = json.loads(projects_data.read().decode()) project_obj = json.loads(projects_data.read().decode())
project_ibles = project_obj["hits"] project_ibles = project_obj["hits"]
logging.debug(f"Got {len(project_ibles)} projects") logging.debug(f"Got {len(project_ibles)} projects")
return project_ibles return project_ibles, project_obj["out_of"]
def update_data(): def update_data():
@ -109,7 +127,7 @@ def update_data():
channels.append(group.select("h2 a")[0].text.lower()) channels.append(group.select("h2 a")[0].text.lower())
global_ibles["/projects"] = [] global_ibles["/projects"] = []
project_ibles = projects_search() project_ibles, total = projects_search(filter_by="featureFlag:=true")
while len(global_ibles["/projects"]) <= 0: while len(global_ibles["/projects"]) <= 0:
for ible in project_ibles: for ible in project_ibles:
@ -308,47 +326,36 @@ def member_header(header):
} }
def category_page(path, name, teachers=False): def category_page(name, teachers=False):
# TODO: Figure out why this doesn't work - probably using the search function would help... path = urlparse(request.path).path
page = request.args.get("page", 1, type=int)
try:
data = urlopen("https://www.instructables.com" + path)
except HTTPError as e:
abort(e.code)
soup = BeautifulSoup(data.read().decode(), "html.parser")
channels = []
for card in soup.select("div.scrollable-cards-inner div.scrollable-card"):
link = card.a["href"]
img = proxy(
card.select(f"a{' noscript' if teachers else ''} img")[0].get("src")
)
title = card.select("a img")[0].get("alt")
channels.append({"link": link, "title": title, "img": img})
ibles = [] ibles = []
for ible in soup.select(
"div.category-landing-projects-list div.category-landing-projects-ible"
):
link = ible.a["href"]
img = proxy(ible.select("a noscript img")[0].get("src"))
info = ible.select("div.category-landing-projects-ible-info")[0] channels = []
title = info.select("a.ible-title")[0].text contests = []
author = info.select("span.ible-author a")[0].text
author_link = info.select("span.ible-author a")[0].get("href")
channel = info.select("span.ible-channel a")[0].text
channel_link = info.select("span.ible-channel a")[0].get("href")
stats = ible.select("span.ible-stats-right-col")[0] for channel in global_ibles["/projects"]:
views = 0 if channel["channel"].startswith(name.lower()) and channel["channel"] not in channels:
if stats.select("span.ible-views") != []: channels.append(channel["channel"])
views = stats.select("span.ible-views")[0].text
favorites = 0 category_ibles, total = projects_search(
if stats.select("span.ible-favorites") != []: category=name, page=page, filter_by="featureFlag:=true"
favorites = stats.select("span.ible-favorites")[0].text )
for ible in category_ibles:
link = f"/{ible['document']['urlString']}"
img = proxy(ible["document"]["coverImageUrl"])
title = ible["document"]["title"]
author = ible["document"]["screenName"]
author_link = f"/member/{author}"
channel = ible["document"]["primaryClassification"]
channel_link = f"/channel/{channel}"
views = ible["document"]["views"]
favorites = ible["document"]["favorites"]
ibles.append( ibles.append(
{ {
@ -364,19 +371,9 @@ def category_page(path, name, teachers=False):
} }
) )
contests = []
for contest in soup.select(
"div.category-landing-contests-list div.category-landing-contests-item"
):
link = contest.a["href"]
img = proxy(contest.select("a noscript img")[0].get("src"))
title = contest.select("a img")[0].get("alt")
contests.append({"link": link, "img": img, "title": title})
return render_template( return render_template(
"category.html", "category.html",
name=name, title=name,
channels=channels, channels=channels,
ibles=ibles, ibles=ibles,
contests=contests, contests=contests,
@ -384,27 +381,97 @@ def category_page(path, name, teachers=False):
) )
def project_list(path, head, sort=""): def get_pagination(request, total, per_page=1):
pagination = []
args = request.args.copy()
current = int(args.pop("page", 1))
query_string = urlencode(args)
total_pages = int(total / per_page)
if query_string:
query_string = "&" + query_string
if current > 1:
pagination.append(
{
"link": f"?page={current - 1}{query_string}",
"text": "Previous",
"disabled": False,
"active": False,
}
)
for page in range(max(current - 5, 1), min(current + 5, total_pages)):
if page == current:
pagination.append(
{
"link": f"?page={page}{query_string}",
"text": page,
"disabled": False,
"active": True,
}
)
else:
pagination.append(
{
"link": f"?page={page}{query_string}",
"text": page,
"disabled": False,
"active": False,
}
)
if current < total_pages:
pagination.append(
{
"link": f"?page={current + 1}{query_string}",
"text": "Next",
"disabled": False,
"active": False,
}
)
return pagination
def project_list(head, sort="", per_page=20):
head = f"{head + ' ' if head != '' else ''}Projects" + sort head = f"{head + ' ' if head != '' else ''}Projects" + sort
path = urlparse(path).path path = urlparse(request.path).path
page = request.args.get("page", 1, type=int)
if path in ("/projects/", "/projects"): if path in ("/projects/", "/projects"):
ibles = global_ibles["/projects"] ibles = global_ibles["/projects"]
else: else:
if not "projects" in path.split("/"): if "projects" in path.split("/"):
ibles = []
parts = path.split("/")
category = parts[1]
channel = "" if parts[2] == "projects" else parts[2]
project_ibles, total = projects_search(
category=category, channel=channel, per_page=per_page, page=page
)
elif "search" in path.split("/"):
ibles = []
query = request.args.get("q")
project_ibles, total = projects_search(
query=query,
filter_by="",
per_page=per_page,
page=page,
query_by="title,screenName",
)
else:
abort(404) abort(404)
ibles = []
parts = path.split("/")
category = parts[1]
channel = "" if parts[2] == "projects" else parts[2]
# TODO: Add pagination, popular, etc.
project_ibles = projects_search(category=category, channel=channel)
for ible in project_ibles: for ible in project_ibles:
link = f"/{ible['document']['urlString']}" link = f"/{ible['document']['urlString']}"
img = proxy(ible["document"]["coverImageUrl"]) img = proxy(ible["document"]["coverImageUrl"])
@ -433,11 +500,13 @@ def project_list(path, head, sort=""):
} }
) )
if len(ibles) >= 8: return render_template(
break "projects.html",
title=head,
print(ibles) ibles=ibles,
return render_template("projects.html", title=head, ibles=ibles, path=path) path=path,
pagination=get_pagination(request, total, per_page),
)
@app.route("/sitemap/") @app.route("/sitemap/")
@ -654,13 +723,12 @@ def route_contests():
@app.route("/<category>/<channel>/projects/") @app.route("/<category>/<channel>/projects/")
def route_channel_projects(category, channel): def route_channel_projects(category, channel):
return project_list(f"/{category}/{channel}/projects/", channel.title()) return project_list(channel.title())
@app.route("/<category>/<channel>/projects/<sort>/") @app.route("/<category>/<channel>/projects/<sort>/")
def route_channel_projects_sort(category, channel, sort): def route_channel_projects_sort(category, channel, sort):
return project_list( return project_list(
f"/{category}/{channel}/projects/{sort}",
channel.title(), channel.title(),
" Sorted by " + sort.title(), " Sorted by " + sort.title(),
) )
@ -668,72 +736,62 @@ def route_channel_projects_sort(category, channel, sort):
@app.route("/<category>/projects/") @app.route("/<category>/projects/")
def route_category_projects(category): def route_category_projects(category):
return project_list(f"/{category}/projects/", category.title()) return project_list(category.title())
@app.route("/<category>/projects/<sort>/") @app.route("/<category>/projects/<sort>/")
def route_category_projects_sort(category, sort): def route_category_projects_sort(category, sort):
return project_list( return project_list(category.title(), " Sorted by " + sort.title())
f"/{category}/projects/{sort}", category.title(), " Sorted by " + sort.title()
)
@app.route("/projects/") @app.route("/projects/")
def route_projects(): def route_projects():
return project_list("/projects/", "") return project_list("")
@app.route("/search") @app.route("/search")
def route_search(): def route_search():
# TODO: Fix this (using search function) return project_list("Search")
return project_list("/search/?q=" + request.args["q"] + "&projects=all", "Search")
@app.route("/projects/<sort>/") @app.route("/projects/<sort>/")
def route_projects_sort(sort): def route_projects_sort(sort):
return project_list(f"/projects/{sort}", "", " Sorted by " + sort.title()) return project_list("", " Sorted by " + sort.title())
@app.route("/circuits/") @app.route("/circuits/")
def route_circuits(): def route_circuits():
return category_page("/circuits/", "Circuits") return category_page("Circuits")
@app.route("/workshop/") @app.route("/workshop/")
def route_workshop(): def route_workshop():
return category_page("/workshop/", "Workshop") return category_page("Workshop")
@app.route("/craft/") @app.route("/craft/")
def route_craft(): def route_craft():
return category_page("/craft/", "Craft") return category_page("Craft")
@app.route("/cooking/") @app.route("/cooking/")
def route_cooking(): def route_cooking():
return category_page("/cooking/", "Cooking") return category_page("Cooking")
@app.route("/living/") @app.route("/living/")
def route_living(): def route_living():
return category_page("/living/", "Living") return category_page("Living")
@app.route("/outside/") @app.route("/outside/")
def route_outside(): def route_outside():
return category_page("/outside/", "Outside") return category_page("Outside")
@app.route("/teachers/") @app.route("/teachers/")
def route_teachers(): def route_teachers():
return category_page("/teachers/", "Teachers", True) return category_page("Teachers", True)
@app.route("/sitemap/projects/<category>/<subcategory>")
def route_sitemap_circuits(category, subcategory):
return category_page(
"/" + category + "/" + subcategory, subcategory + " - " + category
)
@app.route("/member/<member>/instructables/") @app.route("/member/<member>/instructables/")
@ -1021,7 +1079,9 @@ def route_article(article):
thumbnail_title = thumbnail["title"] thumbnail_title = thumbnail["title"]
thumbnail_author = thumbnail["author"]["screenName"] thumbnail_author = thumbnail["author"]["screenName"]
thumbnail_author_link = f"/member/{thumbnail_author}" thumbnail_author_link = f"/member/{thumbnail_author}"
thumbnail_channel = thumbnail["classifications"][0]["channels"][0]["title"] thumbnail_channel = thumbnail["classifications"][0]["channels"][0][
"title"
]
thumbnail_category = thumbnail["classifications"][0]["title"] thumbnail_category = thumbnail["classifications"][0]["title"]
thumbnail_channel_link = f"/{thumbnail_category}/{thumbnail_channel}" thumbnail_channel_link = f"/{thumbnail_category}/{thumbnail_channel}"

View file

@ -1,42 +1,73 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block content %} {% block content %}
<center> <div class="container">
<h1>{{ title }}</h1> <h1 class="text-center">{{ title }}</h1>
<div class="channel-list"> <div class="row">
{% for channel in channels %} {% for channel in channels %}
<div style="display:inline-block"> <div class="col-6 col-md-4 col-lg-3 text-center mb-3">
<a href="{{ channel.link }}" style="position:relative;text-align:center;color:white;"> <a href="/{{ channel }}" class="btn btn-primary d-block position-relative">
<img src="{{ channel.img }}" alt="{{ channel.title }}" style="display:inline-block;max-width:200px;"> {{ channel }}
<p style="position:absolute;bottom:0px;left:4px;font-weight:thin;">{{ channel.title }}</p> </a>
</a> </div>
</div> {% endfor %}
{% endfor %} </div>
</div>
<h2><a href="{{ path }}projects/">Featured Projects</a></h2> <h2 class="text-center">
<div class="project-list"> <a href="{{ path }}projects/">Featured Projects</a>
{% for ible in ibles %} </h2>
<div class="ible-list-item"> <div class="row">
<a href="{{ ible.link }}"> {% for ible in ibles %}
<img src="{{ ible.img }}" alt="{{ ible.title }}"> <div class="col-12 col-sm-6 col-md-4 col-lg-3 mb-4">
<br> <div class="card">
<span><b>{{ ible.title }}</b></span> <a href="{{ ible.link }}" class="text-dark text-decoration-none">
</a> <img
<span>by <a href="{{ ible.author_link }}">{{ ible.author }}</a></span> src="{{ ible.img }}"
<span>in <a href="{{ ible.channel_link }}">{{ ible.channel }}</a></span> alt="{{ ible.title }}"
<br> class="card-img-top"
<span>{{ ible.views }} Views&nbsp;&nbsp;</span> />
<span>{{ ible.favorites }} Favorites</span> <div class="card-body">
</div> <h5 class="card-title">{{ ible.title }}</h5>
{% endfor %} <p class="card-text">
</div> <small class="text-muted"
<h2><a href="/contest/">Contests</a></h2> >by
<div style="display:inline-block;"> <a href="{{ ible.author_link }}" class="text-primary"
{% for contest in contests %} >{{ ible.author }}</a
<a href="{{ contest.link }}" style="color:#bbc2cf;"> ></small
<img src="{{ contest.img }}" alt="{{ contest.title }}" style="max-width:400px;"> >
</a> <br />
{% endfor %} <small class="text-muted"
</div> >in
</center> <a href="{{ ible.channel_link }}" class="text-primary"
{% endblock %} >{{ ible.channel }}</a
></small
>
<br />
<small class="text-muted">{{ ible.views }} Views</small>
<br />
<small class="text-muted">{{ ible.favorites }} Favorites</small>
</p>
</div>
</a>
</div>
</div>
{% endfor %}
</div>
<!-- Doesn't seem to be current
<h2 class="text-center"><a href="/contest/">Contests</a></h2>
<div class="row">
<div class="col">
{% for contest in contests %}
<a href="{{ contest.link }}">
<img
src="{{ contest.img }}"
alt="{{ contest.title }}"
class="img-fluid"
/>
</a>
{% endfor %}
</div>
</div>
-->
</div>
{% endblock %}

View file

@ -27,6 +27,18 @@
<p>{{ ible.views }} Views, {{ ible.favorites }} Favorites</p> <p>{{ ible.views }} Views, {{ ible.favorites }} Favorites</p>
</div> </div>
{% endfor %} {% endfor %}
<ul class="pagination">
{% for page in pagination %}
<li class="page-item">
<a
class="page-link {% if page.active %}active{% endif %} {% if page.disabled %}disabled{% endif %}"
href="{{ page.link }}"
>{{ page.text }}</a
>
</li>
{% endfor %}
</ul>
</div> </div>
</div> </div>
</center> </center>