diff --git a/README.md b/README.md index e2cc5ba..78c35e0 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ Structables supports the use of the following environment variables for configur - `STRUCTABLES_UNSAFE`: If set, allow embedding untrusted iframes (if unset, display a warning and allow loading the content manually) - `STRUCTABLES_PRIVACY_FILE`: The path to a text file or Markdown file (with .md suffix) to use for the Privacy Policy page (if unset, try `privacy.txt` or `privacy.md` in the working directory, or fall back to a generic message) - `STRUCTABLES_DEBUG`: If set, log additional debug information to stdout +- `STRUCTABLES_THEME`: Allows selecting a theme for the frontend. Currently, only `dark` and `light` are supported. If not set, it will be automatically detected based on the user's system settings, and a toggle will be provided in the header. ## License diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..555ca07 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +ruff +black +isort +mypy \ No newline at end of file diff --git a/src/structables/config.py b/src/structables/config.py index 738df6d..b771d8a 100644 --- a/src/structables/config.py +++ b/src/structables/config.py @@ -7,6 +7,7 @@ class Config: INVIDIOUS = os.environ.get("STRUCTABLES_INVIDIOUS") UNSAFE = os.environ.get("STRUCTABLES_UNSAFE", False) PRIVACY_FILE = os.environ.get("STRUCTABLES_PRIVACY_FILE") + THEME = os.environ.get("STRUCTABLES_THEME", "auto") @staticmethod def init_app(app): diff --git a/src/structables/main.py b/src/structables/main.py index 7dae69c..32f0925 100644 --- a/src/structables/main.py +++ b/src/structables/main.py @@ -3,17 +3,23 @@ from flask import Flask import threading import time +import logging from .config import Config from .routes import init_routes from .utils.data import update_data from .utils.helpers import get_typesense_api_key +# Configure logging +logger = logging.getLogger(__name__) + app = Flask(__name__, template_folder="templates", static_folder="static") app.config.from_object(Config) app.typesense_api_key = get_typesense_api_key() +logger.debug("Initializing routes") init_routes(app) +logger.debug("Performing initial data update") update_data(app) @@ -25,13 +31,32 @@ def background_update_data(app): Args: app (Flask): The Flask app instance. """ + logger.debug("Starting background update thread") while True: + logger.debug("Running scheduled data update") update_data(app) + logger.debug("Data update complete, sleeping for 5 minutes") time.sleep(300) def main(): + if app.config["DEBUG"]: + logging.basicConfig( + level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + else: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + logger.debug("Starting background update thread") threading.Thread(target=background_update_data, args=(app,), daemon=True).start() + + logger.info( + f"Starting Structables on {app.config['LISTEN_HOST']}:{app.config['PORT']}" + ) app.run( port=app.config["PORT"], host=app.config["LISTEN_HOST"], diff --git a/src/structables/routes/category.py b/src/structables/routes/category.py index eb3c518..515eea7 100644 --- a/src/structables/routes/category.py +++ b/src/structables/routes/category.py @@ -1,15 +1,22 @@ from flask import redirect from werkzeug.exceptions import NotFound from ..utils.helpers import project_list, category_page +import logging + +logger = logging.getLogger(__name__) def init_category_routes(app): @app.route("///projects/") def route_channel_projects(category, channel): + logger.debug(f"Rendering channel projects for {category}/{channel}") return project_list(app, channel.title()) @app.route("///projects//") def route_channel_projects_sort(category, channel, sort): + logger.debug( + f"Rendering channel projects for {category}/{channel} sorted by {sort}" + ) return project_list( app, channel.title(), @@ -18,50 +25,62 @@ def init_category_routes(app): @app.route("//projects/") def route_category_projects(category): + logger.debug(f"Rendering category projects for {category}") return project_list(app, category.title()) @app.route("//projects//") def route_category_projects_sort(category, sort): + logger.debug(f"Rendering category projects for {category} sorted by {sort}") return project_list(app, category.title(), " Sorted by " + sort.title()) @app.route("/projects/") def route_projects(): + logger.debug("Rendering all projects") return project_list(app, "") @app.route("/projects//") def route_projects_sort(sort): + logger.debug(f"Rendering all projects sorted by {sort}") return project_list(app, "", " Sorted by " + sort.title()) @app.route("/circuits/") def route_circuits(): + logger.debug("Rendering circuits category page") return category_page(app, "Circuits") @app.route("/workshop/") def route_workshop(): + logger.debug("Rendering workshop category page") return category_page(app, "Workshop") @app.route("/craft/") def route_craft(): + logger.debug("Rendering craft category page") return category_page(app, "Craft") @app.route("/cooking/") def route_cooking(): + logger.debug("Rendering cooking category page") return category_page(app, "Cooking") @app.route("/living/") def route_living(): + logger.debug("Rendering living category page") return category_page(app, "Living") @app.route("/outside/") def route_outside(): + logger.debug("Rendering outside category page") return category_page(app, "Outside") @app.route("/teachers/") def route_teachers(): + logger.debug("Rendering teachers category page") return category_page(app, "Teachers", True) @app.route("///") def route_channel_redirect(category, channel): + logger.debug(f"Channel redirect for {category}/{channel}") if ( category == "circuits" or category == "workshop" @@ -71,6 +90,8 @@ def init_category_routes(app): or category == "outside" or category == "teachers" ): + logger.debug(f"Redirecting to /{category}/{channel}/projects/") return redirect(f"/{category}/{channel}/projects/", 307) else: + logger.warning(f"Invalid category: {category}") raise NotFound() diff --git a/src/structables/routes/contest.py b/src/structables/routes/contest.py index 3c72da6..f0f9b4d 100644 --- a/src/structables/routes/contest.py +++ b/src/structables/routes/contest.py @@ -4,6 +4,9 @@ from urllib.error import HTTPError from ..utils.helpers import proxy from bs4 import BeautifulSoup import json +import logging + +logger = logging.getLogger(__name__) def init_contest_routes(app): @@ -14,18 +17,27 @@ def init_contest_routes(app): page = request.args.get("page", default=1, type=int) offset = (page - 1) * limit + logger.debug(f"Fetching contest archive page {page} with limit {limit}") + try: # Fetch data using urlopen url = f"https://www.instructables.com/json-api/getClosedContests?limit={limit}&offset={offset}" + logger.debug(f"Making request to {url}") response = urlopen(url) data = json.loads(response.read().decode()) + logger.debug( + f"Received contest archive data with {len(data.get('contests', []))} contests" + ) except HTTPError as e: + logger.error(f"HTTP error fetching contest archive: {e.code}") abort(e.code) except Exception as e: + logger.error(f"Error fetching contest archive: {str(e)}") abort(500) # Handle other exceptions like JSON decode errors contests = data.get("contests", []) full_list_size = data.get("fullListSize", 0) + logger.debug(f"Total contests in archive: {full_list_size}") contest_list = [] for contest in contests: @@ -42,6 +54,7 @@ def init_contest_routes(app): # Calculate total pages total_pages = (full_list_size + limit - 1) // limit + logger.debug(f"Pagination: page {page}/{total_pages}") # Create pagination pagination = { @@ -66,16 +79,22 @@ def init_contest_routes(app): page, per_page = 1, 100 all_entries = [] + logger.debug(f"Fetching entries for contest: {contest}") + while True: try: url = f"{base_url}?q=*&filter_by=contestPath:{contest}&sort_by=contestEntryDate:desc&per_page={per_page}&page={page}" + logger.debug(f"Making request to {url} (page {page})") request = Request(url, headers=headers) response = urlopen(request) data = json.loads(response.read().decode()) except HTTPError as e: + logger.error(f"HTTP error fetching contest entries: {e.code}") abort(e.code) hits = data.get("hits", []) + logger.debug(f"Received {len(hits)} entries on page {page}") + if not hits: break @@ -84,10 +103,13 @@ def init_contest_routes(app): break page += 1 + logger.debug(f"Total entries fetched: {len(all_entries)}") return all_entries @app.route("/contest//") def route_contest(contest): + logger.debug(f"Fetching contest page for: {contest}") + try: data = urlopen(f"https://www.instructables.com/contest/{contest}/") html = data.read().decode() @@ -95,13 +117,19 @@ def init_contest_routes(app): title_tag = soup.find("h1") title = title_tag.get_text() if title_tag else "Contest" + logger.debug(f"Contest title: {title}") img_tag = soup.find("img", alt=lambda x: x and "Banner" in x) img = img_tag.get("src") if img_tag else "default.jpg" - entry_count = len(get_entries(contest)) + logger.debug(f"Fetching entries for contest: {contest}") + entries = get_entries(contest) + entry_count = len(entries) + logger.debug(f"Found {entry_count} entries") + prizes_items = soup.select("article") prizes = len(prizes_items) if prizes_items else 0 + logger.debug(f"Found {prizes} prizes") overview_section = soup.find("section", id="overview") info = ( @@ -111,10 +139,10 @@ def init_contest_routes(app): ) except HTTPError as e: + logger.error(f"HTTP error fetching contest page: {e.code}") abort(e.code) entry_list = [] - entries = get_entries(contest) for entry in entries: doc = entry["document"] entry_details = { @@ -141,18 +169,25 @@ def init_contest_routes(app): @app.route("/contest/") def route_contests(): + logger.debug("Fetching current contests") + try: # Fetch current contests from the JSON API response = urlopen( "https://www.instructables.com/json-api/getCurrentContests?limit=50&offset=0" ) data = json.loads(response.read().decode()) + logger.debug(f"Received current contests data") except HTTPError as e: + logger.error(f"HTTP error fetching current contests: {e.code}") abort(e.code) except Exception as e: + logger.error(f"Error fetching current contests: {str(e)}") abort(500) # Handle other exceptions such as JSON decode errors contests = data.get("contests", []) + logger.debug(f"Found {len(contests)} current contests") + contest_list = [] for contest in contests: contest_details = { diff --git a/src/structables/routes/main.py b/src/structables/routes/main.py index 0a52249..d79f84b 100644 --- a/src/structables/routes/main.py +++ b/src/structables/routes/main.py @@ -1,4 +1,4 @@ -from flask import render_template, abort +from flask import render_template, abort, request from urllib.request import urlopen from urllib.error import HTTPError from bs4 import BeautifulSoup @@ -8,18 +8,25 @@ from markdown2 import Markdown from traceback import print_exc import pathlib import json +import logging from ..utils.data import update_data from ..utils.helpers import explore_lists, proxy from .category import project_list +logger = logging.getLogger(__name__) + def init_main_routes(app): @app.route("/") def route_explore(): + logger.debug("Rendering explore page") + try: + logger.debug("Fetching data from instructables.com") data = urlopen("https://www.instructables.com/") except HTTPError as e: + logger.error(f"HTTP error fetching explore page: {e.code}") abort(e.code) soup = BeautifulSoup(data.read().decode(), "html.parser") @@ -27,7 +34,9 @@ def init_main_routes(app): explore = soup.select(".home-content-explore-wrap")[0] title = explore.select("h2")[0].text + logger.debug(f"Explore page title: {title}") + logger.debug("Parsing category sections") circuits = explore_lists( explore.select(".home-content-explore-category-circuits")[0] ) @@ -48,6 +57,8 @@ def init_main_routes(app): explore.select(".home-content-explore-category-teachers")[0] ) + logger.debug("Rendering explore page template") + return render_template( "index.html", title=title, @@ -65,9 +76,15 @@ def init_main_routes(app): @app.route("/sitemap/") @app.route("/sitemap/") def route_sitemap(path=""): + logger.debug(f"Rendering sitemap for path: {path}") + try: + logger.debug( + f"Fetching sitemap data from instructables.com for path: {path}" + ) data = urlopen("https://www.instructables.com/sitemap/" + path) except HTTPError as e: + logger.error(f"HTTP error fetching sitemap: {e.code}") abort(e.code) soup = BeautifulSoup(data.read().decode(), "html.parser") @@ -77,6 +94,7 @@ def init_main_routes(app): group_section = main.select("div.group-section") if group_section: + logger.debug(f"Found {len(group_section)} group sections") groups = [] for group in group_section: category = group.select("h2 a")[0].text @@ -87,8 +105,10 @@ def init_main_routes(app): channel_link = li.a["href"] channels.append([channel, channel_link]) groups.append([category, category_link, channels]) + logger.debug(f"Added group {category} with {len(channels)} channels") else: + logger.debug("No group sections found, using flat list") groups = [] channels = [] for li in main.select("ul.sitemap-listing li"): @@ -100,17 +120,23 @@ def init_main_routes(app): channels.append([channel, channel_link]) groups.append(["", "", channels]) + logger.debug(f"Added flat list with {len(channels)} channels") return render_template("sitemap.html", title="Sitemap", groups=groups) @app.route("/
/") def route_article(article): + logger.debug(f"Rendering article page for: {article}") + try: + logger.debug(f"Fetching article data from instructables.com for: {article}") data = urlopen( f"https://www.instructables.com/json-api/showInstructableModel?urlString={article}" ) data = json.loads(data.read().decode()) + logger.debug(f"Successfully fetched article data") except HTTPError as e: + logger.error(f"HTTP error fetching article: {e.code}") abort(e.code) try: @@ -127,16 +153,21 @@ def init_main_routes(app): views = data["views"] favorites = data["favorites"] + logger.debug(f"Article: {title} by {author} in {category}/{channel}") + if "steps" in data: + logger.debug(f"Article has {len(data['steps'])} steps") steps = [] if "supplies" in data: supplies = data["supplies"] + logger.debug("Article has supplies section") supplies_files = [] if "suppliesFiles" in data: supplies_files = data["suppliesFiles"] + logger.debug(f"Article has {len(supplies_files)} supply files") data["steps"].insert( 1, @@ -149,20 +180,68 @@ def init_main_routes(app): for step in data["steps"]: step_title = step["title"] + logger.debug(f"Processing step: {step_title}") + logger.debug(f"{step}") # TODO: Remove this line step_imgs = [] - step_videos = [] # TODO: Check if this is still required step_iframes = [] step_downloads = [] for file in step["files"]: - if file["image"] and "embedType" not in "file": - step_imgs.append( - { - "src": proxy(file["downloadUrl"], file["name"]), - "alt": file["name"], - } - ) + if file["image"]: + if "embedType" not in "file": + step_imgs.append( + { + "src": proxy(file["downloadUrl"], file["name"]), + "alt": file["name"], + } + ) + if file["embedType"] == "VIDEO": + embed_html_code = file["embedHtmlCode"] + soup = BeautifulSoup(embed_html_code, "html.parser") + if soup.select("iframe"): + src = soup.select("iframe")[0].get("src") + width = soup.select("iframe")[0].get("width") + height = soup.select("iframe")[0].get("height") + logger.debug( + f"Processing video iframe with src: {src}" + ) + + if src.startswith( + "https://content.instructables.com" + ): + src = src.replace( + "https://content.instructables.com", + f"/proxy/?url={src}", + ) + logger.debug( + f"Proxying instructables content: {src}" + ) + + elif app.config["INVIDIOUS"] and src.startswith( + "https://www.youtube.com" + ): + src = src.replace( + "https://www.youtube.com", + app.config["INVIDIOUS"], + ) + logger.debug( + f"Using Invidious for YouTube: {src}" + ) + + elif not app.config["UNSAFE"]: + src = "/iframe/?url=" + quote(src) + logger.debug( + f"Using iframe wrapper for safety: {src}" + ) + + step_iframes.append( + { + "src": src, + "width": width, + "height": height, + } + ) elif not file["image"]: if "downloadUrl" in file.keys(): @@ -180,12 +259,16 @@ def init_main_routes(app): iframe = soup.select("iframe")[0] src = iframe.get("src") + logger.debug(f"Processing iframe with src: {src}") if src.startswith("https://content.instructables.com"): src = src.replace( "https://content.instructables.com", f"/proxy/?url={src}", ) + logger.debug( + f"Proxying instructables content: {src}" + ) elif app.config["INVIDIOUS"] and src.startswith( "https://www.youtube.com" @@ -194,9 +277,13 @@ def init_main_routes(app): "https://www.youtube.com", app.config["INVIDIOUS"], ) + logger.debug(f"Using Invidious for YouTube: {src}") elif not app.config["UNSAFE"]: src = "/iframe/?url=" + quote(src) + logger.debug( + f"Using iframe wrapper for safety: {src}" + ) step_iframes.append( { @@ -211,12 +298,16 @@ def init_main_routes(app): "https://content.instructables.com", "/proxy/?url=https://content.instructables.com", ) + + logger.debug( + f"Step {step_title}: {len(step_imgs)} images, {len(step_iframes)} iframes, {len(step_downloads)} downloads" + ) + steps.append( { "title": step_title, "imgs": step_imgs, "text": step_text, - "videos": step_videos, "iframes": step_iframes, "downloads": step_downloads, } @@ -227,42 +318,7 @@ def init_main_routes(app): # TODO: Fix comments - # comments = body.select("section.discussion")[0] - - # comment_count = comments.select("h2")[0].text - # comment_list = comments.select("div.posts") - - # if comment_list != []: - # comment_list = comment_list[0] - # comments_list = [] - # replies_used = 0 - # for comment in comment_list.select(".post.js-comment:not(.reply)"): - # comment_votes = comment.select(".votes")[0].text - # comment_author_img_src = proxy(comment.select(".avatar a noscript img")[0].get("src")) - # comment_author_img_alt = comment.select(".avatar a noscript img")[0].get("alt") - # comment_author = comment.select(".posted-by a")[0].text - # comment_author_link = comment.select(".posted-by a")[0].get("href") - # comment_date = comment.select(".posted-by p.posted-date")[0].text - # comment_text = comment.select("div.text p")[0] - # comment_reply_count = comment.select("button.js-show-replies") - # if comment_reply_count != []: - # comment_reply_count = comment_reply_count[0].get("data-num-hidden") - # else: - # comment_reply_count = 0 - # reply_list = [] - # for index, reply in enumerate(comment_list.select(".post.js-comment:not(.reply) ~ .post.js-comment.reply.hide:has(~.post.js-comment:not(.reply))")[replies_used:int(comment_reply_count) + replies_used]): - # reply_votes = reply.select(".votes")[0].text - # reply_author_img_src = proxy(reply.select(".avatar a noscript img")[0].get("src")) - # reply_author_img_alt = reply.select(".avatar a noscript img")[0].get("alt") - # reply_author = reply.select(".posted-by a")[0].text - # reply_author_link = reply.select(".posted-by a")[0].get("href") - # reply_date = reply.select(".posted-by p.posted-date")[0].text - # reply_text = reply.select("div.text p")[0] - - # reply_list.append([reply_votes, reply_author_img_src, reply_author_img_alt, reply_author, reply_author_link, reply_date, reply_text]) - # replies_used += 1 - - # comments_list.append([comment_votes, comment_author_img_src, comment_author_img_alt, comment_author, comment_author_link, comment_date, comment_text, comment_reply_count, reply_list]) + logger.debug(f"Rendering article template with {len(steps)} steps") return render_template( "article.html", title=title, @@ -281,6 +337,7 @@ def init_main_routes(app): ) else: ## Collections + logger.debug("Article is a collection") thumbnails = [] for thumbnail in data["instructables"]: text = thumbnail["title"] @@ -310,6 +367,7 @@ def init_main_routes(app): } ) + logger.debug(f"Collection has {len(thumbnails)} items") return render_template( "collection.html", title=title, @@ -324,16 +382,25 @@ def init_main_routes(app): thumbnails=thumbnails, ) - except Exception: + except Exception as e: + logger.error(f"Error processing article: {str(e)}") print_exc() raise InternalServerError() @app.route("/search", methods=["POST", "GET"]) def route_search(): + if request.method == "POST": + query = request.form.get("q", "") + logger.debug(f"Search request (POST) for: {query}") + else: + query = request.args.get("q", "") + logger.debug(f"Search request (GET) for: {query}") + return project_list(app, "Search") @app.route("/cron/") def cron(): + logger.debug("Manual cron update triggered") update_data(app) return "OK" @@ -345,27 +412,33 @@ def init_main_routes(app): `STRUCTABLES_PRIVACY_FILE` environment variable. If that variable is unset or the file cannot be read, a default message is displayed. """ + logger.debug("Rendering privacy policy page") content = "No privacy policy found." path = app.config.get("PRIVACY_FILE") + logger.debug(f"Privacy policy file path: {path}") if not path: if pathlib.Path("privacy.md").exists(): path = "privacy.md" - + logger.debug("Found privacy.md in working directory") elif pathlib.Path("privacy.txt").exists(): path = "privacy.txt" + logger.debug("Found privacy.txt in working directory") if path: try: + logger.debug(f"Reading privacy policy from {path}") with pathlib.Path(path).open() as f: content = f.read() if path.endswith(".md"): + logger.debug("Converting Markdown to HTML") content = Markdown().convert(content) - except OSError: + except OSError as e: + logger.error(f"Error reading privacy policy file: {str(e)}") pass return render_template( @@ -374,16 +447,20 @@ def init_main_routes(app): @app.errorhandler(404) def not_found(e): + logger.warning(f"404 error: {request.path}") return render_template("404.html"), 404 @app.errorhandler(400) def bad_request(e): + logger.warning(f"400 error: {request.path}") return render_template("400.html"), 400 @app.errorhandler(429) def too_many_requests(e): + logger.warning(f"429 error: {request.path}") return render_template("429.html"), 429 @app.errorhandler(500) def internal_server_error(e): + logger.error(f"500 error: {request.path}") return render_template("500.html"), 500 diff --git a/src/structables/routes/member.py b/src/structables/routes/member.py index 2567af1..03cc5b3 100644 --- a/src/structables/routes/member.py +++ b/src/structables/routes/member.py @@ -5,7 +5,9 @@ from urllib.parse import quote from ..utils.helpers import proxy, member_header from bs4 import BeautifulSoup from urllib.request import Request +import logging +logger = logging.getLogger(__name__) def init_member_routes(app): """This function initializes all the routes related to Instructables member profiles. @@ -24,20 +26,23 @@ def init_member_routes(app): Returns: Response: The rendered HTML page. """ - + logger.debug(f"Fetching instructables for member: {member}") member = quote(member) try: + logger.debug(f"Making request to https://www.instructables.com/member/{member}/instructables/") data = urlopen( f"https://www.instructables.com/member/{member}/instructables/" ) except HTTPError as e: + logger.error(f"HTTP error fetching member instructables: {e.code}") abort(e.code) soup = BeautifulSoup(data.read().decode(), "html.parser") header = soup.select(".profile-header.profile-header-social")[0] header_content = member_header(header) + logger.debug(f"Parsed member header for {header_content['title']}") ibles = soup.select("ul.ible-list-items")[0] ible_list = [] @@ -63,6 +68,8 @@ def init_member_routes(app): "favorites": favorites, } ) + + logger.debug(f"Found {len(ible_list)} instructables for member {member}") return render_template( "member-instructables.html", @@ -81,19 +88,22 @@ def init_member_routes(app): Returns: Response: The rendered HTML page. """ - + logger.debug(f"Fetching profile for member: {member}") member = quote(member) request = Request(f"https://www.instructables.com/member/{member}/") try: + logger.debug(f"Making request to https://www.instructables.com/member/{member}/") data = urlopen(request) except HTTPError as e: + logger.error(f"HTTP error fetching member profile: {e.code}") abort(e.code) soup = BeautifulSoup(data.read().decode(), "html.parser") header_content = member_header(soup) + logger.debug(f"Parsed member header for {header_content['title']}") body = soup.select("div.member-profile-body")[0] @@ -105,12 +115,16 @@ def init_member_routes(app): if ible_list != []: ible_list = ible_list[0] ible_list_title = ible_list.select("h2.module-title")[0].text + logger.debug(f"Found promoted content: {ible_list_title}") + for ible in ible_list.select("ul.promoted-items li"): ible_title = ible.get("data-title") ible_link = ible.select("div.image-wrapper")[0].a.get("href") ible_img = proxy(ible.select("div.image-wrapper a img")[0].get("src")) ibles.append({"title": ible_title, "link": ible_link, "img": ible_img}) + + logger.debug(f"Found {len(ibles)} promoted instructables") ach_list = body.select( "div.two-col-section div.right-col-section.centered-sidebar div.boxed-content.about-me" @@ -122,6 +136,8 @@ def init_member_routes(app): if len(ach_list) > 1: ach_list = ach_list[1] ach_list_title = ach_list.select("h2.module-title")[0].text + logger.debug(f"Found achievements section: {ach_list_title}") + for ach in ach_list.select( "div.achievements-section.main-achievements.contest-achievements div.achievement-item:not(.two-column-filler)" ): @@ -134,7 +150,10 @@ def init_member_routes(app): )[0].text achs.append([ach_title, ach_desc]) except IndexError: + logger.warning("Failed to parse an achievement item") pass + + logger.debug(f"Found {len(achs)} achievements") return render_template( "member.html", @@ -144,4 +163,4 @@ def init_member_routes(app): ibles=ibles, ach_list_title=ach_list_title, achs=achs, - ) + ) \ No newline at end of file diff --git a/src/structables/routes/proxy.py b/src/structables/routes/proxy.py index 92087c9..bf93073 100644 --- a/src/structables/routes/proxy.py +++ b/src/structables/routes/proxy.py @@ -3,38 +3,51 @@ from werkzeug.exceptions import BadRequest, InternalServerError from urllib.parse import unquote from urllib.error import HTTPError from urllib.request import urlopen +import logging +logger = logging.getLogger(__name__) def init_proxy_routes(app): @app.route("/proxy/") def route_proxy(): url = request.args.get("url") filename = request.args.get("filename") + + logger.debug(f"Proxy request for URL: {url}, filename: {filename}") if url is not None: if url.startswith("https://cdn.instructables.com/") or url.startswith( "https://content.instructables.com/" ): + logger.debug(f"Valid proxy URL: {url}") def generate(): # Subfunction to allow streaming the data instead of # downloading all of it at once try: + logger.debug(f"Opening connection to {url}") with urlopen(unquote(url)) as data: + logger.debug("Connection established, streaming data") while True: chunk = data.read(1024 * 1024) if not chunk: break yield chunk + logger.debug("Finished streaming data") except HTTPError as e: + logger.error(f"HTTP error during streaming: {e.code}") abort(e.code) try: + logger.debug(f"Getting content type for {url}") with urlopen(unquote(url)) as data: content_type = data.headers["content-type"] + logger.debug(f"Content type: {content_type}") except HTTPError as e: + logger.error(f"HTTP error getting content type: {e.code}") abort(e.code) except KeyError: + logger.error("Content-Type header missing") raise InternalServerError() headers = dict() @@ -43,18 +56,25 @@ def init_proxy_routes(app): headers["Content-Disposition"] = ( f'attachment; filename="{filename}"' ) + logger.debug(f"Added Content-Disposition header for {filename}") return Response(generate(), content_type=content_type, headers=headers) else: + logger.warning(f"Invalid proxy URL: {url}") raise BadRequest() else: + logger.warning("No URL provided for proxy") raise BadRequest() @app.route("/iframe/") def route_iframe(): url = request.args.get("url") url = unquote(url) + + logger.debug(f"iframe request for URL: {url}") + if url is not None: return render_template("iframe.html", url=url) else: - raise BadRequest() + logger.warning("No URL provided for iframe") + raise BadRequest() \ No newline at end of file diff --git a/src/structables/static/css/iframe.css b/src/structables/static/css/iframe.css new file mode 100644 index 0000000..2a28d26 --- /dev/null +++ b/src/structables/static/css/iframe.css @@ -0,0 +1,114 @@ +/* Styles for the blocked iframe page */ +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; + line-height: 1.6; + color: #333; + background-color: #f5f5f5; + margin: 0; + padding: 20px; + text-align: center; + max-width: 800px; + margin: 0 auto; + } + + /* Dark mode support */ + @media (prefers-color-scheme: dark) { + body { + background-color: #121212; + color: #e0e0e0; + } + + a { + color: #4d9dff; + } + + a:hover { + color: #77b6ff; + } + + .warning-box { + background-color: #1e1e1e; + border-color: #444; + } + + .warning-icon { + color: #ffd04d; + } + + .action-button { + background-color: #ff8c3f; + } + + .action-button:hover { + background-color: #ff6b00; + } + } + + h1 { + font-size: 24px; + margin-bottom: 15px; + color: #ff6b00; + } + + p { + margin-bottom: 15px; + } + + a { + color: #0066cc; + text-decoration: none; + } + + a:hover { + text-decoration: underline; + color: #004080; + } + + .warning-box { + background-color: #fff; + border: 1px solid #ddd; + border-radius: 8px; + padding: 20px; + margin: 30px auto; + max-width: 600px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + } + + .warning-icon { + font-size: 48px; + color: #ffc107; + margin-bottom: 15px; + } + + .url-display { + background-color: rgba(0, 0, 0, 0.05); + padding: 10px; + border-radius: 4px; + word-break: break-all; + margin: 15px 0; + font-family: monospace; + font-size: 14px; + } + + .action-button { + display: inline-block; + background-color: #ff6b00; + color: white; + padding: 10px 20px; + border-radius: 4px; + margin-top: 15px; + font-weight: bold; + transition: background-color 0.2s; + } + + .action-button:hover { + background-color: #e05e00; + text-decoration: none; + color: white; + } + + .footer { + margin-top: 30px; + font-size: 12px; + color: #666; + } \ No newline at end of file diff --git a/src/structables/static/css/style.css b/src/structables/static/css/style.css index aa8d151..ce2fded 100644 --- a/src/structables/static/css/style.css +++ b/src/structables/static/css/style.css @@ -1,5 +1,6 @@ -/* Base styles */ +/* Theme Variables */ :root { + /* Light theme (default) */ --primary-color: #ff6b00; --secondary-color: #444; --text-color: #333; @@ -12,13 +13,67 @@ --success-color: #28a745; --error-color: #dc3545; --warning-color: #ffc107; + --card-bg: #fff; + --header-bg: #f5f5f5; + --footer-bg: #f5f5f5; + --shadow-color: rgba(0, 0, 0, 0.1); --font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } +/* Dark theme */ +[data-theme="dark"] { + --primary-color: #ff8c3f; + --secondary-color: #aaa; + --text-color: #e0e0e0; + --light-text: #aaa; + --bg-color: #121212; + --light-bg: #1e1e1e; + --border-color: #444; + --link-color: #4d9dff; + --link-hover: #77b6ff; + --success-color: #3dd06c; + --error-color: #ff5c5c; + --warning-color: #ffd04d; + --card-bg: #1e1e1e; + --header-bg: #1a1a1a; + --footer-bg: #1a1a1a; + --shadow-color: rgba(0, 0, 0, 0.3); +} + +/* Theme transition */ * { - box-sizing: border-box; - margin: 0; - padding: 0; + transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease, box-shadow 0.3s ease; +} + +.moon-icon, +.sun-icon { + width: 16px; + height: 16px; + display: inline-block; +} + +.moon-icon { + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 512 512'%3E%3Cpath fill='%23000000' d='M283.211 512c78.962 0 151.079-35.925 198.857-94.792 7.068-8.708-.639-21.43-11.562-19.35-124.203 23.654-238.262-71.576-238.262-196.954 0-72.222 38.662-138.635 101.498-174.394 9.686-5.512 7.25-20.197-3.756-22.23A258.156 258.156 0 0 0 283.211 0c-141.309 0-256 114.511-256 256 0 141.309 114.511 256 256 256z'%3E%3C/path%3E%3C/svg%3E"); +} + +.sun-icon { + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 512 512'%3E%3Cpath fill='%23000000' d='M256 160c-52.9 0-96 43.1-96 96s43.1 96 96 96 96-43.1 96-96-43.1-96-96-96zm246.4 80.5l-94.7-47.3 33.5-100.4c4.5-13.6-8.4-26.5-21.9-21.9l-100.4 33.5-47.4-94.8c-6.4-12.8-24.6-12.8-31 0l-47.3 94.7L92.7 70.8c-13.6-4.5-26.5 8.4-21.9 21.9l33.5 100.4-94.7 47.4c-12.8 6.4-12.8 24.6 0 31l94.7 47.3-33.5 100.5c-4.5 13.6 8.4 26.5 21.9 21.9l100.4-33.5 47.3 94.7c6.4 12.8 24.6 12.8 31 0l47.3-94.7 100.4 33.5c13.6 4.5 26.5-8.4 21.9-21.9l-33.5-100.4 94.7-47.3c13-6.5 13-24.7.2-31.1zm-155.9 106c-49.9 49.9-131.1 49.9-181 0-49.9-49.9-49.9-131.1 0-181 49.9-49.9 131.1-49.9 181 0 49.9 49.9 49.9 131.1 0 181z'%3E%3C/path%3E%3C/svg%3E"); +} + +[data-theme="dark"] .moon-icon { + filter: invert(1); +} + +[data-theme="dark"] .sun-icon { + filter: invert(1); +} + +/* Base styles */ +html, +body { + overflow-x: hidden; + width: 100%; + position: relative; } body { @@ -45,12 +100,18 @@ img { height: auto; } +[data-theme="dark"] img { + filter: brightness(0.9); + /* Slightly reduce brightness for better contrast */ +} + /* Layout */ .container { width: 100%; max-width: 1200px; margin: 0 auto; padding: 0 15px; + box-sizing: border-box; } main { @@ -171,7 +232,7 @@ p { /* Header & Navigation */ header { - background-color: var(--light-bg); + background-color: var(--header-bg); padding: 1rem 0; border-bottom: 1px solid var(--border-color); } @@ -222,6 +283,8 @@ header { .search-input { padding: 0.5rem; + color: var(--text-color); + background-color: var(--bg-color); border: 1px solid var(--border-color); border-radius: 4px 0 0 4px; font-size: 1rem; @@ -234,19 +297,26 @@ header { border: none; border-radius: 0 4px 4px 0; cursor: pointer; + display: flex; + align-items: center; + justify-content: center; } .search-button:hover { background-color: var(--link-hover); } +.search-button img { + filter: brightness(0) invert(1); +} + /* Cards */ .card { border: 1px solid var(--border-color); border-radius: 4px; margin-bottom: 1rem; - background-color: var(--bg-color); - box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + background-color: var(--card-bg); + box-shadow: 0 2px 4px var(--shadow-color); display: flex; flex-direction: column; height: 100%; @@ -256,14 +326,18 @@ header { .card-img-top { width: 100%; - height: 200px; + height: auto; object-fit: cover; border-top-left-radius: 4px; border-top-right-radius: 4px; } +[data-theme="dark"] .card-img-top { + opacity: 0.9; + /* Slightly reduce opacity for better contrast */ +} + .card-body { - padding: 1rem; flex: 1; display: flex; flex-direction: column; @@ -282,7 +356,7 @@ header { .card-text { color: var(--light-text); - margin-bottom: 0.5rem; + margin-bottom: 0; /* Limit to 3 lines of text */ display: -webkit-box; -webkit-line-clamp: 3; @@ -292,7 +366,7 @@ header { } .card-footer { - padding: 1rem; + padding-bottom: 1rem; background-color: var(--light-bg); border-top: 1px solid var(--border-color); margin-top: auto; @@ -373,8 +447,8 @@ header { } .btn-primary:hover { - background-color: #e06000; - border-color: #e06000; + background-color: var(--link-hover); + border-color: var(--link-hover); } .btn-outline-success { @@ -395,7 +469,7 @@ header { border-color: var(--primary-color); } -.btn-outline-primary:hover, +.btn-outline-primary:hover, .btn-outline-primary.active { color: #fff; background-color: var(--primary-color); @@ -653,35 +727,232 @@ header { } /* Step sections in articles */ +.step-images { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + margin-bottom: 1rem; + justify-content: center; + width: 100%; + box-sizing: border-box; +} + +.step-images .col-md-4 { + flex: 0 0 auto; + width: 100%; + box-sizing: border-box; + padding: 0; +} + +@media (min-width: 576px) { + .step-images { + gap: 1rem; + } + + .step-images .col-md-4 { + max-width: 350px; + } +} + +.step-images img { + width: 100%; + max-width: 100%; + height: auto; + border-radius: 0.25rem; + box-shadow: 0 2px 4px var(--shadow-color); + box-sizing: border-box; + object-fit: contain; +} + +.step-images img:hover { + transform: scale(1.02); +} + .step-section { margin-bottom: 2rem; - padding: 1.5rem; + padding: 1rem; border: 1px solid var(--border-color); - border-radius: 4px; - background-color: var(--light-bg); + border-radius: 0.5rem; + background-color: var(--card-bg); + box-shadow: 0 2px 8px var(--shadow-color); + width: 100%; + box-sizing: border-box; +} + +@media (min-width: 768px) { + .step-section { + max-width: 1000px; + padding: 1.5rem; + margin-left: auto; + margin-right: auto; + } +} + +.step-section img { + max-width: 100%; + height: auto; + box-sizing: border-box; } .step-header { - margin-bottom: 1rem; - padding-bottom: 0.5rem; + margin-bottom: 1.5rem; + padding-bottom: 0.75rem; border-bottom: 1px solid var(--border-color); } -.step-images, +.step-header h2 { + margin-bottom: 0; + color: var(--primary-color); +} + +/* Hide step-header when h2 is empty */ +.step-header:has(h2:empty), +.step-header h2:empty { + display: none; +} + +/* Step parts */ +.step-text { + line-height: 1.7; + margin-bottom: 1rem; + width: 100%; + box-sizing: border-box; +} + +.step-text *:not(img):not(iframe):not(embed):not(object):not(video) { + max-width: 100%; + box-sizing: border-box; + word-wrap: break-word; + overflow-wrap: break-word; +} + .step-videos, .step-iframes { - display: flex; - flex-wrap: wrap; - gap: 1rem; margin-bottom: 1.5rem; } .step-downloads { - margin-top: 1.5rem; - padding-top: 1rem; + width: 100%; + box-sizing: border-box; + margin-top: 1rem; + padding-top: 0.5rem; border-top: 1px solid var(--border-color); } +.step-downloads .row { + width: 100%; + margin: 0; + box-sizing: border-box; +} + +.step-downloads .col-md-2 { + padding: 0 0.25rem; + margin-bottom: 0.5rem; + box-sizing: border-box; +} + +@media (min-width: 768px) { + .step-downloads { + margin-top: 1.5rem; + padding-top: 1rem; + } +} + +.step-downloads h3 { + margin-bottom: 0.5rem; + font-size: 1.25rem; + color: var(--primary-color); +} + +@media (min-width: 768px) { + .step-downloads h3 { + margin-bottom: 1rem; + } +} + +.step-downloads .col-md-2 { + flex: 0 0 auto; + width: 100%; + margin-bottom: 0.5rem; +} + +@media (min-width: 576px) { + .step-downloads .col-md-2 { + width: 50%; + } +} + +@media (min-width: 768px) { + .step-downloads .col-md-2 { + width: 33.333%; + } +} + +@media (min-width: 992px) { + .step-downloads .col-md-2 { + width: 16.666%; + } +} + +.step-iframes { + width: 100%; + box-sizing: border-box; + margin-bottom: 1rem; +} + +.step-iframes .col-md-8 { + width: 100%; + box-sizing: border-box; + padding: 0; +} + +iframe, +embed, +object, +video { + max-width: 100%; + box-sizing: border-box; +} + +@media (min-width: 768px) { + .step-iframes { + margin-bottom: 1.5rem; + } + + .step-iframes .col-md-8 { + max-width: 800px; + margin-left: auto; + margin-right: auto; + } +} + +.step-iframes iframe { + max-width: 100%; + width: 100%; + box-sizing: border-box; + border: 1px solid var(--border-color); + border-radius: 0.25rem; +} + +@media (min-width: 768px) { + .step-iframes iframe { + max-height: 450px; + height: 450px; + } +} + +[data-theme="dark"] .step-section { + background-color: var(--card-bg); +} + +[data-theme="dark"] .step-header h2 { + color: var(--primary-color); +} + +[data-theme="dark"] .step-downloads h3 { + color: var(--primary-color); +} + /* Contest lists */ .contest-list { display: flex; @@ -771,12 +1042,67 @@ header { /* Footer */ footer { - background-color: var(--light-bg); + background-color: var(--footer-bg); padding: 2rem 0; margin-top: 3rem; border-top: 1px solid var(--border-color); } +/* Theme Toggle Switch */ +.theme-switch-wrapper { + display: flex; + align-items: center; + margin-left: 1rem; +} + +.theme-switch { + display: inline-block; + height: 24px; + position: relative; + width: 50px; +} + +.theme-switch input { + display: none; +} + +.slider { + background-color: #ccc; + bottom: 0; + cursor: pointer; + left: 0; + position: absolute; + right: 0; + top: 0; + transition: .4s; + border-radius: 34px; +} + +.slider:before { + background-color: white; + bottom: 4px; + content: ""; + height: 16px; + left: 4px; + position: absolute; + transition: .4s; + width: 16px; + border-radius: 50%; +} + +input:checked+.slider { + background-color: var(--primary-color); +} + +input:checked+.slider:before { + transform: translateX(26px); +} + +.theme-icon { + margin-right: 5px; + font-size: 1.2rem; +} + /* Error pages */ .error-page { text-align: center; diff --git a/src/structables/templates/base.html b/src/structables/templates/base.html index bc98b40..84a3637 100644 --- a/src/structables/templates/base.html +++ b/src/structables/templates/base.html @@ -1,5 +1,6 @@ - + @@ -18,5 +19,40 @@ {% block content %}{% endblock %} {% include "footer.html" %} + {% if config["THEME"] == "auto" %} + + {% endif %} diff --git a/src/structables/templates/header.html b/src/structables/templates/header.html index 3e27304..795782f 100644 --- a/src/structables/templates/header.html +++ b/src/structables/templates/header.html @@ -20,6 +20,16 @@ + {% if config["THEME"] == "auto" %} + + {% endif %}
+ + - iframe content + + + External Content Blocked + -

Blocked iframe

-

This page contains content from outside Instructables.com. This was blocked for your safety.

-

It tries to load the following URL:

-

{{ url | safe }}

-

Click here to load the content.

+
+
⚠️
+

External Content Blocked

+

This page contains content from an external website that was blocked for your safety.

+

The content is trying to load from:

+
{{ url | safe }}
+

If you trust this source and want to proceed, you can:

+ Load External Content +
+ \ No newline at end of file diff --git a/src/structables/utils/data.py b/src/structables/utils/data.py index f713370..6391e76 100644 --- a/src/structables/utils/data.py +++ b/src/structables/utils/data.py @@ -3,54 +3,74 @@ import logging from bs4 import BeautifulSoup from .helpers import proxy, projects_search -logging.basicConfig(level=logging.DEBUG) - +logger = logging.getLogger(__name__) def update_data(app): - logging.debug("Updating data...") + """Update the application's cached data. + + This function fetches fresh data from Instructables.com and updates + the app's global cache. + + Args: + app: The Flask app instance. + """ + logger.debug("Starting data update") channels = [] try: app.global_ibles except AttributeError: + logger.debug("Initializing global_ibles dictionary") app.global_ibles = {} - sitemap_data = urlopen("https://www.instructables.com/sitemap/") - sitemap_soup = BeautifulSoup(sitemap_data.read().decode(), "html.parser") - main = sitemap_soup.select("div.sitemap-content")[0] + try: + logger.debug("Fetching sitemap data from instructables.com") + sitemap_data = urlopen("https://www.instructables.com/sitemap/") + sitemap_soup = BeautifulSoup(sitemap_data.read().decode(), "html.parser") + main = sitemap_soup.select("div.sitemap-content")[0] - for group in main.select("div.group-section"): - channels.append(group.select("h2 a")[0].text.lower()) + for group in main.select("div.group-section"): + channels.append(group.select("h2 a")[0].text.lower()) + + logger.debug(f"Found {len(channels)} channels in sitemap") - app.global_ibles["/projects"] = [] - project_ibles, total = projects_search(app, filter_by="featureFlag:=true") + logger.debug("Fetching featured projects") + app.global_ibles["/projects"] = [] + project_ibles, total = projects_search(app, filter_by="featureFlag:=true") + + logger.debug(f"Found {len(project_ibles)} featured projects") - while len(app.global_ibles["/projects"]) <= 0: - for ible in project_ibles: - link = f"/{ible['document']['urlString']}" - img = proxy(ible["document"]["coverImageUrl"]) + while len(app.global_ibles["/projects"]) <= 0: + for ible in project_ibles: + link = f"/{ible['document']['urlString']}" + img = proxy(ible['document']['coverImageUrl']) - title = ible["document"]["title"] - author = ible["document"]["screenName"] - author_link = f"/member/{author}" + title = ible['document']['title'] + author = ible['document']['screenName'] + author_link = f"/member/{author}" - channel = ible["document"]["primaryClassification"] - channel_link = f"/channel/{channel}" + channel = ible['document']['primaryClassification'] + channel_link = f"/channel/{channel}" - views = ible["document"]["views"] - favorites = ible["document"]["favorites"] + views = ible['document']['views'] + favorites = ible['document']['favorites'] - app.global_ibles["/projects"].append( - { - "link": link, - "img": img, - "title": title, - "author": author, - "author_link": author_link, - "channel": channel, - "channel_link": channel_link, - "views": views, - "favorites": favorites, - } - ) + app.global_ibles["/projects"].append( + { + "link": link, + "img": img, + "title": title, + "author": author, + "author_link": author_link, + "channel": channel, + "channel_link": channel_link, + "views": views, + "favorites": favorites, + } + ) + + logger.debug(f"Updated global projects list with {len(app.global_ibles['/projects'])} projects") + logger.debug("Data update completed successfully") + except Exception as e: + logger.error(f"Error updating data: {str(e)}") \ No newline at end of file diff --git a/src/structables/utils/helpers.py b/src/structables/utils/helpers.py index 27f3c9b..be4c68c 100644 --- a/src/structables/utils/helpers.py +++ b/src/structables/utils/helpers.py @@ -7,31 +7,45 @@ import json import math from flask import request, render_template, abort -logging.basicConfig(level=logging.DEBUG) - +logger = logging.getLogger(__name__) def proxy(url, filename=None): - logging.debug(f"Generating proxy URL for {url}") + """Generate a proxy URL for external content. + + Args: + url (str): The original URL to proxy. + filename (str, optional): The filename to use for downloads. + + Returns: + str: The proxied URL. + """ + logger.debug(f"Generating proxy URL for {url}") return f"/proxy/?url={url}" + (f"&filename={filename}" if filename else "") - def get_typesense_api_key(): - logging.debug("Getting Typesense API key...") + """Extract the Typesense API key from Instructables.com. + + Returns: + str: The Typesense API key. + """ + logger.debug("Getting Typesense API key...") - data = urlopen("https://www.instructables.com/") - soup = BeautifulSoup(data.read().decode(), "html.parser") - scripts = soup.select("script") + try: + data = urlopen("https://www.instructables.com/") + soup = BeautifulSoup(data.read().decode(), "html.parser") + scripts = soup.select("script") - for script in scripts: - if "typesense" in script.text and ( - matches := re.search(r'"typesenseApiKey":\s?"(.*?)"', script.text) - ): - api_key = matches.group(1) - logging.debug(f"Identified Typesense API key as {api_key}") - return api_key - - logging.error("Failed to get Typesense API key") + for script in scripts: + if "typesense" in script.text and ( + matches := re.search(r'"typesenseApiKey":\s?"(.*?)"', script.text) + ): + api_key = matches.group(1) + logger.debug(f"Identified Typesense API key: {api_key[:5]}...") + return api_key + logger.error("Failed to get Typesense API key") + except Exception as e: + logger.error(f"Error getting Typesense API key: {str(e)}") def unslugify(slug): """Return a list of possible original titles for a slug. @@ -42,6 +56,7 @@ def unslugify(slug): Returns: List[str]: A list of possible original titles for the slug. """ + logger.debug(f"Unslugifying: {slug}") results = [] results.append(slug.replace("-", " ").title()) @@ -49,10 +64,21 @@ def unslugify(slug): if "and" in slug: results.append(results[0].replace("And", "&").title()) + logger.debug(f"Unslugify results: {results}") return results - def get_pagination(request, total, per_page=1): + """Generate pagination links. + + Args: + request: The Flask request object. + total (int): The total number of items. + per_page (int): The number of items per page. + + Returns: + list: A list of pagination link dictionaries. + """ + logger.debug(f"Generating pagination for {total} items, {per_page} per page") pagination = [] args = request.args.copy() @@ -61,6 +87,7 @@ def get_pagination(request, total, per_page=1): query_string = urlencode(args) total_pages = int(total / per_page) + logger.debug(f"Total pages: {total_pages}, current page: {current}") if query_string: query_string = "&" + query_string @@ -105,126 +132,183 @@ def get_pagination(request, total, per_page=1): } ) + logger.debug(f"Generated {len(pagination)} pagination links") return pagination - def member_header(header): - avatar = proxy( - header.select("div.profile-avatar-container img.profile-avatar")[0].get("src") - ) - title = header.select("div.profile-top div.profile-headline h1.profile-title")[ - 0 - ].text + """Extract member profile header information. + + Args: + header: The BeautifulSoup header element. + + Returns: + dict: The member header information. + """ + logger.debug("Parsing member header") + + try: + avatar = proxy( + header.select("div.profile-avatar-container img.profile-avatar")[0].get("src") + ) + title = header.select("div.profile-top div.profile-headline h1.profile-title")[ + 0 + ].text - location = header.select("span.member-location") - if location != []: - location = location[0].text - else: - location = 0 + location = header.select("span.member-location") + if location != []: + location = location[0].text + else: + location = 0 - signup = header.select("span.member-signup-date") - if signup != []: - signup = signup[0].text - else: - signup = 0 + signup = header.select("span.member-signup-date") + if signup != []: + signup = signup[0].text + else: + signup = 0 - instructables = header.select("span.ible-count") - if instructables != []: - instructables = instructables[0].text - else: - instructables = 0 + instructables = header.select("span.ible-count") + if instructables != []: + instructables = instructables[0].text + else: + instructables = 0 - views = header.select("span.total-views") - if views != []: - views = views[0].text - else: - views = 0 + views = header.select("span.total-views") + if views != []: + views = views[0].text + else: + views = 0 - comments = header.select("span.total-comments") - if comments != []: - comments = comments[0].text - else: - comments = 0 + comments = header.select("span.total-comments") + if comments != []: + comments = comments[0].text + else: + comments = 0 - followers = header.select("span.follower-count") - if followers != []: - followers = followers[0].text - else: - followers = 0 + followers = header.select("span.follower-count") + if followers != []: + followers = followers[0].text + else: + followers = 0 - bio = header.select("span.member-bio") - if bio != []: - bio = bio[0].text - else: - bio = "" - - return { - "avatar": avatar, - "title": title, - "location": location, - "signup": signup, - "instructables": instructables, - "views": views, - "comments": comments, - "followers": followers, - "bio": bio, - } + bio = header.select("span.member-bio") + if bio != []: + bio = bio[0].text + else: + bio = "" + logger.debug(f"Parsed member header for {title}") + + return { + "avatar": avatar, + "title": title, + "location": location, + "signup": signup, + "instructables": instructables, + "views": views, + "comments": comments, + "followers": followers, + "bio": bio, + } + except Exception as e: + logger.error(f"Error parsing member header: {str(e)}") + # Return a minimal header to avoid breaking the template + return { + "avatar": "", + "title": "Unknown User", + "location": "", + "signup": "", + "instructables": 0, + "views": 0, + "comments": 0, + "followers": 0, + "bio": "", + } def explore_lists(soup): + """Parse the explore lists from the homepage. + + Args: + soup: The BeautifulSoup element containing the list. + + Returns: + list: A list of dictionaries with project information. + """ + logger.debug("Parsing explore list") list_ = [] - for ible in soup.select(".home-content-explore-ible"): - link = ible.a["href"] - img = proxy(ible.select("a img")[0].get("data-src")) - alt = ible.select("a img")[0].get("alt") - title = ible.select("div strong a")[0].text - author = ible.select("div span.ible-author a")[0].text - author_link = ible.select("div span.ible-author a")[0].get("href") - channel = ible.select("div span.ible-channel a")[0].text - channel_link = ible.select("div span.ible-channel a")[0].get("href") - views = 0 - if ible.select("span.ible-views") != []: - views = ible.select("span.ible-views")[0].text - favorites = 0 - if ible.select("span.ible-favorites") != []: - favorites = ible.select("span.ible-favorites")[0].text - list_.append( - { - "link": link, - "img": img, - "alt": alt, - "title": title, - "author": author, - "author_link": author_link, - "channel": channel, - "channel_link": channel_link, - "favorites": favorites, - "views": views, - } - ) + try: + for ible in soup.select(".home-content-explore-ible"): + link = ible.a["href"] + img = proxy(ible.select("a img")[0].get("data-src")) + alt = ible.select("a img")[0].get("alt") + title = ible.select("div strong a")[0].text + author = ible.select("div span.ible-author a")[0].text + author_link = ible.select("div span.ible-author a")[0].get("href") + channel = ible.select("div span.ible-channel a")[0].text + channel_link = ible.select("div span.ible-channel a")[0].get("href") + views = 0 + if ible.select("span.ible-views") != []: + views = ible.select("span.ible-views")[0].text + favorites = 0 + if ible.select("span.ible-favorites") != []: + favorites = ible.select("span.ible-favorites")[0].text + list_.append( + { + "link": link, + "img": img, + "alt": alt, + "title": title, + "author": author, + "author_link": author_link, + "channel": channel, + "channel_link": channel_link, + "favorites": favorites, + "views": views, + } + ) + logger.debug(f"Found {len(list_)} items in explore list") + except Exception as e: + logger.error(f"Error parsing explore list: {str(e)}") + return list_ - def project_list(app, head, sort="", per_page=20): + """Generate a list of projects for display. + + Args: + app: The Flask app instance. + head (str): The header title. + sort (str, optional): Sort description. + per_page (int, optional): Number of items per page. + + Returns: + Response: The rendered template. + """ head = f"{head + ' ' if head != '' else ''}Projects" + sort path = urlparse(request.path).path + logger.debug(f"Generating project list for {path} with title '{head}'") page = request.args.get("page", 1, type=int) + logger.debug(f"Page: {page}, per_page: {per_page}") if path in ("/projects/", "/projects"): + logger.debug("Using global projects list") ibles = app.global_ibles["/projects"] total = len(ibles) else: if "projects" in path.split("/"): + logger.debug("Fetching projects for category/channel") ibles = [] parts = path.split("/") category = parts[1] channel = "" if parts[2] == "projects" else parts[2] + + logger.debug(f"Category: {category}, Channel: {channel}") channel_names = unslugify(channel) for channel_name in channel_names: + logger.debug(f"Trying channel name: {channel_name}") project_ibles, total = projects_search( app, category=category, @@ -234,13 +318,16 @@ def project_list(app, head, sort="", per_page=20): ) if project_ibles: + logger.debug(f"Found {len(project_ibles)} projects for {channel_name}") break elif "search" in path.split("/"): + logger.debug("Processing search request") ibles = [] query = ( request.args.get("q") if request.method == "GET" else request.form["q"] ) + logger.debug(f"Search query: {query}") project_ibles, total = projects_search( app, @@ -250,23 +337,25 @@ def project_list(app, head, sort="", per_page=20): page=page, query_by="title,screenName", ) + logger.debug(f"Found {len(project_ibles)} search results") else: + logger.warning(f"Invalid path: {path}") abort(404) for ible in project_ibles: link = f"/{ible['document']['urlString']}" - img = proxy(ible["document"]["coverImageUrl"]) + img = proxy(ible['document']['coverImageUrl']) - title = ible["document"]["title"] - author = ible["document"]["screenName"] + title = ible['document']['title'] + author = ible['document']['screenName'] author_link = f"/member/{author}" - channel = ible["document"]["primaryClassification"] + channel = ible['document']['primaryClassification'] channel_link = f"/channel/{channel}" - views = ible["document"]["views"] - favorites = ible["document"]["favorites"] + views = ible['document']['views'] + favorites = ible['document']['favorites'] ibles.append( { @@ -281,54 +370,76 @@ def project_list(app, head, sort="", per_page=20): "favorites": favorites, } ) + + logger.debug(f"Processed {len(ibles)} projects for display") + pagination = get_pagination(request, total, per_page) + logger.debug(f"Rendering project list template with {len(ibles)} projects") + return render_template( "projects.html", title=unslugify(head)[0], ibles=ibles, path=path, - pagination=get_pagination(request, total, per_page), + pagination=pagination, ) - def category_page(app, name, teachers=False): + """Generate a category page. + + Args: + app: The Flask app instance. + name (str): The category name. + teachers (bool, optional): Whether this is the teachers category. + + Returns: + Response: The rendered template. + """ + logger.debug(f"Generating category page for {name} (teachers={teachers})") path = urlparse(request.path).path page = request.args.get("page", 1, type=int) ibles = [] - channels = [] contests = [] + # Get channels for this category for channel in app.global_ibles["/projects"]: if ( channel["channel"].startswith(name.lower()) and channel["channel"] not in channels ): channels.append(channel["channel"]) + + logger.debug(f"Found {len(channels)} channels for category {name}") + # Get featured projects if teachers: + logger.debug("Fetching teachers projects") category_ibles, total = projects_search( app, teachers=True, page=page, filter_by="featureFlag:=true" ) else: + logger.debug(f"Fetching featured projects for category {name}") category_ibles, total = projects_search( app, category=name, page=page, filter_by="featureFlag:=true" ) + + logger.debug(f"Found {len(category_ibles)} featured projects") for ible in category_ibles: link = f"/{ible['document']['urlString']}" - img = proxy(ible["document"]["coverImageUrl"]) + img = proxy(ible['document']['coverImageUrl']) - title = ible["document"]["title"] - author = ible["document"]["screenName"] + title = ible['document']['title'] + author = ible['document']['screenName'] author_link = f"/member/{author}" - channel = ible["document"]["primaryClassification"] + channel = ible['document']['primaryClassification'] channel_link = f"/channel/{channel}" - views = ible["document"]["views"] - favorites = ible["document"]["favorites"] + views = ible['document']['views'] + favorites = ible['document']['favorites'] ibles.append( { @@ -344,6 +455,7 @@ def category_page(app, name, teachers=False): } ) + logger.debug(f"Rendering category page template with {len(ibles)} projects") return render_template( "category.html", title=name, @@ -353,7 +465,6 @@ def category_page(app, name, teachers=False): path=path, ) - def projects_search( app, query="*", @@ -368,6 +479,26 @@ def projects_search( timeout=5, typesense_api_key=None, ): + """Search for projects using the Typesense API. + + Args: + app: The Flask app instance. + query (str, optional): The search query. + category (str, optional): The category to filter by. + teachers (bool, optional): Whether to filter for teacher projects. + channel (str, optional): The channel to filter by. + filter_by (str, optional): Additional filter criteria. + page (int, optional): The page number. + per_page (int, optional): The number of results per page. + query_by (str, optional): The fields to query. + sort_by (str, optional): The sort order. + timeout (int, optional): The request timeout. + typesense_api_key (str, optional): The Typesense API key. + + Returns: + tuple: A tuple of (projects, total_pages). + """ + # Build filter string if category: if filter_by: filter_by += " && " @@ -386,9 +517,7 @@ def projects_search( query = quote(query) filter_by = quote(filter_by) - logging.debug( - f"Searching projects with query {query} and filter {filter_by}, page {page}" - ) + logger.debug(f"Searching projects: query='{query}', filter='{filter_by}', page={page}, per_page={per_page}") projects_headers = {"x-typesense-api-key": app.typesense_api_key} @@ -404,60 +533,19 @@ def projects_search( args_str = "&".join([f"{key}={value}" for key, value in request_args.items()]) - projects_request = Request( - f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search?{args_str}", - headers=projects_headers, - ) - - projects_data = urlopen(projects_request, timeout=timeout) - project_obj = json.loads(projects_data.read().decode()) - project_ibles = project_obj["hits"] - - logging.debug(f"Got {len(project_ibles)} projects") - - return project_ibles, math.ceil(project_obj["found"] / per_page) - - -def update_data(app): - logging.debug("Updating data...") - - channels = [] - - sitemap_data = urlopen("https://www.instructables.com/sitemap/") - sitemap_soup = BeautifulSoup(sitemap_data.read().decode(), "html.parser") - main = sitemap_soup.select("div.sitemap-content")[0] - - for group in main.select("div.group-section"): - channels.append(group.select("h2 a")[0].text.lower()) - - app.global_ibles["/projects"] = [] - project_ibles, total = projects_search(app, filter_by="featureFlag:=true") - - while len(app.global_ibles["/projects"]) <= 0: - for ible in project_ibles: - link = f"/{ible['document']['urlString']}" - img = proxy(ible["document"]["coverImageUrl"]) - - title = ible["document"]["title"] - author = ible["document"]["screenName"] - author_link = f"/member/{author}" - - channel = ible["document"]["primaryClassification"] - channel_link = f"/channel/{channel}" - - views = ible["document"]["views"] - favorites = ible["document"]["favorites"] - - app.global_ibles["/projects"].append( - { - "link": link, - "img": img, - "title": title, - "author": author, - "author_link": author_link, - "channel": channel, - "channel_link": channel_link, - "views": views, - "favorites": favorites, - } - ) + url = f"https://www.instructables.com/api_proxy/search/collections/projects/documents/search?{args_str}" + logger.debug(f"Making request to {url}") + + try: + projects_request = Request(url, headers=projects_headers) + projects_data = urlopen(projects_request, timeout=timeout) + project_obj = json.loads(projects_data.read().decode()) + project_ibles = project_obj["hits"] + total_found = project_obj["found"] + + logger.debug(f"Search returned {len(project_ibles)} projects out of {total_found} total matches") + + return project_ibles, math.ceil(total_found / per_page) + except Exception as e: + logger.error(f"Error searching projects: {str(e)}") + return [], 0 \ No newline at end of file