From 9dd8f4e2e071ab6d98555617d449dbb6466c623a Mon Sep 17 00:00:00 2001 From: Kumi Date: Wed, 31 Jan 2024 12:50:19 +0100 Subject: [PATCH] Enhanced environment configuration and content fetching Extended environment variables to allow for greater configurability and aligned debug mode detection with the new STRUCTABLES_DEBUG variable. Additionally, introduced command-line arguments for specifying Invidious instance URLs and unsafe iframe display settings. The content fetching logic has been revised for fetching JSON data directly instead of scraping HTML, resulting in a more robust and efficient data extraction process. New templates accommodate the change and present a cleaner UI, including handling of iframes, downloads, and proper proxying of external resources. HTML templates have been refactored to utilize better layout and styling while enhancing support for iframes and downloads, complete with the new ability to block iframe content from outside sources unless explicitly permitted. This security-focused feature protects end-users from potentially unsafe content. --- main.py | 212 ++++++++++++++++++++++++-------------- templates/article.html | 58 +++++++++-- templates/collection.html | 58 ++++++----- templates/iframe.html | 12 +++ 4 files changed, 227 insertions(+), 113 deletions(-) create mode 100644 templates/iframe.html diff --git a/main.py b/main.py index d904eb9..fe4055c 100644 --- a/main.py +++ b/main.py @@ -141,7 +141,9 @@ def update_data(): ) -debugmode = os.environ.get("FLASK_DEBUG", False) +debugmode = os.environ.get("FLASK_DEBUG", os.environ.get("STRUCTABLES_DEBUG", False)) +invidious = os.environ.get("STRUCTABLES_INVIDIOUS") +unsafe = os.environ.get("STRUCTABLES_UNSAFE", False) if __name__ == "__main__": parser = ArgumentParser() @@ -164,12 +166,29 @@ if __name__ == "__main__": default="127.0.0.1", help="Host to listen on", ) + parser.add_argument( + "-I", + "--invidious", + help="URL to Invidious instance, e.g. 
https://invidious.private.coffee/", + ) + parser.add_argument( + "-u", + "--unsafe", + action="store_true", + help="Display iframes regardless of origin", + ) args = parser.parse_args() if args.debug: debugmode = True -print("Loading...") + if args.invidious: + invidious = args.invidious + + if args.unsafe: + unsafe = True + +print("Loading initial data...") update_data() @@ -833,55 +852,93 @@ def route_member(member): @app.route("/
/") def route_article(article): try: - data = urlopen(f"https://www.instructables.com/{article}/") + data = urlopen( + f"https://www.instructables.com/json-api/showInstructableModel?urlString={article}" + ) + data = json.loads(data.read().decode()) except HTTPError as e: abort(e.code) - soup = BeautifulSoup(data.read().decode(), "html.parser") - try: - header = soup.select("header") - if len(header) < 2 and soup.select("title")[0].text.contains("Pending Review"): - return render_template("article-review.html", title="Pending Review") - else: - header = header[1] - title = header.find("h1").text + title = data["title"] + author = data["author"]["screenName"] + author_link = f"/member/{author}" + category = data["classifications"][0]["title"] + category_slug = data["classifications"][0]["name"] + category_link = f"/{category_slug}/" + channel = data["classifications"][0]["channels"][0]["title"] + channel_slug = data["classifications"][0]["channels"][0]["name"] + channel_link = f"/{category_slug}/{channel_slug}/" - byline = header.select("div.sub-header div.header-byline")[0] - author = byline.select("a")[0].text - author_link = byline.select("a")[0].get("href") - category = byline.select("a")[1].text - category_link = byline.select("a")[1].get("href") - channel = byline.select("a")[2].text - channel_link = byline.select("a")[2].get("href") - - stats = header.select("div.sub-header div.header-stats")[0] - views = stats.select(".view-count")[0].text - favorites = 0 - if stats.select(".favorite-count") != []: - favorites = stats.select(".favorite-count")[0].text - - if soup.select("div.article-body") != []: - ## Instructables - body = soup.select("div.article-body")[0] + views = data["views"] + favorites = data["favorites"] + if "steps" in data: steps = [] - for step in body.select("section.step"): - print(step) - step_title = step.select("h2")[0].text + + if "supplies" in data: + supplies = data["supplies"] + + supplies_files = [] + + if "suppliesFiles" in data: + 
supplies_files = data["suppliesFiles"] + + data["steps"].insert( + 1, {"title": "Supplies", "body": supplies, "files": supplies_files} + ) + + for step in data["steps"]: + step_title = step["title"] + print(step_title) step_imgs = [] - # TODO: Handle download links - for img in step.select("img"): - step_imgs.append( - {"src": proxy(img.get("src")), "alt": img.get("alt")} - ) + step_videos = [] # TODO: Check if this is still required + step_iframes = [] + step_downloads = [] - step_videos = [] - for img in step.select("video"): - step_videos.append([proxy(img.get("src"))]) + for file in step["files"]: + print(file) + if file["image"] and not "embedType" in file: + step_imgs.append( + {"src": proxy(file["downloadUrl"]), "alt": file["name"]} + ) - step_text = str(step.select("div.step-body")[0]) + elif not file["image"]: + step_downloads.append( + {"src": proxy(file["downloadUrl"]), "name": file["name"]} + ) + + else: # Leaves us with embeds + embed_code = file["embedHtmlCode"] + + soup = BeautifulSoup(embed_code, "html.parser") + + iframe = soup.select("iframe")[0] + + src = iframe.get("src") + + if src.startswith("https://content.instructables.com"): + # The embed lives on Instructables' own content host, so route + # it through the local proxy. Build the proxy URL from the whole + # src rather than splicing src into itself with replace(). + src = f"/proxy/?url={quote(src)}" + + elif invidious and src.startswith("https://www.youtube.com"): + src = src.replace("https://www.youtube.com", invidious) + + elif not unsafe: + src = "/iframe/?url=" + quote(src) + + step_iframes.append( + { + "src": src, + "width": file.get("width"), + "height": file.get("height"), + } + ) + + step_text = step["body"] step_text = step_text.replace( "https://content.instructables.com", "/proxy/?url=https://content.instructables.com", @@ -892,6 +949,8 @@ def route_article(article): "imgs": step_imgs, "text": step_text, "videos": step_videos, + "iframes": step_iframes, + "downloads": step_downloads, } ) @@ -955,45 +1014,17 @@ def route_article(article): else: ## Collections thumbnails = [] - for thumbnail in 
soup.select("ul#thumbnails-list li"): - text = ( - link - ) = ( - img - ) = ( - thumbnail_title - ) = ( - thumbnail_author - ) = ( - thumbnail_author_link - ) = thumbnail_channel = thumbnail_channel_link = "" + for thumbnail in data["instructables"]: + text = thumbnail["title"] + link = thumbnail["showUrl"] + img = proxy(thumbnail["downloadUrl"]) + thumbnail_title = thumbnail["title"] + thumbnail_author = thumbnail["author"]["screenName"] + thumbnail_author_link = f"/member/{thumbnail_author}" + thumbnail_channel = thumbnail["classifications"][0]["channels"][0]["title"] + thumbnail_category = thumbnail["classifications"][0]["title"] + thumbnail_channel_link = f"/{thumbnail_category}/{thumbnail_channel}" - if thumbnail.select("div.thumbnail > p") != []: - text = thumbnail.select("div.thumbnail > p")[0] - if thumbnail.select("div.thumbnail div.thumbnail-image"): - link = thumbnail.select("div.thumbnail div.thumbnail-image a")[ - 0 - ].get("href") - img = proxy( - thumbnail.select("div.thumbnail div.thumbnail-image a img")[ - 0 - ].get("src") - ) - thumbnail_title = thumbnail.select( - "div.thumbnail div.thumbnail-info h3.title a" - )[0].text - thumbnail_author = thumbnail.select( - "div.thumbnail div.thumbnail-info span.author a" - )[0].text - thumbnail_author_link = thumbnail.select( - "div.thumbnail div.thumbnail-info span.author a" - )[0].get("href") - thumbnail_channel = thumbnail.select( - "div.thumbnail div.thumbnail-info span.origin a" - )[0].text - thumbnail_channel_link = thumbnail.select( - "div.thumbnail div.thumbnail-info span.origin a" - )[0].get("href") thumbnails.append( { "text": text, @@ -1007,6 +1038,8 @@ def route_article(article): } ) + print(thumbnails[-1]) + return render_template( "collection.html", title=title, @@ -1097,13 +1130,34 @@ def route_proxy(): except HTTPError as e: abort(e.code) - return Response(data.read(), content_type=data.headers["content-type"]) + content_disposition = data.headers.get("content-disposition") + + headers = {} + 
+ if content_disposition: + headers["Content-Disposition"] = content_disposition + + return Response( + data.read(), + headers=headers, + content_type=data.headers["content-type"], + ) else: raise BadRequest() else: raise BadRequest() +@app.route("/iframe/") +def route_iframe(): + """Render the blocked-iframe notice for the ``url`` query parameter; raise BadRequest if it is missing.""" + url = request.args.get("url") + # Reject a missing parameter before unquote(), which cannot handle None. + if url is None: + raise BadRequest() + return render_template("iframe.html", url=unquote(url)) + + @app.route("/privacypolicy/") def privacypolicy(): content = "No privacy policy found." diff --git a/templates/article.html b/templates/article.html index 6f684f4..25f1cf2 100644 --- a/templates/article.html +++ b/templates/article.html @@ -14,21 +14,63 @@
{% for step in steps %} -
-

{{ step.title }}

-
+
+
+

{{ step.title }}

+
+
+ + {% if step.imgs %} +
{% for step_img in step.imgs %} - {{ step_img.alt }} +
+ {{ step_img.alt }} +
{% endfor %}
-
+ {% endif %} + + {% if step.videos %} +
{% for step_video in step.videos %} - +
+ +
{% endfor %}
- {{ step.text|safe }} -
+ {% endif %} + + {% if step.iframes %} +
+ {% for step_iframe in step.iframes %} +
+ +
+ {% endfor %} +
+ {% endif %} + + {% if step.downloads %} +
+
+

Downloads

+
+ {% for step_download in step.downloads %} + + {% endfor %} +
+ {% endif %} + +
+
+ {{ step.text|safe }} +
+
{% endfor %} +
+
{% for index, comment in enumerate(comments) %} diff --git a/templates/collection.html b/templates/collection.html index eef3bc5..836749c 100644 --- a/templates/collection.html +++ b/templates/collection.html @@ -1,28 +1,34 @@ -{% extends "base.html" %} +{% extends "base.html" %} {% block content %} +
+

{{ title }}

+

+ by {{ author }} in + {{ category }} > + {{ channel }} +

+

{{ views }} Views, {{ favorites }} Favorites

-{% block content %} -
-

{{ title }}

+
+ {% for thumbnail in thumbnails %} + + {% endfor %} +
+
-

by {{ author }} in {{ category }} > {{ channel }}

-

{{ views }} Views, {{ favorites }} Favorites

- -
- {% for thumbnail in thumbnails %} -
- {% if thumbnail.title == '' %} - - {{ thumbnail.author }} -

{{ thumbnail.author }}

-
-

by {{ thumbnail.author }} in {{ - thumbnail.channel }}

- {% else %} - {{ thumbnail.text|safe }} - {% endif %} -
- {% endfor %} -
- -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/templates/iframe.html b/templates/iframe.html new file mode 100644 index 0000000..461dd56 --- /dev/null +++ b/templates/iframe.html @@ -0,0 +1,12 @@ + + + iframe content + + +

Blocked iframe

+

This page contains content from outside Instructables.com. This was blocked for your safety.

+

It tries to load the following URL:

+

{# url comes from the request's query string: leave autoescaping on, never mark it safe #}{{ url }}

+

Click here to load the content.

+ + \ No newline at end of file