Enhanced environment configuration and content fetching

Extended environment variables to allow for greater configurability and aligned debug mode detection with the new STRUCTABLES_DEBUG variable. Additionally, introduced command-line arguments for specifying Invidious instance URLs and unsafe iframe display settings.

The content fetching logic has been revised for fetching JSON data directly instead of scraping HTML, resulting in a more robust and efficient data extraction process. New templates accommodate the change and present a cleaner UI, including handling of iframes, downloads, and proper proxying of external resources.

HTML templates have been refactored to utilize better layout and styling while enhancing support for iframes and downloads, complete with the new ability to block iframe content from outside sources unless explicitly permitted. This security-focused feature protects end-users from potentially unsafe content.
This commit is contained in:
Kumi 2024-01-31 12:50:19 +01:00
parent 1a4e0360a2
commit 9dd8f4e2e0
Signed by: kumi
GPG key ID: ECBCC9082395383F
4 changed files with 227 additions and 113 deletions

212
main.py
View file

@ -141,7 +141,9 @@ def update_data():
)
debugmode = os.environ.get("FLASK_DEBUG", False)
debugmode = os.environ.get("FLASK_DEBUG", os.environ.get("STRUCTABLES_DEBUG", False))
invidious = os.environ.get("STRUCTABLES_INVIDIOUS")
unsafe = os.environ.get("STRUCTABLES_UNSAFE", False)
if __name__ == "__main__":
parser = ArgumentParser()
@ -164,12 +166,29 @@ if __name__ == "__main__":
default="127.0.0.1",
help="Host to listen on",
)
parser.add_argument(
"-I",
"--invidious",
help="URL to Invidious instance, e.g. https://invidious.private.coffee/",
)
parser.add_argument(
"-u",
"--unsafe",
action="store_true",
help="Display iframes regardless of origin",
)
args = parser.parse_args()
if args.debug:
debugmode = True
print("Loading...")
if args.invidious:
invidious = args.invidious
if args.unsafe:
unsafe = True
print("Loading initial data...")
update_data()
@ -833,55 +852,93 @@ def route_member(member):
@app.route("/<article>/")
def route_article(article):
try:
data = urlopen(f"https://www.instructables.com/{article}/")
data = urlopen(
f"https://www.instructables.com/json-api/showInstructableModel?urlString={article}"
)
data = json.loads(data.read().decode())
except HTTPError as e:
abort(e.code)
soup = BeautifulSoup(data.read().decode(), "html.parser")
try:
header = soup.select("header")
if len(header) < 2 and soup.select("title")[0].text.contains("Pending Review"):
return render_template("article-review.html", title="Pending Review")
else:
header = header[1]
title = header.find("h1").text
title = data["title"]
author = data["author"]["screenName"]
author_link = f"/member/{author}"
category = data["classifications"][0]["title"]
category_slug = data["classifications"][0]["name"]
category_link = f"/{category_slug}/"
channel = data["classifications"][0]["channels"][0]["title"]
channel_slug = data["classifications"][0]["channels"][0]["name"]
channel_link = f"/{category_slug}/{channel_slug}/"
byline = header.select("div.sub-header div.header-byline")[0]
author = byline.select("a")[0].text
author_link = byline.select("a")[0].get("href")
category = byline.select("a")[1].text
category_link = byline.select("a")[1].get("href")
channel = byline.select("a")[2].text
channel_link = byline.select("a")[2].get("href")
stats = header.select("div.sub-header div.header-stats")[0]
views = stats.select(".view-count")[0].text
favorites = 0
if stats.select(".favorite-count") != []:
favorites = stats.select(".favorite-count")[0].text
if soup.select("div.article-body") != []:
## Instructables
body = soup.select("div.article-body")[0]
views = data["views"]
favorites = data["favorites"]
if "steps" in data:
steps = []
for step in body.select("section.step"):
print(step)
step_title = step.select("h2")[0].text
step_imgs = []
# TODO: Handle download links
for img in step.select("img"):
step_imgs.append(
{"src": proxy(img.get("src")), "alt": img.get("alt")}
if "supplies" in data:
supplies = data["supplies"]
supplies_files = []
if "suppliesFiles" in data:
supplies_files = data["suppliesFiles"]
data["steps"].insert(
1, {"title": "Supplies", "body": supplies, "files": supplies_files}
)
step_videos = []
for img in step.select("video"):
step_videos.append([proxy(img.get("src"))])
for step in data["steps"]:
step_title = step["title"]
print(step_title)
step_text = str(step.select("div.step-body")[0])
step_imgs = []
step_videos = [] # TODO: Check if this is still required
step_iframes = []
step_downloads = []
for file in step["files"]:
print(file)
if file["image"] and not "embedType" in file:
step_imgs.append(
{"src": proxy(file["downloadUrl"]), "alt": file["name"]}
)
elif not file["image"]:
step_downloads.append(
{"src": proxy(file["downloadUrl"]), "name": file["name"]}
)
else: # Leaves us with embeds
embed_code = file["embedHtmlCode"]
soup = BeautifulSoup(embed_code, "html.parser")
iframe = soup.select("iframe")[0]
src = iframe.get("src")
if src.startswith("https://content.instructables.com"):
src = src.replace(
"https://content.instructables.com",
f"/proxy/?url={src}",
)
elif invidious and src.startswith("https://www.youtube.com"):
src = src.replace("https://www.youtube.com", invidious)
elif not unsafe:
src = "/iframe/?url=" + quote(src)
step_iframes.append(
{
"src": src,
"width": file.get("width"),
"height": file.get("height"),
}
)
step_text = step["body"]
step_text = step_text.replace(
"https://content.instructables.com",
"/proxy/?url=https://content.instructables.com",
@ -892,6 +949,8 @@ def route_article(article):
"imgs": step_imgs,
"text": step_text,
"videos": step_videos,
"iframes": step_iframes,
"downloads": step_downloads,
}
)
@ -955,45 +1014,17 @@ def route_article(article):
else:
## Collections
thumbnails = []
for thumbnail in soup.select("ul#thumbnails-list li"):
text = (
link
) = (
img
) = (
thumbnail_title
) = (
thumbnail_author
) = (
thumbnail_author_link
) = thumbnail_channel = thumbnail_channel_link = ""
for thumbnail in data["instructables"]:
text = thumbnail["title"]
link = thumbnail["showUrl"]
img = proxy(thumbnail["downloadUrl"])
thumbnail_title = thumbnail["title"]
thumbnail_author = thumbnail["author"]["screenName"]
thumbnail_author_link = f"/member/{thumbnail_author}"
thumbnail_channel = thumbnail["classifications"][0]["channels"][0]["title"]
thumbnail_category = thumbnail["classifications"][0]["title"]
thumbnail_channel_link = f"/{thumbnail_category}/{thumbnail_channel}"
if thumbnail.select("div.thumbnail > p") != []:
text = thumbnail.select("div.thumbnail > p")[0]
if thumbnail.select("div.thumbnail div.thumbnail-image"):
link = thumbnail.select("div.thumbnail div.thumbnail-image a")[
0
].get("href")
img = proxy(
thumbnail.select("div.thumbnail div.thumbnail-image a img")[
0
].get("src")
)
thumbnail_title = thumbnail.select(
"div.thumbnail div.thumbnail-info h3.title a"
)[0].text
thumbnail_author = thumbnail.select(
"div.thumbnail div.thumbnail-info span.author a"
)[0].text
thumbnail_author_link = thumbnail.select(
"div.thumbnail div.thumbnail-info span.author a"
)[0].get("href")
thumbnail_channel = thumbnail.select(
"div.thumbnail div.thumbnail-info span.origin a"
)[0].text
thumbnail_channel_link = thumbnail.select(
"div.thumbnail div.thumbnail-info span.origin a"
)[0].get("href")
thumbnails.append(
{
"text": text,
@ -1007,6 +1038,8 @@ def route_article(article):
}
)
print(thumbnails[-1])
return render_template(
"collection.html",
title=title,
@ -1097,13 +1130,34 @@ def route_proxy():
except HTTPError as e:
abort(e.code)
return Response(data.read(), content_type=data.headers["content-type"])
content_disposition = data.headers.get("content-disposition")
headers = {}
if content_disposition:
headers["Content-Disposition"] = content_disposition
return Response(
data.read(),
headers=headers,
content_type=data.headers["content-type"],
)
else:
raise BadRequest()
else:
raise BadRequest()
@app.route("/iframe/")
def route_iframe():
url = request.args.get("url")
url = unquote(url)
if url != None:
return render_template("iframe.html", url=url)
else:
raise BadRequest()
@app.route("/privacypolicy/")
def privacypolicy():
content = "No privacy policy found."

View file

@ -14,21 +14,63 @@
<div class="container">
{% for step in steps %}
<div class="row wrap">
<div class="row mb-6">
<div class="col-12">
<h2>{{ step.title }}</h2>
<div class="step-imgs">
</div>
</div>
{% if step.imgs %}
<div class="row mb-3">
{% for step_img in step.imgs %}
<img src="{{ step_img.src }}" alt="{{ step_img.alt }}" />
<div class="col-md-3">
<img src="{{ step_img.src }}" alt="{{ step_img.alt }}" class="img-fluid" />
</div>
{% endfor %}
</div>
<div class="step-vids">
{% endif %}
{% if step.videos %}
<div class="row mb-3">
{% for step_video in step.videos %}
<video src="{{ step_video }}"></video>
<div class="col-md-3">
<video src="{{ step_video }}" controls class="w-100"></video>
</div>
{% endfor %}
</div>
{% endif %}
{% if step.iframes %}
<div class="row mb-3">
{% for step_iframe in step.iframes %}
<div class="col-md-3 mb-3">
<iframe src="{{ step_iframe.src }}" width="100%" height="{{ step_iframe.height }}"></iframe>
</div>
{% endfor %}
</div>
{% endif %}
{% if step.downloads %}
<div class="row">
<div class="col-12">
<h3>Downloads</h3>
</div>
{% for step_download in step.downloads %}
<div class="col-md-2 mb-3">
<a href="{{ step_download.src }}" class="btn btn-primary">{{ step_download.name }}</a>
</div>
{% endfor %}
</div>
{% endif %}
<div class="row">
<div class="col-12 mb-3">
{{ step.text|safe }}
</div>
</div>
{% endfor %}
</div>
<br />
{% for index, comment in enumerate(comments) %}

View file

@ -1,28 +1,34 @@
{% extends "base.html" %}
{% block content %}
<center>
{% extends "base.html" %} {% block content %}
<div class="container text-center">
<h1>{{ title }}</h1>
<p>by <a href="{{ author_link }}">{{ author }}</a> in <a href="{{ category_link }}">{{ category }}</a> &gt; <a
href="{{ channel_link }}">{{ channel }}</a></p>
<p>
by <a href="{{ author_link }}">{{ author }}</a> in
<a href="{{ category_link }}">{{ category }}</a> &gt;
<a href="{{ channel_link }}">{{ channel }}</a>
</p>
<p>{{ views }} Views, {{ favorites }} Favorites</p>
<div style="max-width:90%;">
<div class="row justify-content-center">
{% for thumbnail in thumbnails %}
<div class="col-md-4 mb-3">
<div class="ible-list-item">
{% if thumbnail.title == '' %}
<a href="{{ thumbnail.link }}" style="color:#bbc2cf;">
<img style="max-width:350px;" src="{{ thumbnail.img }}" alt="{{ thumbnail.author }}">
<a href="{{ thumbnail.link }}" style="color: #bbc2cf">
<img
class="img-fluid"
src="{{ thumbnail.img }}"
alt="{{ thumbnail.title }}"
style="max-width: 350px"
/>
<p>{{ thumbnail.author }}</p>
</a>
<p>by <a href="{{ thumbnail.author_link }}">{{ thumbnail.author }}</a> in <a href="{{ thumbnail.channel_link }}">{{
thumbnail.channel }}</a> </p>
{% else %}
{{ thumbnail.text|safe }}
{% endif %}
<p>
by <a href="{{ thumbnail.author_link }}">{{ thumbnail.author }}</a> in
<a href="{{ thumbnail.channel_link }}">{{ thumbnail.channel }}</a>
</p>
</div>
</div>
{% endfor %}
</div>
</center>
</div>
{% endblock %}

12
templates/iframe.html Normal file
View file

@ -0,0 +1,12 @@
<html>
<head>
<title>iframe content</title>
</head>
<body>
<h1>Blocked iframe</h1>
<p>This page contains content from outside Instructables.com. This was blocked for your safety.</p>
<p>It tries to load the following URL:</p>
<p><a href="{{ url | safe }}" target="_self">{{ url | safe }}</a></p>
<p>Click <a href="{{ url | safe }}" target="_self">here</a> to load the content.</p>
</body>
</html>