#!/usr/bin/env python
"""Flask front-end that scrapes instructables.com and re-renders its pages.

Pages are fetched with ``urllib``, parsed with BeautifulSoup and rendered
through local Jinja templates.  Images and other media are served through
the local ``/proxy/`` route.  Project listings come from the Typesense
search endpoint exposed by instructables.com.

Importing this module performs network requests: the Typesense API key is
scraped and the global project list is loaded.
"""

from argparse import ArgumentParser
from configparser import ConfigParser
from traceback import print_exc
from urllib.error import HTTPError
from urllib.parse import quote, unquote, urljoin, urlparse
from urllib.request import Request, urlopen
import json
import logging
import os
import pathlib
import re

from bs4 import BeautifulSoup
from flask import (
    Flask,
    Response,
    redirect,
    render_template,
    request,
    stream_with_context,
)
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound, abort

logging.basicConfig(level=logging.DEBUG)

# Cache of pre-rendered listing data, keyed by site path ("/projects").
global_ibles = {}


def proxy(url):
    """Return the local ``/proxy/`` URL that serves *url*.

    The target is percent-encoded so that a query string inside *url*
    (for example ``?a=1&b=2``) is not split into separate parameters of
    the ``/proxy/`` request.
    """
    logging.debug(f"Generating proxy URL for {url}")
    return f"/proxy/?url={quote(str(url))}"


def _fetch_soup(url):
    """Download *url* (a URL string or ``Request``) and parse it as HTML.

    An upstream ``HTTPError`` is converted into an abort with the same
    status code, so this helper is meant to be called from view functions.
    """
    try:
        data = urlopen(url)
    except HTTPError as e:
        abort(e.code)
    with data:
        return BeautifulSoup(data.read().decode(), "html.parser")


def _first_text(node, selector, default=0):
    """Return the text of the first match of *selector*, else *default*."""
    matches = node.select(selector)
    return matches[0].text if matches else default


def _ible_from_hit(hit):
    """Convert one search hit into the dict consumed by the templates."""
    document = hit["document"]
    author = document["screenName"]
    channel = document["primaryClassification"]
    return {
        "link": f"/{document['urlString']}",
        "img": proxy(document["coverImageUrl"]),
        "title": document["title"],
        "author": author,
        "author_link": f"/member/{author}",
        "channel": channel,
        "channel_link": f"/channel/{channel}",
        "views": document["views"],
        "favorites": document["favorites"],
    }


def get_typesense_api_key():
    """Scrape the Typesense API key from the instructables.com home page.

    Returns the key as a string, or ``None`` (after logging an error) when
    no ``<script>`` element contains a ``"typesenseApiKey"`` entry.
    """
    logging.debug("Getting Typesense API key...")

    with urlopen("https://www.instructables.com/") as data:
        soup = BeautifulSoup(data.read().decode(), "html.parser")

    for script in soup.select("script"):
        if "typesense" in script.text and (
            matches := re.search(r'"typesenseApiKey":\s?"(.*?)"', script.text)
        ):
            api_key = matches.group(1)
            logging.debug(f"Identified Typesense API key as {api_key}")
            return api_key

    logging.error("Failed to get Typesense API key")
    return None


TYPESENSE_API_KEY = get_typesense_api_key()


def projects_search(
    query="*",
    category="",
    channel="",
    filter_by="featureFlag:=true",
    page=1,
    per_page=50,
):
    """Query the projects search endpoint and return the list of hits.

    *category* and *channel*, when non-empty, are AND-ed onto *filter_by*.
    Each returned hit is a dict whose ``"document"`` entry holds the
    project fields requested through ``include_fields``.
    """
    filters = [filter_by]
    if category:
        filters.append(f"category:={category}")
    if channel:
        filters.append(f"channel:={channel}")
    # Skip empty parts so that no dangling " && " is produced.
    filter_by = " && ".join(part for part in filters if part)

    query = quote(query)
    filter_by = quote(filter_by)

    logging.debug(f"Searching projects with query {query} and filter {filter_by}")

    projects_headers = {"x-typesense-api-key": TYPESENSE_API_KEY}
    projects_url = (
        "https://www.instructables.com/api_proxy/search/collections/projects"
        f"/documents/search?q={query}"
        "&query_by=title,stepBody,screenName"
        f"&page={page}"
        "&sort_by=publishDate:desc"
        "&include_fields=title,urlString,coverImageUrl,screenName,favorites,"
        "views,primaryClassification,featureFlag,prizeLevel,IMadeItCount"
        f"&filter_by={filter_by}&per_page={per_page}"
    )
    projects_request = Request(projects_url, headers=projects_headers)

    with urlopen(projects_request) as projects_data:
        project_obj = json.loads(projects_data.read().decode())
    project_ibles = project_obj["hits"]

    logging.debug(f"Got {len(project_ibles)} projects")

    return project_ibles


def update_data():
    """Refresh the cached global project list in ``global_ibles``.

    The new list is built completely before it is stored, so readers never
    observe a half-filled list, and an empty search result simply yields
    an empty list instead of looping forever.
    """
    logging.debug("Updating data...")

    global_ibles["/projects"] = [_ible_from_hit(hit) for hit in projects_search()]


debugmode = os.environ.get("FLASK_DEBUG", False)

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "-p",
        "--port",
        default=8002,
        type=int,
        help="Port to listen on",
    )
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Enable debug mode",
    )
    parser.add_argument(
        "-l",
        "--listen-host",
        default="127.0.0.1",
        help="Host to listen on",
    )
    args = parser.parse_args()

    if args.debug:
        debugmode = True

print("Loading...")

update_data()

print("Started!")

app = Flask(__name__, template_folder="templates", static_folder="static")

if debugmode:
    app.logger.setLevel(logging.DEBUG)


@app.route("/cron/")
def cron():
    """Refresh the cached project list on demand."""
    update_data()
    return "OK"


def explore_lists(soup):
    """Extract the project cards of one home-page explore section.

    Returns a list of dicts, one per ``.home-content-explore-ible`` element.
    ``views`` and ``favorites`` default to ``0`` when the card lacks them.
    """
    list_ = []
    for ible in soup.select(".home-content-explore-ible"):
        image = ible.select("a img")[0]
        author = ible.select("div span.ible-author a")[0]
        channel = ible.select("div span.ible-channel a")[0]

        list_.append(
            {
                "link": ible.a["href"],
                "img": proxy(image.get("data-src")),
                "alt": image.get("alt"),
                "title": ible.select("div strong a")[0].text,
                "author": author.text,
                "author_link": author.get("href"),
                "channel": channel.text,
                "channel_link": channel.get("href"),
                "favorites": _first_text(ible, "span.ible-favorites"),
                "views": _first_text(ible, "span.ible-views"),
            }
        )
    return list_


def member_header(header):
    """Extract the profile header fields of a member page.

    Optional fields that are absent from the page are reported as ``0``
    (``bio`` as an empty string).  The avatar and the title are required;
    a missing one raises ``IndexError``.
    """
    avatar = proxy(
        header.select("div.profile-avatar-container img.profile-avatar")[0].get("src")
    )
    title = header.select("div.profile-top div.profile-headline h1.profile-title")[
        0
    ].text

    return {
        "avatar": avatar,
        "title": title,
        "location": _first_text(header, "span.member-location"),
        "signup": _first_text(header, "span.member-signup-date"),
        "instructables": _first_text(header, "span.ible-count"),
        "views": _first_text(header, "span.total-views"),
        "comments": _first_text(header, "span.total-comments"),
        "followers": _first_text(header, "span.follower-count"),
        "bio": _first_text(header, "span.member-bio", ""),
    }


def category_page(path, name, teachers=False):
    """Render the landing page of a category.

    *path* is the site path to scrape, *name* the title handed to the
    template.  With *teachers* set, channel card images are read from the
    ``<noscript>`` fallback markup.
    """
    # TODO: Figure out why this doesn't work - probably using the search function would help...

    soup = _fetch_soup("https://www.instructables.com" + path)

    card_img_selector = f"a{' noscript' if teachers else ''} img"
    channels = []
    for card in soup.select("div.scrollable-cards-inner div.scrollable-card"):
        link = card.a["href"]
        img = proxy(card.select(card_img_selector)[0].get("src"))
        title = card.select("a img")[0].get("alt")

        channels.append({"link": link, "title": title, "img": img})

    ibles = []
    for ible in soup.select(
        "div.category-landing-projects-list div.category-landing-projects-ible"
    ):
        link = ible.a["href"]
        img = proxy(ible.select("a noscript img")[0].get("src"))

        info = ible.select("div.category-landing-projects-ible-info")[0]
        title = info.select("a.ible-title")[0].text
        author = info.select("span.ible-author a")[0]
        channel = info.select("span.ible-channel a")[0]

        stats = ible.select("span.ible-stats-right-col")[0]

        ibles.append(
            {
                "link": link,
                "img": img,
                "title": title,
                "author": author.text,
                "author_link": author.get("href"),
                "channel": channel.text,
                "channel_link": channel.get("href"),
                "views": _first_text(stats, "span.ible-views"),
                "favorites": _first_text(stats, "span.ible-favorites"),
            }
        )

    contests = []
    for contest in soup.select(
        "div.category-landing-contests-list div.category-landing-contests-item"
    ):
        link = contest.a["href"]
        img = proxy(contest.select("a noscript img")[0].get("src"))
        title = contest.select("a img")[0].get("alt")

        contests.append({"link": link, "img": img, "title": title})

    return render_template(
        "category.html",
        name=name,
        channels=channels,
        ibles=ibles,
        contests=contests,
        path=path,
    )


def project_list(path, head, sort=""):
    """Render a project listing for *path*.

    ``/projects`` paths (with or without a sort suffix) show the cached
    global list.  ``/<category>/projects/...`` and
    ``/<category>/<channel>/projects/...`` run a search limited to eight
    results.  Any other path aborts with 404.  *sort* only affects the
    page title; sorting itself is not implemented yet.
    """
    head = f"{head + ' ' if head != '' else ''}Projects" + sort
    path = urlparse(path).path

    if path == "/projects" or path.startswith("/projects/"):
        # "/projects/<sort>" must not be treated as category "projects".
        ibles = global_ibles["/projects"]
    else:
        parts = path.split("/")
        if "projects" not in parts:
            abort(404)

        category = parts[1]
        channel = "" if parts[2] == "projects" else parts[2]

        # TODO: Add pagination, popular, etc.

        project_ibles = projects_search(category=category, channel=channel)
        ibles = [_ible_from_hit(hit) for hit in project_ibles[:8]]

    logging.debug(f"Rendering {len(ibles)} projects for {path}")

    return render_template("projects.html", title=head, ibles=ibles, path=path)


def _sitemap_channels(node):
    """Return ``[name, link]`` pairs for the sitemap entries below *node*."""
    return [[li.a.text, li.a["href"]] for li in node.select("ul.sitemap-listing li")]


@app.route("/sitemap/")
@app.route("/sitemap/<path:path>")
def route_sitemap(path=""):
    """Render the sitemap, optionally for a sub-path of ``/sitemap/``."""
    soup = _fetch_soup("https://www.instructables.com/sitemap/" + path)

    main = soup.select("div.sitemap-content")[0]

    group_section = main.select("div.group-section")

    if group_section:
        groups = []
        for group in group_section:
            heading = group.select("h2 a")[0]
            groups.append(
                [heading.text, heading.get("href"), _sitemap_channels(group)]
            )
    else:
        # Pages without group sections list their entries directly.
        groups = [["", "", _sitemap_channels(main)]]

    return render_template("sitemap.html", title="Sitemap", groups=groups)


@app.route("/contest/archive/")
def route_contest_archive():
    """Render one page of the contest archive (``?page=N``, default 1)."""
    # type=int makes a malformed value fall back to the default instead of
    # being pasted into the upstream URL.
    page = request.args.get("page", 1, type=int)

    soup = _fetch_soup(f"https://www.instructables.com/contest/archive/?page={page}")

    main = soup.select("div#contest-archive-wrapper")[0]

    contest_count = main.select("p.contest-count")[0].text

    # Year headings and year containers are siblings matched by position.
    year_lists = main.select("div.contest-archive-list div.contest-archive-list-year")

    contest_list = []
    for index, year in enumerate(main.select("div.contest-archive-list h2")):
        year_list = year_lists[index]
        month_list = []
        for month in year_list.select("div.contest-archive-list-month"):
            month_name = month.select("h3")[0].text
            month_contest_list = []
            for p in month.select("p"):
                anchor = p.select("a")[0]
                month_contest_list.append(
                    [p.select("span")[0].text, anchor.get("href"), anchor.text]
                )
            month_list.append([month_name, month_contest_list])
        contest_list.append([year.text, month_list])

    pagination = main.select("nav.pagination ul.pagination")[0]

    return render_template(
        "archives.html",
        title=f"Contest Archives (Page {page})",
        page=page,
        contest_count=contest_count,
        pagination=pagination,
        contest_list=contest_list,
    )


@app.route("/contest/<contest>/")
def route_contest(contest):
    """Render a single contest page with its entries."""
    soup = _fetch_soup(f"https://www.instructables.com/contest/{contest}/")

    title = soup.select('meta[property="og:title"]')[0].get("content")

    body = soup.select("div#contest-wrapper")[0]

    img = proxy(body.select("div#contest-masthead img")[0].get("src"))

    entry_count = body.select("li.entries-nav-btn")[0].text.split(" ")[0]
    prizes = body.select("li.prizes-nav-btn")[0].text.split(" ")[0]

    info = body.select("div.contest-body-column-left")[0]
    info.select("div#site-announcements-page")[0].decompose()
    info.select("h3")[0].decompose()
    info.select("div#contest-body-nav")[0].decompose()
    # Make absolute site links local.  The variant with a trailing slash is
    # handled first so that ".../foo" becomes "/foo" and not "//foo".
    info = (
        str(info)
        .replace("https://www.instructables.com/", "/")
        .replace("https://www.instructables.com", "/")
    )

    entry_list = []
    for entry in body.select("div.contest-entries-list div.contest-entries-list-ible"):
        author = entry.select("div span.ible-author a")[0]
        channel = entry.select("div span.ible-channel a")[0]

        entry_list.append(
            {
                "link": entry.a["href"],
                "entry_img": proxy(entry.select("a noscript img")[0].get("src")),
                "entry_title": entry.select("a.ible-title")[0].text,
                "author": author.text,
                "author_link": author.get("href"),
                "channel": channel.text,
                "channel_link": channel.get("href"),
                "views": entry.select(".ible-views")[0].text,
            }
        )

    return render_template(
        "contest.html",
        title=title,
        img=img,
        entry_count=entry_count,
        prizes=prizes,
        info=info,
        entry_list=entry_list,
    )


@app.route("/contest/")
def route_contests():
    """Render the overview of running and recently closed contests."""
    soup = _fetch_soup("https://www.instructables.com/contest/")

    contest_count = str(soup.select("p.contest-count")[0])

    contests = []
    for contest in soup.select("div#cur-contests div.row-fluid div.contest-banner"):
        banner_img = contest.select("div.contest-banner-inner a img")[0]
        counts = contest.select("span.contest-meta-count")

        contests.append(
            {
                "link": contest.select("div.contest-banner-inner a")[0].get("href"),
                "img": proxy(banner_img.get("src")),
                "alt": banner_img.get("alt"),
                "deadline": contest.select("span.contest-meta-deadline")[0].get(
                    "data-deadline"
                ),
                "prizes": counts[0].text,
                "entries": counts[1].text,
            }
        )

    closed = []
    for display in soup.select("div.contest-winner-display"):
        link = display.select("div.contest-banner-inner a")[0].get("href")
        banner_img = display.select("div.contest-banner-inner a img")[0]

        featured_items = []
        for featured_item in display.select("ul.featured-items li"):
            item_author = featured_item.select("a.author")[0]

            featured_items.append(
                {
                    "link": featured_item.select("div.ible-thumb a")[0].get("href"),
                    "img": proxy(
                        featured_item.select("div.ible-thumb a img")[0].get("src")
                    ),
                    "title": featured_item.select("a.title")[0].text,
                    "author": item_author.text,
                    "author_link": item_author.get("href"),
                }
            )

        closed.append(
            {
                "link": link,
                "img": proxy(banner_img.get("src")),
                "alt": banner_img.get("alt"),
                "featured_items": featured_items,
            }
        )

    return render_template(
        "contests.html",
        title="Contests",
        contest_count=contest_count,
        contests=contests,
        closed=closed,
    )


@app.route("/<category>/<channel>/projects/")
def route_channel_projects(category, channel):
    """List the projects of one channel."""
    return project_list(f"/{category}/{channel}/projects/", channel.title())


@app.route("/<category>/<channel>/projects/<sort>/")
def route_channel_projects_sort(category, channel, sort):
    """List the projects of one channel under a sort-specific title."""
    return project_list(
        f"/{category}/{channel}/projects/{sort}",
        channel.title(),
        " Sorted by " + sort.title(),
    )


@app.route("/<category>/projects/")
def route_category_projects(category):
    """List the projects of one category."""
    return project_list(f"/{category}/projects/", category.title())


@app.route("/<category>/projects/<sort>/")
def route_category_projects_sort(category, sort):
    """List the projects of one category under a sort-specific title."""
    return project_list(
        f"/{category}/projects/{sort}", category.title(), " Sorted by " + sort.title()
    )


@app.route("/projects/")
def route_projects():
    """List the cached global projects."""
    return project_list("/projects/", "")


@app.route("/search")
def route_search():
    """Render search results for the required ``q`` query parameter.

    A missing ``q`` yields a 400 response.  The search runs without the
    default feature filter so that all projects can be found.
    """
    query = request.args["q"]
    hits = projects_search(query=query or "*", filter_by="")
    ibles = [_ible_from_hit(hit) for hit in hits]
    return render_template(
        "projects.html", title="Search Projects", ibles=ibles, path="/search"
    )


@app.route("/projects/<sort>/")
def route_projects_sort(sort):
    """List the cached global projects under a sort-specific title."""
    return project_list(f"/projects/{sort}", "", " Sorted by " + sort.title())


@app.route("/circuits/")
def route_circuits():
    """Render the Circuits category page."""
    return category_page("/circuits/", "Circuits")


@app.route("/workshop/")
def route_workshop():
    """Render the Workshop category page."""
    return category_page("/workshop/", "Workshop")


@app.route("/craft/")
def route_craft():
    """Render the Craft category page."""
    return category_page("/craft/", "Craft")


@app.route("/cooking/")
def route_cooking():
    """Render the Cooking category page."""
    return category_page("/cooking/", "Cooking")


@app.route("/living/")
def route_living():
    """Render the Living category page."""
    return category_page("/living/", "Living")


@app.route("/outside/")
def route_outside():
    """Render the Outside category page."""
    return category_page("/outside/", "Outside")


@app.route("/teachers/")
def route_teachers():
    """Render the Teachers category page."""
    return category_page("/teachers/", "Teachers", True)


@app.route("/sitemap/projects/<category>/<subcategory>")
def route_sitemap_circuits(category, subcategory):
    """Render the category page a sitemap project link points to."""
    return category_page(
        "/" + category + "/" + subcategory, subcategory + " - " + category
    )


@app.route("/member/<member>/instructables/")
def route_member_instructables(member):
    """Render the list of instructables published by *member*."""
    soup = _fetch_soup(f"https://www.instructables.com/member/{member}/instructables/")

    header = soup.select(".profile-header.profile-header-social")[0]
    header_content = member_header(header)

    ibles = soup.select("ul.ible-list-items")[0]
    ible_list = []
    for ible in ibles.select("li"):
        stats = ible.select("div.ible-stats-right-col")[0]

        ible_list.append(
            {
                "link": ible.select("div.thumbnail-image")[0].a.get("href"),
                "img": proxy(
                    ible.select("div.thumbnail-image a noscript img")[0].get("src")
                ),
                "title": ible.select("div.caption-inner a.title")[0].text,
                "views": _first_text(stats, "span.ible-views"),
                "favorites": _first_text(stats, "span.ible-favorites"),
            }
        )

    return render_template(
        "member-instructables.html",
        title=f"{header_content['title']}'s Instructables",
        header_content=header_content,
        ibles=ible_list,
    )


@app.route("/member/<member>/")
def route_member(member):
    """Render the profile page of *member*."""
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0"
    }

    # Named so that it does not shadow Flask's ``request`` proxy.
    member_request = Request(
        f"https://www.instructables.com/member/{member}/", headers=headers
    )

    soup = _fetch_soup(member_request)

    header_content = member_header(soup)

    body = soup.select("div.member-profile-body")[0]

    ible_list = body.select(".boxed-content.promoted-content")

    ible_list_title = ""
    ibles = []

    if ible_list:
        ible_list = ible_list[0]
        ible_list_title = ible_list.select("h2.module-title")[0].text
        for ible in ible_list.select("ul.promoted-items li"):
            ibles.append(
                {
                    "title": ible.get("data-title"),
                    "link": ible.select("div.image-wrapper")[0].a.get("href"),
                    "img": proxy(ible.select("div.image-wrapper a img")[0].get("src")),
                }
            )

    ach_list = body.select(
        "div.two-col-section div.right-col-section.centered-sidebar div.boxed-content.about-me"
    )

    ach_list_title = ""
    achs = []

    # The achievements box is the second "about-me" box, when present.
    if len(ach_list) > 1:
        ach_list = ach_list[1]
        ach_list_title = ach_list.select("h2.module-title")[0].text
        for ach in ach_list.select(
            "div.achievements-section.main-achievements.contest-achievements div.achievement-item:not(.two-column-filler)"
        ):
            ach_title = ach.select("div.achievement-info span.achievement-title")[
                0
            ].text
            ach_desc = ach.select("div.achievement-info span.achievement-description")[
                0
            ].text
            achs.append([ach_title, ach_desc])

    return render_template(
        "member.html",
        title=header_content["title"] + "'s Profile",
        header_content=header_content,
        ible_list_title=ible_list_title,
        ibles=ibles,
        ach_list_title=ach_list_title,
        achs=achs,
    )


def _article_steps(body):
    """Extract title, images, videos and HTML text of every article step."""
    steps = []
    for step in body.select("section.step"):
        step_title = step.select("h2")[0].text

        # TODO: Handle download links
        step_imgs = [
            {"src": proxy(img.get("src")), "alt": img.get("alt")}
            for img in step.select("img")
        ]
        step_videos = [[proxy(video.get("src"))] for video in step.select("video")]

        step_text = str(step.select("div.step-body")[0])
        step_text = step_text.replace(
            "https://content.instructables.com",
            "/proxy/?url=https://content.instructables.com",
        )

        steps.append(
            {
                "title": step_title,
                "imgs": step_imgs,
                "text": step_text,
                "videos": step_videos,
            }
        )
    return steps


def _collection_thumbnails(soup):
    """Extract the entries of a collection page.

    Fields that an entry does not provide are left as empty strings.
    """
    thumbnails = []
    for thumbnail in soup.select("ul#thumbnails-list li"):
        text = link = img = thumbnail_title = ""
        thumbnail_author = thumbnail_author_link = ""
        thumbnail_channel = thumbnail_channel_link = ""

        paragraphs = thumbnail.select("div.thumbnail > p")
        if paragraphs:
            text = paragraphs[0]

        if thumbnail.select("div.thumbnail div.thumbnail-image"):
            link = thumbnail.select("div.thumbnail div.thumbnail-image a")[0].get(
                "href"
            )
            img = proxy(
                thumbnail.select("div.thumbnail div.thumbnail-image a img")[0].get(
                    "src"
                )
            )
            thumbnail_title = thumbnail.select(
                "div.thumbnail div.thumbnail-info h3.title a"
            )[0].text
            author = thumbnail.select(
                "div.thumbnail div.thumbnail-info span.author a"
            )[0]
            thumbnail_author = author.text
            thumbnail_author_link = author.get("href")
            origin = thumbnail.select(
                "div.thumbnail div.thumbnail-info span.origin a"
            )[0]
            thumbnail_channel = origin.text
            thumbnail_channel_link = origin.get("href")

        thumbnails.append(
            {
                "text": text,
                "link": link,
                "img": img,
                "title": thumbnail_title,
                "author": thumbnail_author,
                "author_link": thumbnail_author_link,
                "channel": thumbnail_channel,
                "channel_link": thumbnail_channel_link,
            }
        )
    return thumbnails


@app.route("/<article>/")
def route_article(article):
    """Render an instructable or a collection.

    Any error while parsing the upstream page is printed and turned into
    a 500 response.
    """
    soup = _fetch_soup(f"https://www.instructables.com/{article}/")

    try:
        header = soup.select("header")
        if len(header) < 2 and "Pending Review" in soup.select("title")[0].text:
            return render_template("article-review.html", title="Pending Review")

        header = header[1]
        title = header.find("h1").text

        byline = header.select("div.sub-header div.header-byline")[0]
        byline_links = byline.select("a")
        author = byline_links[0].text
        author_link = byline_links[0].get("href")
        category = byline_links[1].text
        category_link = byline_links[1].get("href")
        channel = byline_links[2].text
        channel_link = byline_links[2].get("href")

        stats = header.select("div.sub-header div.header-stats")[0]
        views = stats.select(".view-count")[0].text
        favorites = _first_text(stats, ".favorite-count")

        article_bodies = soup.select("div.article-body")
        if article_bodies:
            ## Instructables
            steps = _article_steps(article_bodies[0])

            # TODO: Fix comments.  The disabled scraper read
            # "section.discussion" (count from its h2, posts from
            # "div.posts") and collected votes, author avatar/name/link,
            # date, text and replies for every ".post.js-comment" element.
            comments_list = []
            comment_count = 0

            return render_template(
                "article.html",
                title=title,
                author=author,
                author_link=author_link,
                category=category,
                category_link=category_link,
                channel=channel,
                channel_link=channel_link,
                views=views,
                favorites=favorites,
                steps=steps,
                comment_count=comment_count,
                comments_list=comments_list,
                enumerate=enumerate,
            )

        ## Collections
        return render_template(
            "collection.html",
            title=title,
            author=author,
            author_link=author_link,
            category=category,
            category_link=category_link,
            channel=channel,
            channel_link=channel_link,
            views=views,
            favorites=favorites,
            thumbnails=_collection_thumbnails(soup),
        )

    except Exception:
        print_exc()
        raise InternalServerError()


@app.route("/<category>/<channel>/")
def route_channel_redirect(category, channel):
    """Redirect ``/<category>/<channel>/`` to the channel's project list."""
    # TODO: Just check if the channel exists
    known_categories = {
        "circuits",
        "workshop",
        "craft",
        "cooking",
        "living",
        "outside",
        "teachers",
    }
    if category not in known_categories:
        raise NotFound()
    return redirect(f"/{category}/{channel}/projects/", 307)


@app.route("/")
def route_explore():
    """Render the home page with one project section per category."""
    soup = _fetch_soup("https://www.instructables.com/")

    explore = soup.select(".home-content-explore-wrap")[0]

    title = explore.select("h2")[0].text

    sections = []
    for label in (
        "Circuits",
        "Workshop",
        "Craft",
        "Cooking",
        "Living",
        "Outside",
        "Teachers",
    ):
        slug = label.lower()
        section = explore.select(f".home-content-explore-category-{slug}")[0]
        sections.append((label, f"/{slug}", explore_lists(section)))

    return render_template("index.html", title=title, sections=sections)


@app.route("/proxy/")
def route_proxy():
    """Relay a file from the instructables CDN or content host.

    Only URLs starting with one of the two allowed prefixes are fetched;
    anything else, or a missing ``url`` parameter, yields a 400 response.
    """
    allowed_prefixes = (
        "https://cdn.instructables.com/",
        "https://content.instructables.com/",
    )

    url = request.args.get("url")
    if url is None or not url.startswith(allowed_prefixes):
        raise BadRequest()

    try:
        data = urlopen(unquote(url))
    except HTTPError as e:
        abort(e.code)

    # Close the upstream connection once the payload has been read.
    with data:
        return Response(data.read(), content_type=data.headers["content-type"])


@app.route("/privacypolicy/")
def privacypolicy():
    """Render the privacy policy from ``privacy.txt`` next to this file."""
    content = "No privacy policy found."

    try:
        with (pathlib.Path(__file__).parent / "privacy.txt").open() as f:
            content = f.read()
    except (OSError, UnicodeDecodeError):
        # Best effort: a missing or unreadable file keeps the placeholder.
        pass

    return render_template(
        "privacypolicy.html", title="Privacy Policy", content=content
    )


@app.errorhandler(404)
def not_found(e):
    """Render the 404 page with a 404 status."""
    return render_template("404.html"), 404


@app.errorhandler(400)
def bad_request(e):
    """Render the 400 page with a 400 status."""
    return render_template("400.html"), 400


@app.errorhandler(429)
def too_many_requests(e):
    """Render the 429 page with a 429 status."""
    return render_template("429.html"), 429


@app.errorhandler(500)
def internal_server_error(e):
    """Render the 500 page with a 500 status."""
    return render_template("500.html"), 500


if __name__ == "__main__":
    app.run(port=args.port, host=args.listen_host, debug=debugmode)