Replaced selenium with playwright

Fixed requirements
Added source links to footer
This commit is contained in:
Kumi 2023-06-02 14:02:40 +00:00
parent 586b916ed5
commit 823a44f664
Signed by: kumi
GPG key ID: ECBCC9082395383F
8 changed files with 483 additions and 2941 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
venv/
*.pyc
__pycache__/
.vscode

View file

@ -1,24 +1,36 @@
<div align="center"> <div align="center">
<img src="static/img/logo.png"> <img src="static/img/logo.png">
<h1>Indestructables</h1> <h1>Indestructables</h1>
An open source alternative front-end to Instructables An open source alternative front-end to Instructables. This is a fork of <a href="https://codeberg.org/snowcatridge10/indestructables">snowcatridge10's Indestructables</a> to use Playwright instead of Selenium.
<a href="https://matrix.to/#/#indestructables:fedora.im">snowcatridge10's Matrix Room</a>
<a href="https://matrix.to/#/#indestructables:fedora.im">Matrix</a>
</div> </div>
# Instances # Instances
None, yet! None, yet!
# Run your own instance # Run your own instance
## Dependencies ## Dependencies
First, create a virtual environment with `python3 -m venv venv` and activate it with `source venv/bin/activate`. Then, install the dependencies with:
`pip3 install -r requirements.txt`. `pip3 install -r requirements.txt`.
For the production environment, you also need the uWSGI Python3 plugin. On Debian, it can be installed via `apt install uwsgi-plugin-python3` For the production environment, you also need the uWSGI Python3 plugin. On Debian, it can be installed via `apt install uwsgi-plugin-python3`
Furthermore, you need to install the Chromium binary used by Playwright. You can do this by running `playwright install chromium`.
## Production ## Production
1. Clone the repository 1. Clone the repository
2. Run `uwsgi --plugin python3 --http-socket 0.0.0.0:8002 --wsgi-file main.py --callable app --processes 4 --threads 2` 2. Run `uwsgi --plugin python3 --http-socket 0.0.0.0:8002 --wsgi-file main.py --callable app --processes 4 --threads 2`
3. Point your reverse proxy to http://localhost:8002 3. Point your reverse proxy to http://localhost:8002
## Development ## Development
1. Clone the repository 1. Clone the repository
2. Run `python3 main.py` 2. Run `python3 main.py`
3. Connect to http://localhost:8002 3. Connect to http://localhost:8002

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

613
main.py
View file

@ -1,47 +1,64 @@
#!/usr/bin/env python #!/usr/bin/env python
from flask import Flask, render_template, request, redirect, Response, stream_with_context from flask import (
Flask,
render_template,
request,
redirect,
Response,
stream_with_context,
)
import requests import requests
import re import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import quote, unquote from urllib.parse import quote, unquote
from traceback import print_exc from traceback import print_exc
from requests_html import HTMLSession from requests_html import HTMLSession
from playwright.sync_api import sync_playwright
from urllib.parse import urljoin
from argparse import ArgumentParser
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
import os import os
debugmode = False debugmode = False
try: if __name__ == "__main__":
if sys.argv[1] == "debug": parser = ArgumentParser()
parser.add_argument(
"-p",
"--port",
default=8002,
type=int,
help="Port to listen on",
)
parser.add_argument(
"-d",
"--debug",
action="store_true",
help="Enable debug mode",
)
parser.add_argument(
"-l",
"--listen-host",
default="127.0.0.1",
help="Host to listen on",
)
args = parser.parse_args()
if args.debug:
debugmode = True debugmode = True
except:
pass
print("Loading...") print("Loading...")
def proxy(src): def proxy(src):
return "/proxy/?url=" + quote(str(src)) return "/proxy/?url=" + quote(str(src))
instance_root_url = "http://127.0.0.1:8002" def get_instance_root_url(request):
return request.url_root
chrome_options = Options() playwright = sync_playwright().start()
chrome_options.add_argument("--headless") browser = playwright.chromium.launch(headless=True)
page = browser.new_page()
if os.name == 'nt':
# Windows
driver = webdriver.Chrome('./chromedriver.exe', options=chrome_options)
else:
# Linux
driver = webdriver.Chrome('./chromedriver', options=chrome_options)
channels = [] channels = []
@ -59,49 +76,75 @@ global_ibles = {}
global_ibles["/projects"] = [] global_ibles["/projects"] = []
driver.get("https://www.instructables.com/projects") page.goto("https://www.instructables.com/projects")
while len(global_ibles["/projects"]) <= 0: while len(global_ibles["/projects"]) <= 0:
for ible in driver.find_elements(By.CLASS_NAME, "ibleCard__QPJVm"): for ible in page.query_selector_all(".ibleCard__QPJVm"):
link = ible.find_elements(By.CSS_SELECTOR, "a")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) link = (
img = proxy(ible.find_elements(By.CSS_SELECTOR, "img")[0].get_attribute("src")) ible.query_selector("a")
.get_attribute("href")
.replace("https://www.instructables.com", "{instance_root_url}")
)
img = proxy(ible.query_selector("img").get_attribute("src"))
title = ible.find_elements(By.CLASS_NAME, "title__t0fGQ")[0].text title = ible.query_selector(".title__t0fGQ").inner_text()
author = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].text author = ible.query_selector("a[href^='/member/']").inner_text()
author_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) author_link = (
ible.query_selector("a[href^='/member/']")
.get_attribute("href")
.replace("https://www.instructables.com", "{instance_root_url}")
)
channel = "TEST" channel = "TEST"
channel_link = "TEST" channel_link = "TEST"
for c in channels: for c in channels:
try: try:
channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].text channel = ible.query_selector("a[href^='/" + c + "']").inner_text()
channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) channel_link = (
ible.query_selector("a[href^='/" + c + "']")
.get_attribute("href")
.replace("https://www.instructables.com", "{instance_root_url}")
)
except: except:
try: try:
channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].text channel = ible.query_selector("a[href^='/projects/']").inner_text()
channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) channel_link = (
ible.query_selector("a[href^='/projects/']")
.get_attribute("href")
.replace("https://www.instructables.com", "{instance_root_url}")
)
except: except:
pass pass
stats = ible.find_elements(By.CLASS_NAME, "stats__GFKyl")[0] stats = ible.query_selector(".stats__GFKyl")
views = 0 views = 0
if stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']") != []: if stats.query_selector("div[title$=' views']"):
views = stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']")[0].text views = stats.query_selector("div[title$=' views']").inner_text()
favorites = 0 favorites = 0
if stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']") != []: if stats.query_selector("div[title$=' favorites']"):
favorites = stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']")[0].text favorites = stats.query_selector("div[title$=' favorites']").inner_text()
global_ibles["/projects"].append([link, img, title, author, author_link, channel, channel_link, views, favorites]) global_ibles["/projects"].append(
[
link,
img,
title,
author,
author_link,
channel,
channel_link,
views,
favorites,
]
)
firefox_capabilities = DesiredCapabilities.FIREFOX browser.close()
firefox_capabilities['marionette'] = True playwright.stop()
firefox_capabilities['binary'] = "C:/Program Files/Mozilla Firefox/firefox.exe"
options = Options()
options.add_argument("--headless")
print("Started!") print("Started!")
def explore_lists(soup): def explore_lists(soup):
list_ = [] list_ = []
for ible in soup.select(".home-content-explore-ible"): for ible in soup.select(".home-content-explore-ible"):
@ -119,12 +162,30 @@ def explore_lists(soup):
favorites = 0 favorites = 0
if ible.select("span.ible-favorites") != []: if ible.select("span.ible-favorites") != []:
favorites = ible.select("span.ible-favorites")[0].text favorites = ible.select("span.ible-favorites")[0].text
list_.append([link, img, alt, title, author, author_link, channel, channel_link, favorites, views]) list_.append(
[
link,
img,
alt,
title,
author,
author_link,
channel,
channel_link,
favorites,
views,
]
)
return list_ return list_
def member_header(header): def member_header(header):
avatar = proxy(header.select("div.profile-avatar-container img.profile-avatar")[0].get("src")) avatar = proxy(
title = header.select("div.profile-top div.profile-headline h1.profile-title")[0].text header.select("div.profile-avatar-container img.profile-avatar")[0].get("src")
)
title = header.select("div.profile-top div.profile-headline h1.profile-title")[
0
].text
profile_top = header.select("div.profile-top")[0] profile_top = header.select("div.profile-top")[0]
@ -133,7 +194,6 @@ def member_header(header):
# stats_text = profile_top.select("div.profile-header-stats")[0] # stats_text = profile_top.select("div.profile-header-stats")[0]
# stats_num = header.select("div.profile-top div.profile-header-stats")[1] # stats_num = header.select("div.profile-top div.profile-header-stats")[1]
location = header.select("span.member-location") location = header.select("span.member-location")
if location != []: if location != []:
location = location[0].text location = location[0].text
@ -146,7 +206,6 @@ def member_header(header):
else: else:
signup = 0 signup = 0
instructables = header.select("span.ible-count") instructables = header.select("span.ible-count")
if instructables != []: if instructables != []:
instructables = instructables[0].text instructables = instructables[0].text
@ -177,25 +236,42 @@ def member_header(header):
else: else:
bio = "" bio = ""
return [avatar, title, location, signup, instructables, views, comments, followers, bio] return [
avatar,
title,
location,
signup,
instructables,
views,
comments,
followers,
bio,
]
def category_page(path, name, teachers=False): def category_page(path, name, teachers=False):
data = requests.get("https://www.instructables.com" + path) data = requests.get("https://www.instructables.com" + path)
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
channels = [] channels = []
for card in soup.select("div.scrollable-cards-inner div.scrollable-card"): for card in soup.select("div.scrollable-cards-inner div.scrollable-card"):
link = card.a["href"] link = card.a["href"]
img = proxy(card.select(f"a{' noscript' if teachers else ''} img")[0].get("src")) img = proxy(
card.select(f"a{' noscript' if teachers else ''} img")[0].get("src")
)
title = card.select("a img")[0].get("alt") title = card.select("a img")[0].get("alt")
channels.append([link, title, img]) channels.append([link, title, img])
ibles = [] ibles = []
for ible in soup.select("div.category-landing-projects-list div.category-landing-projects-ible"): for ible in soup.select(
"div.category-landing-projects-list div.category-landing-projects-ible"
):
link = ible.a["href"] link = ible.a["href"]
img = proxy(ible.select("a noscript img")[0].get("src")) img = proxy(ible.select("a noscript img")[0].get("src"))
@ -214,75 +290,134 @@ def category_page(path, name, teachers=False):
if stats.select("span.ible-favorites") != []: if stats.select("span.ible-favorites") != []:
favorites = stats.select("span.ible-favorites")[0].text favorites = stats.select("span.ible-favorites")[0].text
ibles.append([link, img, title, author, author_link, channel, channel_link, views, favorites]) ibles.append(
[
link,
img,
title,
author,
author_link,
channel,
channel_link,
views,
favorites,
]
)
contests = [] contests = []
for contest in soup.select("div.category-landing-contests-list div.category-landing-contests-item"): for contest in soup.select(
"div.category-landing-contests-list div.category-landing-contests-item"
):
link = contest.a["href"] link = contest.a["href"]
img = proxy(contest.select("a noscript img")[0].get("src")) img = proxy(contest.select("a noscript img")[0].get("src"))
title = contest.select("a img")[0].get("alt") title = contest.select("a img")[0].get("alt")
contests.append([link, img, title]) contests.append([link, img, title])
return render_template("category.html", data=[name, channels, ibles, contests, path]) return render_template(
"category.html", data=[name, channels, ibles, contests, path]
)
_INSTRUCTABLES_ORIGIN = "https://www.instructables.com"
# Literal text that the scraping code writes into link fields instead of a
# real instance root; it has to be substituted before the links are usable.
_ROOT_PLACEHOLDER = "{instance_root_url}"
# Positions of link, author_link and channel_link inside an ible record
# ([link, img, title, author, author_link, channel, channel_link, views,
# favorites]).
_LINK_FIELDS = (0, 4, 6)
# A scraped listing stops growing once it holds this many cards.
_MAX_IBLES = 8


def _localize(href, instance_root):
    """Return *href* with the Instructables origin replaced by *instance_root*."""
    # NOTE(review): Playwright's get_attribute() returns the attribute as
    # written in the markup; if the site emits relative hrefs this is a no-op
    # and the link is already instance-relative - confirm against live markup.
    return href.replace(_INSTRUCTABLES_ORIGIN, instance_root)


def _inner_text_or(element, selector, default):
    """Return the inner text of *selector* below *element*, else *default*."""
    if element is None:
        return default
    match = element.query_selector(selector)
    return match.inner_text() if match is not None else default


def _resolve_root_placeholder(record, instance_root):
    """Return a copy of *record* whose link fields carry the real instance root."""
    resolved = list(record)
    for index in _LINK_FIELDS:
        if isinstance(resolved[index], str):
            resolved[index] = resolved[index].replace(
                _ROOT_PLACEHOLDER, instance_root
            )
    return resolved


def _scrape_ible_card(ible, instance_root):
    """Build one ible record from a Playwright handle of a project card."""
    author_anchor = ible.query_selector("a[href^='/member/']")

    link = _localize(ible.query_selector("a").get_attribute("href"), instance_root)
    img = proxy(ible.query_selector("img").get_attribute("src"))
    title = ible.query_selector(".title__t0fGQ").inner_text()
    author = author_anchor.inner_text()
    author_link = _localize(author_anchor.get_attribute("href"), instance_root)

    # "TEST" is the value the templates receive when no channel link is found.
    channel = "TEST"
    channel_link = "TEST"
    for c in channels:
        anchor = ible.query_selector("a[href^='/" + c + "']")
        label = anchor
        if anchor is None:
            # No link for this channel: fall back to the generic projects link.
            label = ible.query_selector("a[href^='/projects/'] span")
            anchor = ible.query_selector("a[href^='/projects/']")
        if label is None or anchor is None:
            continue
        channel = label.inner_text()
        channel_link = _localize(anchor.get_attribute("href"), instance_root)

    stats = ible.query_selector(".stats__GFKyl")
    views = _inner_text_or(stats, "div[title$=' views']", 0)
    favorites = _inner_text_or(stats, "div[title$=' favorites']", 0)

    return [
        link,
        img,
        title,
        author,
        author_link,
        channel,
        channel_link,
        views,
        favorites,
    ]


def project_list(path, head, sort=""):
    """Render ``projects.html`` for the Instructables listing at *path*.

    Must be called while handling a request: the instance root used for link
    rewriting is taken from the current Flask ``request``.

    Args:
        path: Upstream path (may include a query string) below
            ``https://www.instructables.com``.
        head: Heading prefix; ``"Projects"`` is appended to it.
        sort: Text appended to the heading, e.g. ``" Sorted by Views"``.

    Returns:
        The rendered ``projects.html`` template, given
        ``data=[heading, ibles, parent_path]``.
    """
    head = f"{head + ' ' if head != '' else ''}Projects" + sort
    path_ = path.rsplit("/", 1)[0]
    # url_root ends with a slash; the hrefs being rewritten start with one.
    instance_root = get_instance_root_url(request).rstrip("/")

    if path in ("/projects/", "/projects"):
        # Served from the list scraped at start-up, so no browser is needed.
        # NOTE(review): this assumes nothing downstream (e.g. the template)
        # expects the raw "{instance_root_url}" placeholder - confirm.
        ibles = [
            _resolve_root_placeholder(record, instance_root)
            for record in global_ibles["/projects"]
        ]
    else:
        ibles = []
        playwright = sync_playwright().start()
        try:
            browser = playwright.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(urljoin(_INSTRUCTABLES_ORIGIN, path))
                for card in page.query_selector_all(".ibleCard__QPJVm"):
                    ibles.append(_scrape_ible_card(card, instance_root))
                    if len(ibles) >= _MAX_IBLES:
                        break
            finally:
                # Release Chromium even when navigation or scraping fails.
                browser.close()
        finally:
            playwright.stop()

    return render_template("projects.html", data=[head, ibles, path_])
app = Flask(__name__, template_folder="templates", static_folder="static") app = Flask(__name__, template_folder="templates", static_folder="static")
@app.route('/sitemap/')
@app.route("/sitemap/")
def route_sitemap(): def route_sitemap():
data = requests.get(f"https://www.instructables.com/sitemap/") data = requests.get(f"https://www.instructables.com/sitemap/")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -301,14 +436,17 @@ def route_sitemap():
return render_template("sitemap.html", data=groups) return render_template("sitemap.html", data=groups)
@app.route('/contest/archive/')
@app.route("/contest/archive/")
def route_contest_archive(): def route_contest_archive():
page = 1 page = 1
if request.args.get("page") != None: if request.args.get("page") != None:
page = request.args.get("page") page = request.args.get("page")
data = requests.get(f"https://www.instructables.com/contest/archive/?page={page}") data = requests.get(f"https://www.instructables.com/contest/archive/?page={page}")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -318,7 +456,9 @@ def route_contest_archive():
contest_list = [] contest_list = []
for index, year in enumerate(main.select("div.contest-archive-list h2")): for index, year in enumerate(main.select("div.contest-archive-list h2")):
year_list = main.select("div.contest-archive-list div.contest-archive-list-year")[index] year_list = main.select(
"div.contest-archive-list div.contest-archive-list-year"
)[index]
year_name = year.text year_name = year.text
month_list = [] month_list = []
for month in year_list.select("div.contest-archive-list-month"): for month in year_list.select("div.contest-archive-list-month"):
@ -334,31 +474,35 @@ def route_contest_archive():
pagination = main.select("nav.pagination ul.pagination")[0] pagination = main.select("nav.pagination ul.pagination")[0]
return render_template("archives.html", data=[page, contest_count, pagination, contest_list]) return render_template(
"archives.html", data=[page, contest_count, pagination, contest_list]
)
@app.route('/contest/<contest>/') @app.route("/contest/<contest>/")
def route_contest(contest): def route_contest(contest):
data = requests.get(f"https://www.instructables.com/contest/{contest}/") data = requests.get(f"https://www.instructables.com/contest/{contest}/")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
title = soup.select("meta[property=\"og:title\"]")[0].get("content") title = soup.select('meta[property="og:title"]')[0].get("content")
body = soup.select("div#contest-wrapper")[0] body = soup.select("div#contest-wrapper")[0]
img = proxy(body.select("div#contest-masthead img")[0].get("src")) img = proxy(body.select("div#contest-masthead img")[0].get("src"))
entry_count = body.select("li.entries-nav-btn")[0].text.split(' ')[0] entry_count = body.select("li.entries-nav-btn")[0].text.split(" ")[0]
prizes = body.select("li.prizes-nav-btn")[0].text.split(' ')[0] prizes = body.select("li.prizes-nav-btn")[0].text.split(" ")[0]
info = body.select("div.contest-body-column-left")[0] info = body.select("div.contest-body-column-left")[0]
info.select("div#site-announcements-page")[0].decompose() info.select("div#site-announcements-page")[0].decompose()
info.select("h3")[0].decompose() info.select("h3")[0].decompose()
info.select("div#contest-body-nav")[0].decompose() info.select("div#contest-body-nav")[0].decompose()
info = str(info).replace("https://www.instructables.com", '') info = str(info).replace("https://www.instructables.com", "{instance_root_url}")
entries = body.select("span.contest-entity-count")[0].text entries = body.select("span.contest-entity-count")[0].text
@ -373,16 +517,31 @@ def route_contest(contest):
channel_link = entry.select("div span.ible-channel a")[0].get("href") channel_link = entry.select("div span.ible-channel a")[0].get("href")
views = entry.select(".ible-views")[0].text views = entry.select(".ible-views")[0].text
entry_list.append([link, entry_img, entry_title, author, author_link, channel, channel_link, views]) entry_list.append(
[
link,
entry_img,
entry_title,
author,
author_link,
channel,
channel_link,
views,
]
)
return render_template("contest.html", data=[title, img, entry_count, prizes, info, entry_list]) return render_template(
"contest.html", data=[title, img, entry_count, prizes, info, entry_list]
)
@app.route('/contest/') @app.route("/contest/")
def route_contests(): def route_contests():
data = requests.get("https://www.instructables.com/contest/") data = requests.get("https://www.instructables.com/contest/")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -412,76 +571,104 @@ def route_contests():
item_author = featured_item.select("a.author")[0].text item_author = featured_item.select("a.author")[0].text
item_author_link = featured_item.select("a.author")[0].get("href") item_author_link = featured_item.select("a.author")[0].get("href")
featured_items.append([item_link, item_img, item_title, item_author, item_author_link]) featured_items.append(
[item_link, item_img, item_title, item_author, item_author_link]
)
closed.append([link, img, alt, featured_items]) closed.append([link, img, alt, featured_items])
return render_template("contests.html", data=[contest_count, contests, closed]) return render_template("contests.html", data=[contest_count, contests, closed])
@app.route('/<category>/<channel>/projects/')
@app.route("/<category>/<channel>/projects/")
def route_channel_projects(category, channel): def route_channel_projects(category, channel):
return project_list(f"/{category}/{channel}/projects/", channel.title()) return project_list(f"/{category}/{channel}/projects/", channel.title())
@app.route('/<category>/<channel>/projects/<sort>/')
def route_channel_projects_sort(category, channel, sort):
return project_list(f"/{category}/{channel}/projects/{sort}", channel.title(), " Sorted by " + sort.title())
@app.route('/<category>/projects/') @app.route("/<category>/<channel>/projects/<sort>/")
def route_channel_projects_sort(category, channel, sort):
return project_list(
f"/{category}/{channel}/projects/{sort}",
channel.title(),
" Sorted by " + sort.title(),
)
@app.route("/<category>/projects/")
def route_category_projects(category): def route_category_projects(category):
return project_list(f"/{category}/projects/", category.title()) return project_list(f"/{category}/projects/", category.title())
@app.route('/<category>/projects/<sort>/')
@app.route("/<category>/projects/<sort>/")
def route_category_projects_sort(category, sort): def route_category_projects_sort(category, sort):
return project_list(f"/{category}/projects/{sort}", category.title(), " Sorted by " + sort.title()) return project_list(
f"/{category}/projects/{sort}", category.title(), " Sorted by " + sort.title()
)
@app.route('/projects/')
@app.route("/projects/")
def route_projects(): def route_projects():
return project_list("/projects/", '') return project_list("/projects/", "")
@app.route("/search")
def route_search():
    """Render the project search results for the ``q`` query parameter.

    The search term is percent-encoded before it is spliced into the upstream
    path, so characters such as ``&``, ``#``, ``+`` or spaces stay part of the
    term instead of altering the query string sent to Instructables. A request
    without ``q`` still fails on the ``request.args["q"]`` lookup, as before.
    """
    query = quote(request.args["q"], safe="")
    return project_list("/search/?q=" + query + "&projects=all", "Search")
@app.route('/projects/<sort>/')
@app.route("/projects/<sort>/")
def route_projects_sort(sort): def route_projects_sort(sort):
return project_list(f"/projects/{sort}", '', " Sorted by " + sort.title()) return project_list(f"/projects/{sort}", "", " Sorted by " + sort.title())
@app.route('/circuits/')
@app.route("/circuits/")
def route_circuits(): def route_circuits():
return category_page("/circuits/", "Circuits") return category_page("/circuits/", "Circuits")
@app.route('/workshop/')
@app.route("/workshop/")
def route_workshop(): def route_workshop():
return category_page("/workshop/", "Workshop") return category_page("/workshop/", "Workshop")
@app.route('/craft/')
@app.route("/craft/")
def route_craft(): def route_craft():
return category_page("/craft/", "Craft") return category_page("/craft/", "Craft")
@app.route('/cooking/')
@app.route("/cooking/")
def route_cooking(): def route_cooking():
return category_page("/cooking/", "Cooking") return category_page("/cooking/", "Cooking")
@app.route('/living/')
@app.route("/living/")
def route_living(): def route_living():
return category_page("/living/", "Living") return category_page("/living/", "Living")
@app.route('/outside/')
@app.route("/outside/")
def route_outside(): def route_outside():
return category_page("/outside/", "Outside") return category_page("/outside/", "Outside")
@app.route('/teachers/')
@app.route("/teachers/")
def route_teachers(): def route_teachers():
return category_page("/teachers/", "Teachers", True) return category_page("/teachers/", "Teachers", True)
@app.route('/sitemap/projects/<category>/<subcategory>')
def route_sitemap_circuits(category, subcategory):
return category_page("/" + category + "/" + subcategory, subcategory + " - " + category)
@app.route('/member/<member>/instructables/') @app.route("/sitemap/projects/<category>/<subcategory>")
def route_sitemap_circuits(category, subcategory):
return category_page(
"/" + category + "/" + subcategory, subcategory + " - " + category
)
@app.route("/member/<member>/instructables/")
def route_member_instructables(member): def route_member_instructables(member):
data = requests.get(f"https://www.instructables.com/member/{member}/instructables") data = requests.get(f"https://www.instructables.com/member/{member}/instructables")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -505,18 +692,24 @@ def route_member_instructables(member):
ible_list.append([link, img, title, views, favorites]) ible_list.append([link, img, title, views, favorites])
return render_template("member-instructables.html", data=header_content + [ible_list]) return render_template(
"member-instructables.html", data=header_content + [ible_list]
)
@app.route('/member/<member>/') @app.route("/member/<member>/")
def route_member(member): def route_member(member):
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0' "User-Agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0"
} }
data = requests.get(f"https://www.instructables.com/member/{member}/", headers=headers) data = requests.get(
f"https://www.instructables.com/member/{member}/", headers=headers
)
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -539,8 +732,9 @@ def route_member(member):
ibles.append([ible_title, ible_link, ible_img]) ibles.append([ible_title, ible_link, ible_img])
ach_list = body.select(
ach_list = body.select("div.two-col-section div.right-col-section.centered-sidebar div.boxed-content.about-me") "div.two-col-section div.right-col-section.centered-sidebar div.boxed-content.about-me"
)
ach_list_title = "" ach_list_title = ""
achs = [] achs = []
@ -548,19 +742,30 @@ def route_member(member):
if len(ach_list) > 1: if len(ach_list) > 1:
ach_list = ach_list[1] ach_list = ach_list[1]
ach_list_title = ach_list.select("h2.module-title")[0].text ach_list_title = ach_list.select("h2.module-title")[0].text
for ach in ach_list.select("div.achievements-section.main-achievements.contest-achievements div.achievement-item:not(.two-column-filler)"): for ach in ach_list.select(
ach_title = ach.select("div.achievement-info span.achievement-title")[0].text "div.achievements-section.main-achievements.contest-achievements div.achievement-item:not(.two-column-filler)"
ach_desc = ach.select("div.achievement-info span.achievement-description")[0].text ):
ach_title = ach.select("div.achievement-info span.achievement-title")[
0
].text
ach_desc = ach.select("div.achievement-info span.achievement-description")[
0
].text
achs.append([ach_title, ach_desc]) achs.append([ach_title, ach_desc])
return render_template("member.html", data=header_content + [ible_list_title, ibles, ach_list_title, achs]) return render_template(
"member.html",
data=header_content + [ible_list_title, ibles, ach_list_title, achs],
)
@app.route('/<article>/') @app.route("/<article>/")
def route_article(article): def route_article(article):
data = requests.get(f"https://www.instructables.com/{article}/") data = requests.get(f"https://www.instructables.com/{article}/")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -603,7 +808,12 @@ def route_article(article):
step_videos.append([proxy(img.get("src"))]) step_videos.append([proxy(img.get("src"))])
step_text = str(step.select("div.step-body")[0]) step_text = str(step.select("div.step-body")[0])
step_text = step_text.replace("https://content.instructables.com", "/proxy/?url=https://content.instructables.com") step_text = step_text.replace(
"https://content.instructables.com",
"{instance_root_url}/proxy/?url=https://content.instructables.com".format(
instance_root_url=get_instance_root_url(request)
),
)
steps.append([step_title, step_imgs, step_text, step_videos]) steps.append([step_title, step_imgs, step_text, step_videos])
comments_list = [] comments_list = []
@ -645,44 +855,123 @@ def route_article(article):
# replies_used += 1 # replies_used += 1
# comments_list.append([comment_votes, comment_author_img_src, comment_author_img_alt, comment_author, comment_author_link, comment_date, comment_text, comment_reply_count, reply_list]) # comments_list.append([comment_votes, comment_author_img_src, comment_author_img_alt, comment_author, comment_author_link, comment_date, comment_text, comment_reply_count, reply_list])
return render_template("article.html", data=[title, author, author_link, category, category_link, channel, channel_link, views, favorites, steps, comment_count, comments_list], enumerate=enumerate) return render_template(
"article.html",
data=[
title,
author,
author_link,
category,
category_link,
channel,
channel_link,
views,
favorites,
steps,
comment_count,
comments_list,
],
enumerate=enumerate,
)
else: else:
## Collections ## Collections
thumbnails = [] thumbnails = []
for thumbnail in soup.select("ul#thumbnails-list li"): for thumbnail in soup.select("ul#thumbnails-list li"):
text = link = img = thumbnail_title = thumbnail_author = thumbnail_author_link = thumbnail_channel = thumbnail_channel_link = '' text = (
link
) = (
img
) = (
thumbnail_title
) = (
thumbnail_author
) = (
thumbnail_author_link
) = thumbnail_channel = thumbnail_channel_link = ""
if thumbnail.select("div.thumbnail > p") != []: if thumbnail.select("div.thumbnail > p") != []:
text = thumbnail.select("div.thumbnail > p")[0] text = thumbnail.select("div.thumbnail > p")[0]
if thumbnail.select("div.thumbnail div.thumbnail-image"): if thumbnail.select("div.thumbnail div.thumbnail-image"):
link = thumbnail.select("div.thumbnail div.thumbnail-image a")[0].get("href") link = thumbnail.select("div.thumbnail div.thumbnail-image a")[
img = proxy(thumbnail.select("div.thumbnail div.thumbnail-image a img")[0].get("src")) 0
thumbnail_title = thumbnail.select("div.thumbnail div.thumbnail-info h3.title a")[0].text ].get("href")
thumbnail_author = thumbnail.select("div.thumbnail div.thumbnail-info span.author a")[0].text img = proxy(
thumbnail_author_link = thumbnail.select("div.thumbnail div.thumbnail-info span.author a")[0].get("href") thumbnail.select("div.thumbnail div.thumbnail-image a img")[
thumbnail_channel = thumbnail.select("div.thumbnail div.thumbnail-info span.origin a")[0].text 0
thumbnail_channel_link = thumbnail.select("div.thumbnail div.thumbnail-info span.origin a")[0].get("href") ].get("src")
thumbnails.append([text, link, img, thumbnail_title, thumbnail_author, thumbnail_author_link, thumbnail_channel, thumbnail_channel_link]) )
thumbnail_title = thumbnail.select(
"div.thumbnail div.thumbnail-info h3.title a"
)[0].text
thumbnail_author = thumbnail.select(
"div.thumbnail div.thumbnail-info span.author a"
)[0].text
thumbnail_author_link = thumbnail.select(
"div.thumbnail div.thumbnail-info span.author a"
)[0].get("href")
thumbnail_channel = thumbnail.select(
"div.thumbnail div.thumbnail-info span.origin a"
)[0].text
thumbnail_channel_link = thumbnail.select(
"div.thumbnail div.thumbnail-info span.origin a"
)[0].get("href")
thumbnails.append(
[
text,
link,
img,
thumbnail_title,
thumbnail_author,
thumbnail_author_link,
thumbnail_channel,
thumbnail_channel_link,
]
)
return render_template(
return render_template("collection.html", data=[title, author, author_link, category, category_link, channel, channel_link, views, favorites, thumbnails]) "collection.html",
data=[
title,
author,
author_link,
category,
category_link,
channel,
channel_link,
views,
favorites,
thumbnails,
],
)
except Exception: except Exception:
print_exc() print_exc()
return Response(render_template("404.html"), status=404) return Response(render_template("404.html"), status=404)
# Category path segments accepted by route_channel_redirect.
_CHANNEL_CATEGORIES = frozenset(
    {"circuits", "workshop", "craft", "cooking", "living", "outside", "teachers"}
)


@app.route("/<category>/<channel>/")
def route_channel_redirect(category, channel):
    """Redirect ``/<category>/<channel>/`` to that channel's projects listing.

    Args:
        category: First path segment of the requested URL.
        channel: Second path segment of the requested URL.

    Returns:
        A 307 redirect to ``/<category>/<channel>/projects/`` when ``category``
        is one of the accepted categories; otherwise the rendered 404 template
        with a 404 status.
    """
    # Guard clause: anything outside the accepted categories is a 404.
    if category not in _CHANNEL_CATEGORIES:
        return Response(render_template("404.html"), status=404)
    return redirect(f"/{category}/{channel}/projects/", 307)
@app.route('/')
@app.route("/")
def route_explore(): def route_explore():
data = requests.get("https://www.instructables.com/") data = requests.get("https://www.instructables.com/")
if data.status_code != 200: if data.status_code != 200:
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code) return Response(
render_template(str(data.status_code) + ".html"), status=data.status_code
)
soup = BeautifulSoup(data.text, "html.parser") soup = BeautifulSoup(data.text, "html.parser")
@ -690,22 +979,33 @@ def route_explore():
title = explore.select("h2")[0].text title = explore.select("h2")[0].text
circuits = explore_lists(explore.select(".home-content-explore-category-circuits")[0]) circuits = explore_lists(
workshop = explore_lists(explore.select(".home-content-explore-category-workshop")[0]) explore.select(".home-content-explore-category-circuits")[0]
)
workshop = explore_lists(
explore.select(".home-content-explore-category-workshop")[0]
)
craft = explore_lists(explore.select(".home-content-explore-category-craft")[0]) craft = explore_lists(explore.select(".home-content-explore-category-craft")[0])
cooking = explore_lists(explore.select(".home-content-explore-category-cooking")[0]) cooking = explore_lists(explore.select(".home-content-explore-category-cooking")[0])
living = explore_lists(explore.select(".home-content-explore-category-living")[0]) living = explore_lists(explore.select(".home-content-explore-category-living")[0])
outside = explore_lists(explore.select(".home-content-explore-category-outside")[0]) outside = explore_lists(explore.select(".home-content-explore-category-outside")[0])
teachers = explore_lists(explore.select(".home-content-explore-category-teachers")[0]) teachers = explore_lists(
explore.select(".home-content-explore-category-teachers")[0]
)
return render_template("index.html", data=[title, circuits, workshop, craft, cooking, living, outside, teachers]) return render_template(
"index.html",
data=[title, circuits, workshop, craft, cooking, living, outside, teachers],
)
@app.route('/proxy/') @app.route("/proxy/")
def route_proxy(): def route_proxy():
url = request.args.get("url") url = request.args.get("url")
if url != None: if url != None:
if url.startswith("https://cdn.instructables.com/") or url.startswith("https://content.instructables.com/"): if url.startswith("https://cdn.instructables.com/") or url.startswith(
"https://content.instructables.com/"
):
data = requests.get(unquote(url)) data = requests.get(unquote(url))
return Response(data.content, content_type=data.headers["content-type"]) return Response(data.content, content_type=data.headers["content-type"])
else: else:
@ -717,5 +1017,6 @@ def route_proxy():
def not_found(e):
    """Render the 404 page and send it with a 404 status code.

    NOTE(review): presumably registered as the app's 404 error handler; the
    decorator is not visible in this part of the file -- confirm.

    Args:
        e: The error passed in by the caller; it is not inspected.

    Returns:
        A ``Response`` containing the rendered ``404.html`` template with
        status 404.
    """
    # Returning only the rendered template would be sent as 200 OK. The other
    # 404 paths in this file wrap it in a Response with an explicit status.
    return Response(render_template("404.html"), status=404)
# Start the app's own server only when this file is executed directly
# (not when it is imported, e.g. by a WSGI server).
if __name__ == "__main__":
    app.run(host=args.listen_host, port=args.port, debug=debugmode)

View file

@ -1,4 +1,5 @@
bs4 bs4
requests requests
flask flask
selenium requests-html
playwright

View file

@ -1,7 +1,8 @@
<footer> <footer>
<hr> <hr>
<center> <center>
<p><a href="">Source code (AGPLv3)</a></p> <p><a href="https://git.kumi/PrivateCoffee/indestructables">Modified Source Code (AGPLv3)</a></p>
<p><a href="https://codeberg.org/snowcatridge10/indestructables">Original Source Code (AGPLv3)</a></p>
<p><a href="">View privacy policy.</a></p> <p><a href="">View privacy policy.</a></p>
</center> </center>
</footer> </footer>