Replaced Selenium with Playwright
Fixed requirements. Added source links to footer.
This commit is contained in:
parent
586b916ed5
commit
823a44f664
8 changed files with 483 additions and 2941 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
venv/
|
||||
*.pyc
|
||||
__pycache__/
|
||||
.vscode
|
16
README.md
16
README.md
|
@ -1,24 +1,36 @@
|
|||
<div align="center">
|
||||
<img src="static/img/logo.png">
|
||||
<h1>Indestructables</h1>
|
||||
An open source alternative front-end to Instructables
|
||||
An open-source alternative front-end to Instructables. This is a fork of <a href="https://codeberg.org/snowcatridge10/indestructables">snowcatridge10's Indestructables</a> that uses Playwright instead of Selenium.
|
||||
|
||||
<a href="https://matrix.to/#/#indestructables:fedora.im">snowcatridge10's Matrix Room</a>
|
||||
|
||||
<a href="https://matrix.to/#/#indestructables:fedora.im">Matrix</a>
|
||||
</div>
|
||||
|
||||
# Instances
|
||||
|
||||
None, yet!
|
||||
|
||||
# Run your own instance
|
||||
|
||||
## Dependencies
|
||||
|
||||
First, create a virtual environment with `python3 -m venv venv` and activate it with `source venv/bin/activate`. Then, install the dependencies with:
|
||||
|
||||
`pip3 install -r requirements.txt`.
|
||||
|
||||
For the production environment, you also need the uWSGI Python 3 plugin. On Debian, it can be installed via `apt install uwsgi-plugin-python3`.
|
||||
|
||||
Furthermore, you need to install the Chromium binary used by Playwright. You can do this by running `playwright install chromium`.
|
||||
|
||||
## Production
|
||||
|
||||
1. Clone the repository
|
||||
2. Run `uwsgi --plugin python3 --http-socket 0.0.0.0:8002 --wsgi-file main.py --callable app --processes 4 --threads 2`
|
||||
3. Point your reverse proxy to http://localhost:8002
|
||||
|
||||
## Development
|
||||
|
||||
1. Clone the repository
|
||||
2. Run `python3 main.py`
|
||||
3. Connect to http://localhost:8002
|
||||
|
|
BIN
chromedriver
BIN
chromedriver
Binary file not shown.
BIN
chromedriver.exe
BIN
chromedriver.exe
Binary file not shown.
2777
geckodriver.log
2777
geckodriver.log
File diff suppressed because it is too large
Load diff
617
main.py
617
main.py
|
@ -1,47 +1,64 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
from flask import Flask, render_template, request, redirect, Response, stream_with_context
|
||||
from flask import (
|
||||
Flask,
|
||||
render_template,
|
||||
request,
|
||||
redirect,
|
||||
Response,
|
||||
stream_with_context,
|
||||
)
|
||||
import requests
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import quote, unquote
|
||||
from traceback import print_exc
|
||||
from requests_html import HTMLSession
|
||||
from playwright.sync_api import sync_playwright
|
||||
from urllib.parse import urljoin
|
||||
from argparse import ArgumentParser
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import os
|
||||
|
||||
debugmode = False
|
||||
|
||||
try:
|
||||
if sys.argv[1] == "debug":
|
||||
if __name__ == "__main__":
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--port",
|
||||
default=8002,
|
||||
type=int,
|
||||
help="Port to listen on",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
action="store_true",
|
||||
help="Enable debug mode",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-l",
|
||||
"--listen-host",
|
||||
default="127.0.0.1",
|
||||
help="Host to listen on",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.debug:
|
||||
debugmode = True
|
||||
except:
|
||||
pass
|
||||
|
||||
print("Loading...")
|
||||
|
||||
def proxy(src):
    """Return the local ``/proxy/`` path that relays the resource ``src``.

    ``src`` is converted with ``str()`` and percent-encoded with
    ``urllib.parse.quote`` before being appended as the ``url`` query value.
    """
    encoded_src = quote(str(src))
    return f"/proxy/?url={encoded_src}"
|
||||
|
||||
instance_root_url = "http://127.0.0.1:8002"
|
||||
def get_instance_root_url(request):
    """Return the ``url_root`` attribute of ``request``.

    NOTE(review): presumably always called with Flask's current ``request``,
    so the result is the base URL this instance is served from -- confirm
    against callers.
    """
    root_url = request.url_root
    return root_url
|
||||
|
||||
chrome_options = Options()
|
||||
chrome_options.add_argument("--headless")
|
||||
|
||||
if os.name == 'nt':
|
||||
# Windows
|
||||
driver = webdriver.Chrome('./chromedriver.exe', options=chrome_options)
|
||||
else:
|
||||
# Linux
|
||||
driver = webdriver.Chrome('./chromedriver', options=chrome_options)
|
||||
playwright = sync_playwright().start()
|
||||
browser = playwright.chromium.launch(headless=True)
|
||||
page = browser.new_page()
|
||||
|
||||
channels = []
|
||||
|
||||
|
@ -59,49 +76,75 @@ global_ibles = {}
|
|||
|
||||
global_ibles["/projects"] = []
|
||||
|
||||
driver.get("https://www.instructables.com/projects")
|
||||
page.goto("https://www.instructables.com/projects")
|
||||
|
||||
while len(global_ibles["/projects"]) <= 0:
|
||||
for ible in driver.find_elements(By.CLASS_NAME, "ibleCard__QPJVm"):
|
||||
link = ible.find_elements(By.CSS_SELECTOR, "a")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url)
|
||||
img = proxy(ible.find_elements(By.CSS_SELECTOR, "img")[0].get_attribute("src"))
|
||||
for ible in page.query_selector_all(".ibleCard__QPJVm"):
|
||||
link = (
|
||||
ible.query_selector("a")
|
||||
.get_attribute("href")
|
||||
.replace("https://www.instructables.com", "{instance_root_url}")
|
||||
)
|
||||
img = proxy(ible.query_selector("img").get_attribute("src"))
|
||||
|
||||
title = ible.find_elements(By.CLASS_NAME, "title__t0fGQ")[0].text
|
||||
author = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].text
|
||||
author_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url)
|
||||
title = ible.query_selector(".title__t0fGQ").inner_text()
|
||||
author = ible.query_selector("a[href^='/member/']").inner_text()
|
||||
author_link = (
|
||||
ible.query_selector("a[href^='/member/']")
|
||||
.get_attribute("href")
|
||||
.replace("https://www.instructables.com", "{instance_root_url}")
|
||||
)
|
||||
|
||||
channel = "TEST"
|
||||
channel_link = "TEST"
|
||||
|
||||
for c in channels:
|
||||
try:
|
||||
channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].text
|
||||
channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url)
|
||||
channel = ible.query_selector("a[href^='/" + c + "']").inner_text()
|
||||
channel_link = (
|
||||
ible.query_selector("a[href^='/" + c + "']")
|
||||
.get_attribute("href")
|
||||
.replace("https://www.instructables.com", "{instance_root_url}")
|
||||
)
|
||||
except:
|
||||
try:
|
||||
channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].text
|
||||
channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url)
|
||||
channel = ible.query_selector("a[href^='/projects/']").inner_text()
|
||||
channel_link = (
|
||||
ible.query_selector("a[href^='/projects/']")
|
||||
.get_attribute("href")
|
||||
.replace("https://www.instructables.com", "{instance_root_url}")
|
||||
)
|
||||
except:
|
||||
pass
|
||||
|
||||
stats = ible.find_elements(By.CLASS_NAME, "stats__GFKyl")[0]
|
||||
stats = ible.query_selector(".stats__GFKyl")
|
||||
views = 0
|
||||
if stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']") != []:
|
||||
views = stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']")[0].text
|
||||
if stats.query_selector("div[title$=' views']"):
|
||||
views = stats.query_selector("div[title$=' views']").inner_text()
|
||||
favorites = 0
|
||||
if stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']") != []:
|
||||
favorites = stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']")[0].text
|
||||
if stats.query_selector("div[title$=' favorites']"):
|
||||
favorites = stats.query_selector("div[title$=' favorites']").inner_text()
|
||||
|
||||
global_ibles["/projects"].append([link, img, title, author, author_link, channel, channel_link, views, favorites])
|
||||
global_ibles["/projects"].append(
|
||||
[
|
||||
link,
|
||||
img,
|
||||
title,
|
||||
author,
|
||||
author_link,
|
||||
channel,
|
||||
channel_link,
|
||||
views,
|
||||
favorites,
|
||||
]
|
||||
)
|
||||
|
||||
firefox_capabilities = DesiredCapabilities.FIREFOX
|
||||
firefox_capabilities['marionette'] = True
|
||||
firefox_capabilities['binary'] = "C:/Program Files/Mozilla Firefox/firefox.exe"
|
||||
options = Options()
|
||||
options.add_argument("--headless")
|
||||
browser.close()
|
||||
playwright.stop()
|
||||
|
||||
print("Started!")
|
||||
|
||||
|
||||
def explore_lists(soup):
|
||||
list_ = []
|
||||
for ible in soup.select(".home-content-explore-ible"):
|
||||
|
@ -119,12 +162,30 @@ def explore_lists(soup):
|
|||
favorites = 0
|
||||
if ible.select("span.ible-favorites") != []:
|
||||
favorites = ible.select("span.ible-favorites")[0].text
|
||||
list_.append([link, img, alt, title, author, author_link, channel, channel_link, favorites, views])
|
||||
list_.append(
|
||||
[
|
||||
link,
|
||||
img,
|
||||
alt,
|
||||
title,
|
||||
author,
|
||||
author_link,
|
||||
channel,
|
||||
channel_link,
|
||||
favorites,
|
||||
views,
|
||||
]
|
||||
)
|
||||
return list_
|
||||
|
||||
|
||||
def member_header(header):
|
||||
avatar = proxy(header.select("div.profile-avatar-container img.profile-avatar")[0].get("src"))
|
||||
title = header.select("div.profile-top div.profile-headline h1.profile-title")[0].text
|
||||
avatar = proxy(
|
||||
header.select("div.profile-avatar-container img.profile-avatar")[0].get("src")
|
||||
)
|
||||
title = header.select("div.profile-top div.profile-headline h1.profile-title")[
|
||||
0
|
||||
].text
|
||||
|
||||
profile_top = header.select("div.profile-top")[0]
|
||||
|
||||
|
@ -133,7 +194,6 @@ def member_header(header):
|
|||
# stats_text = profile_top.select("div.profile-header-stats")[0]
|
||||
# stats_num = header.select("div.profile-top div.profile-header-stats")[1]
|
||||
|
||||
|
||||
location = header.select("span.member-location")
|
||||
if location != []:
|
||||
location = location[0].text
|
||||
|
@ -146,7 +206,6 @@ def member_header(header):
|
|||
else:
|
||||
signup = 0
|
||||
|
||||
|
||||
instructables = header.select("span.ible-count")
|
||||
if instructables != []:
|
||||
instructables = instructables[0].text
|
||||
|
@ -177,25 +236,42 @@ def member_header(header):
|
|||
else:
|
||||
bio = ""
|
||||
|
||||
return [avatar, title, location, signup, instructables, views, comments, followers, bio]
|
||||
return [
|
||||
avatar,
|
||||
title,
|
||||
location,
|
||||
signup,
|
||||
instructables,
|
||||
views,
|
||||
comments,
|
||||
followers,
|
||||
bio,
|
||||
]
|
||||
|
||||
|
||||
def category_page(path, name, teachers=False):
|
||||
data = requests.get("https://www.instructables.com" + path)
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
channels = []
|
||||
for card in soup.select("div.scrollable-cards-inner div.scrollable-card"):
|
||||
link = card.a["href"]
|
||||
img = proxy(card.select(f"a{' noscript' if teachers else ''} img")[0].get("src"))
|
||||
img = proxy(
|
||||
card.select(f"a{' noscript' if teachers else ''} img")[0].get("src")
|
||||
)
|
||||
title = card.select("a img")[0].get("alt")
|
||||
|
||||
channels.append([link, title, img])
|
||||
|
||||
ibles = []
|
||||
for ible in soup.select("div.category-landing-projects-list div.category-landing-projects-ible"):
|
||||
for ible in soup.select(
|
||||
"div.category-landing-projects-list div.category-landing-projects-ible"
|
||||
):
|
||||
link = ible.a["href"]
|
||||
img = proxy(ible.select("a noscript img")[0].get("src"))
|
||||
|
||||
|
@ -214,75 +290,134 @@ def category_page(path, name, teachers=False):
|
|||
if stats.select("span.ible-favorites") != []:
|
||||
favorites = stats.select("span.ible-favorites")[0].text
|
||||
|
||||
ibles.append([link, img, title, author, author_link, channel, channel_link, views, favorites])
|
||||
ibles.append(
|
||||
[
|
||||
link,
|
||||
img,
|
||||
title,
|
||||
author,
|
||||
author_link,
|
||||
channel,
|
||||
channel_link,
|
||||
views,
|
||||
favorites,
|
||||
]
|
||||
)
|
||||
|
||||
contests = []
|
||||
for contest in soup.select("div.category-landing-contests-list div.category-landing-contests-item"):
|
||||
for contest in soup.select(
|
||||
"div.category-landing-contests-list div.category-landing-contests-item"
|
||||
):
|
||||
link = contest.a["href"]
|
||||
img = proxy(contest.select("a noscript img")[0].get("src"))
|
||||
title = contest.select("a img")[0].get("alt")
|
||||
|
||||
contests.append([link, img, title])
|
||||
|
||||
return render_template("category.html", data=[name, channels, ibles, contests, path])
|
||||
return render_template(
|
||||
"category.html", data=[name, channels, ibles, contests, path]
|
||||
)
|
||||
|
||||
def _scrape_project_cards(url, limit=8):
    """Scrape up to ``limit`` project cards from the Instructables page at ``url``.

    Returns a list with one
    ``[link, img, title, author, author_link, channel, channel_link, views,
    favorites]`` entry per card, in page order.
    """
    site = "https://www.instructables.com"
    # NOTE(review): the literal "{instance_root_url}" placeholder is kept exactly
    # as committed; presumably it is substituted later (template?) -- confirm.
    placeholder = "{instance_root_url}"
    cards = []

    # The context manager stops Playwright even when scraping raises; the
    # try/finally does the same for the browser process.
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)

            for ible in page.query_selector_all(".ibleCard__QPJVm"):
                link = (
                    ible.query_selector("a")
                    .get_attribute("href")
                    .replace(site, placeholder)
                )
                # Playwright element handles have no Selenium-style
                # find_elements()/.text; use query_selector()/inner_text().
                img = proxy(ible.query_selector("img").get_attribute("src"))

                title = ible.query_selector(".title__t0fGQ").inner_text()
                author_anchor = ible.query_selector("a[href^='/member/']")
                author = author_anchor.inner_text()
                author_link = author_anchor.get_attribute("href").replace(
                    site, placeholder
                )

                channel = "TEST"
                channel_link = "TEST"

                for c in channels:
                    try:
                        # query_selector() returns None when nothing matches,
                        # so the attribute access below raises and triggers
                        # the generic "/projects/" fallback.
                        channel_anchor = ible.query_selector("a[href^='/" + c + "']")
                        channel = channel_anchor.inner_text()
                        channel_link = channel_anchor.get_attribute("href").replace(
                            site, placeholder
                        )
                    except Exception:
                        try:
                            channel = ible.query_selector(
                                "a[href^='/projects/'] span"
                            ).inner_text()
                            channel_link = (
                                ible.query_selector("a[href^='/projects/']")
                                .get_attribute("href")
                                .replace(site, placeholder)
                            )
                        except Exception:
                            # Best effort: keep whatever channel values are set.
                            pass

                stats = ible.query_selector(".stats__GFKyl")

                views = 0
                views_node = stats.query_selector("div[title$=' views']")
                if views_node:
                    views = views_node.inner_text()

                favorites = 0
                favorites_node = stats.query_selector("div[title$=' favorites']")
                if favorites_node:
                    favorites = favorites_node.inner_text()

                cards.append(
                    [
                        link,
                        img,
                        title,
                        author,
                        author_link,
                        channel,
                        channel_link,
                        views,
                        favorites,
                    ]
                )

                if len(cards) >= limit:
                    break
        finally:
            browser.close()

    return cards


def project_list(path, head, sort=""):
    """Render ``projects.html`` for the Instructables listing at ``path``.

    Args:
        path: Site-relative path (optionally with a query string) of the
            listing on www.instructables.com.
        head: Heading prefix; an empty string yields just "Projects".
        sort: Text appended to the heading (e.g. " Sorted by Recent").

    Returns:
        The rendered ``projects.html`` template, given
        ``[heading, cards, parent_path]`` as ``data``.
    """
    head = f"{head + ' ' if head != '' else ''}Projects" + sort
    path_ = path.rsplit("/", 1)[0]

    if path in ("/projects/", "/projects"):
        # Served from the list scraped at start-up; no browser is launched.
        ibles = global_ibles["/projects"]
    else:
        ibles = _scrape_project_cards(urljoin("https://www.instructables.com", path))

    return render_template("projects.html", data=[head, ibles, path_])
|
||||
|
||||
|
||||
app = Flask(__name__, template_folder="templates", static_folder="static")
|
||||
|
||||
@app.route('/sitemap/')
|
||||
|
||||
@app.route("/sitemap/")
|
||||
def route_sitemap():
|
||||
data = requests.get(f"https://www.instructables.com/sitemap/")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -301,14 +436,17 @@ def route_sitemap():
|
|||
|
||||
return render_template("sitemap.html", data=groups)
|
||||
|
||||
@app.route('/contest/archive/')
|
||||
|
||||
@app.route("/contest/archive/")
|
||||
def route_contest_archive():
|
||||
page = 1
|
||||
if request.args.get("page") != None:
|
||||
page = request.args.get("page")
|
||||
data = requests.get(f"https://www.instructables.com/contest/archive/?page={page}")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -318,7 +456,9 @@ def route_contest_archive():
|
|||
|
||||
contest_list = []
|
||||
for index, year in enumerate(main.select("div.contest-archive-list h2")):
|
||||
year_list = main.select("div.contest-archive-list div.contest-archive-list-year")[index]
|
||||
year_list = main.select(
|
||||
"div.contest-archive-list div.contest-archive-list-year"
|
||||
)[index]
|
||||
year_name = year.text
|
||||
month_list = []
|
||||
for month in year_list.select("div.contest-archive-list-month"):
|
||||
|
@ -334,31 +474,35 @@ def route_contest_archive():
|
|||
|
||||
pagination = main.select("nav.pagination ul.pagination")[0]
|
||||
|
||||
return render_template("archives.html", data=[page, contest_count, pagination, contest_list])
|
||||
return render_template(
|
||||
"archives.html", data=[page, contest_count, pagination, contest_list]
|
||||
)
|
||||
|
||||
|
||||
@app.route('/contest/<contest>/')
|
||||
@app.route("/contest/<contest>/")
|
||||
def route_contest(contest):
|
||||
data = requests.get(f"https://www.instructables.com/contest/{contest}/")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
title = soup.select("meta[property=\"og:title\"]")[0].get("content")
|
||||
title = soup.select('meta[property="og:title"]')[0].get("content")
|
||||
|
||||
body = soup.select("div#contest-wrapper")[0]
|
||||
|
||||
img = proxy(body.select("div#contest-masthead img")[0].get("src"))
|
||||
|
||||
entry_count = body.select("li.entries-nav-btn")[0].text.split(' ')[0]
|
||||
prizes = body.select("li.prizes-nav-btn")[0].text.split(' ')[0]
|
||||
entry_count = body.select("li.entries-nav-btn")[0].text.split(" ")[0]
|
||||
prizes = body.select("li.prizes-nav-btn")[0].text.split(" ")[0]
|
||||
|
||||
info = body.select("div.contest-body-column-left")[0]
|
||||
info.select("div#site-announcements-page")[0].decompose()
|
||||
info.select("h3")[0].decompose()
|
||||
info.select("div#contest-body-nav")[0].decompose()
|
||||
info = str(info).replace("https://www.instructables.com", '')
|
||||
info = str(info).replace("https://www.instructables.com", "{instance_root_url}")
|
||||
|
||||
entries = body.select("span.contest-entity-count")[0].text
|
||||
|
||||
|
@ -373,16 +517,31 @@ def route_contest(contest):
|
|||
channel_link = entry.select("div span.ible-channel a")[0].get("href")
|
||||
views = entry.select(".ible-views")[0].text
|
||||
|
||||
entry_list.append([link, entry_img, entry_title, author, author_link, channel, channel_link, views])
|
||||
entry_list.append(
|
||||
[
|
||||
link,
|
||||
entry_img,
|
||||
entry_title,
|
||||
author,
|
||||
author_link,
|
||||
channel,
|
||||
channel_link,
|
||||
views,
|
||||
]
|
||||
)
|
||||
|
||||
return render_template("contest.html", data=[title, img, entry_count, prizes, info, entry_list])
|
||||
return render_template(
|
||||
"contest.html", data=[title, img, entry_count, prizes, info, entry_list]
|
||||
)
|
||||
|
||||
|
||||
@app.route('/contest/')
|
||||
@app.route("/contest/")
|
||||
def route_contests():
|
||||
data = requests.get("https://www.instructables.com/contest/")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -391,8 +550,8 @@ def route_contests():
|
|||
contests = []
|
||||
for contest in soup.select("div#cur-contests div.row-fluid div.contest-banner"):
|
||||
link = contest.select("div.contest-banner-inner a")[0].get("href")
|
||||
img = proxy(contest.select("div.contest-banner-inner a img")[0].get("src"))
|
||||
alt = contest.select("div.contest-banner-inner a img")[0].get("alt")
|
||||
img = proxy(contest.select("div.contest-banner-inner a img")[0].get("src"))
|
||||
alt = contest.select("div.contest-banner-inner a img")[0].get("alt")
|
||||
deadline = contest.select("span.contest-meta-deadline")[0].get("data-deadline")
|
||||
prizes = contest.select("span.contest-meta-count")[0].text
|
||||
entries = contest.select("span.contest-meta-count")[1].text
|
||||
|
@ -412,76 +571,104 @@ def route_contests():
|
|||
item_author = featured_item.select("a.author")[0].text
|
||||
item_author_link = featured_item.select("a.author")[0].get("href")
|
||||
|
||||
featured_items.append([item_link, item_img, item_title, item_author, item_author_link])
|
||||
featured_items.append(
|
||||
[item_link, item_img, item_title, item_author, item_author_link]
|
||||
)
|
||||
closed.append([link, img, alt, featured_items])
|
||||
|
||||
return render_template("contests.html", data=[contest_count, contests, closed])
|
||||
|
||||
@app.route("/<category>/<channel>/projects/")
def route_channel_projects(category, channel):
    """Delegate to ``project_list`` for the projects of ``channel`` in ``category``."""
    listing_path = f"/{category}/{channel}/projects/"
    heading = channel.title()
    return project_list(listing_path, heading)
|
||||
|
||||
@app.route("/<category>/<channel>/projects/<sort>/")
def route_channel_projects_sort(category, channel, sort):
    """Delegate to ``project_list`` for a channel's projects in ``sort`` order."""
    listing_path = f"/{category}/{channel}/projects/{sort}"
    heading = channel.title()
    sort_label = " Sorted by " + sort.title()
    return project_list(listing_path, heading, sort_label)
|
||||
|
||||
|
||||
@app.route("/<category>/projects/")
def route_category_projects(category):
    """Delegate to ``project_list`` for the projects of ``category``."""
    listing_path = f"/{category}/projects/"
    heading = category.title()
    return project_list(listing_path, heading)
|
||||
|
||||
@app.route("/<category>/projects/<sort>/")
def route_category_projects_sort(category, sort):
    """Delegate to ``project_list`` for a category's projects in ``sort`` order."""
    listing_path = f"/{category}/projects/{sort}"
    heading = category.title()
    sort_label = " Sorted by " + sort.title()
    return project_list(listing_path, heading, sort_label)
|
||||
|
||||
@app.route("/projects/")
def route_projects():
    """Delegate to ``project_list`` for ``/projects/`` with an empty heading prefix."""
    return project_list(path="/projects/", head="")
|
||||
|
||||
@app.route("/search")
def route_search():
    """Render the project listing for the search term in the ``q`` query parameter.

    The term is percent-encoded before it is placed in the upstream URL, so
    characters such as ``&``, ``#``, ``/`` or spaces remain part of the search
    term instead of altering the query string or the path that
    ``project_list`` derives from it.

    A request without ``q`` fails on the ``request.args["q"]`` lookup, as before.
    """
    search_term = quote(request.args["q"], safe="")
    return project_list("/search/?q=" + search_term + "&projects=all", "Search")
|
||||
|
||||
@app.route("/projects/<sort>/")
def route_projects_sort(sort):
    """Delegate to ``project_list`` for all projects in ``sort`` order."""
    listing_path = f"/projects/{sort}"
    sort_label = " Sorted by " + sort.title()
    return project_list(listing_path, "", sort_label)
|
||||
|
||||
@app.route("/circuits/")
def route_circuits():
    """Delegate to ``category_page`` for the "Circuits" category."""
    return category_page(path="/circuits/", name="Circuits")
|
||||
|
||||
@app.route("/workshop/")
def route_workshop():
    """Delegate to ``category_page`` for the "Workshop" category."""
    return category_page(path="/workshop/", name="Workshop")
|
||||
|
||||
@app.route("/craft/")
def route_craft():
    """Delegate to ``category_page`` for the "Craft" category."""
    return category_page(path="/craft/", name="Craft")
|
||||
|
||||
@app.route("/cooking/")
def route_cooking():
    """Delegate to ``category_page`` for the "Cooking" category."""
    return category_page(path="/cooking/", name="Cooking")
|
||||
|
||||
@app.route("/living/")
def route_living():
    """Delegate to ``category_page`` for the "Living" category."""
    return category_page(path="/living/", name="Living")
|
||||
|
||||
@app.route("/outside/")
def route_outside():
    """Delegate to ``category_page`` for the "Outside" category."""
    return category_page(path="/outside/", name="Outside")
|
||||
|
||||
@app.route("/teachers/")
def route_teachers():
    """Delegate to ``category_page`` for "Teachers" with the ``teachers`` flag set."""
    return category_page("/teachers/", "Teachers", teachers=True)
|
||||
|
||||
@app.route("/sitemap/projects/<category>/<subcategory>")
def route_sitemap_circuits(category, subcategory):
    """Delegate to ``category_page`` for ``/<category>/<subcategory>``.

    The page name passed along is "<subcategory> - <category>".
    """
    upstream_path = f"/{category}/{subcategory}"
    page_name = f"{subcategory} - {category}"
    return category_page(upstream_path, page_name)
|
||||
|
||||
|
||||
@app.route("/member/<member>/instructables/")
|
||||
def route_member_instructables(member):
|
||||
data = requests.get(f"https://www.instructables.com/member/{member}/instructables")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -505,18 +692,24 @@ def route_member_instructables(member):
|
|||
|
||||
ible_list.append([link, img, title, views, favorites])
|
||||
|
||||
return render_template("member-instructables.html", data=header_content + [ible_list])
|
||||
return render_template(
|
||||
"member-instructables.html", data=header_content + [ible_list]
|
||||
)
|
||||
|
||||
|
||||
@app.route('/member/<member>/')
|
||||
@app.route("/member/<member>/")
|
||||
def route_member(member):
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0'
|
||||
"User-Agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0"
|
||||
}
|
||||
|
||||
data = requests.get(f"https://www.instructables.com/member/{member}/", headers=headers)
|
||||
data = requests.get(
|
||||
f"https://www.instructables.com/member/{member}/", headers=headers
|
||||
)
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -539,8 +732,9 @@ def route_member(member):
|
|||
|
||||
ibles.append([ible_title, ible_link, ible_img])
|
||||
|
||||
|
||||
ach_list = body.select("div.two-col-section div.right-col-section.centered-sidebar div.boxed-content.about-me")
|
||||
ach_list = body.select(
|
||||
"div.two-col-section div.right-col-section.centered-sidebar div.boxed-content.about-me"
|
||||
)
|
||||
|
||||
ach_list_title = ""
|
||||
achs = []
|
||||
|
@ -548,19 +742,30 @@ def route_member(member):
|
|||
if len(ach_list) > 1:
|
||||
ach_list = ach_list[1]
|
||||
ach_list_title = ach_list.select("h2.module-title")[0].text
|
||||
for ach in ach_list.select("div.achievements-section.main-achievements.contest-achievements div.achievement-item:not(.two-column-filler)"):
|
||||
ach_title = ach.select("div.achievement-info span.achievement-title")[0].text
|
||||
ach_desc = ach.select("div.achievement-info span.achievement-description")[0].text
|
||||
for ach in ach_list.select(
|
||||
"div.achievements-section.main-achievements.contest-achievements div.achievement-item:not(.two-column-filler)"
|
||||
):
|
||||
ach_title = ach.select("div.achievement-info span.achievement-title")[
|
||||
0
|
||||
].text
|
||||
ach_desc = ach.select("div.achievement-info span.achievement-description")[
|
||||
0
|
||||
].text
|
||||
achs.append([ach_title, ach_desc])
|
||||
|
||||
return render_template("member.html", data=header_content + [ible_list_title, ibles, ach_list_title, achs])
|
||||
return render_template(
|
||||
"member.html",
|
||||
data=header_content + [ible_list_title, ibles, ach_list_title, achs],
|
||||
)
|
||||
|
||||
|
||||
@app.route('/<article>/')
|
||||
@app.route("/<article>/")
|
||||
def route_article(article):
|
||||
data = requests.get(f"https://www.instructables.com/{article}/")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -603,7 +808,12 @@ def route_article(article):
|
|||
step_videos.append([proxy(img.get("src"))])
|
||||
|
||||
step_text = str(step.select("div.step-body")[0])
|
||||
step_text = step_text.replace("https://content.instructables.com", "/proxy/?url=https://content.instructables.com")
|
||||
step_text = step_text.replace(
|
||||
"https://content.instructables.com",
|
||||
"{instance_root_url}/proxy/?url=https://content.instructables.com".format(
|
||||
instance_root_url=get_instance_root_url(request)
|
||||
),
|
||||
)
|
||||
steps.append([step_title, step_imgs, step_text, step_videos])
|
||||
|
||||
comments_list = []
|
||||
|
@ -645,44 +855,123 @@ def route_article(article):
|
|||
# replies_used += 1
|
||||
|
||||
# comments_list.append([comment_votes, comment_author_img_src, comment_author_img_alt, comment_author, comment_author_link, comment_date, comment_text, comment_reply_count, reply_list])
|
||||
return render_template("article.html", data=[title, author, author_link, category, category_link, channel, channel_link, views, favorites, steps, comment_count, comments_list], enumerate=enumerate)
|
||||
return render_template(
|
||||
"article.html",
|
||||
data=[
|
||||
title,
|
||||
author,
|
||||
author_link,
|
||||
category,
|
||||
category_link,
|
||||
channel,
|
||||
channel_link,
|
||||
views,
|
||||
favorites,
|
||||
steps,
|
||||
comment_count,
|
||||
comments_list,
|
||||
],
|
||||
enumerate=enumerate,
|
||||
)
|
||||
else:
|
||||
## Collections
|
||||
thumbnails = []
|
||||
for thumbnail in soup.select("ul#thumbnails-list li"):
|
||||
text = link = img = thumbnail_title = thumbnail_author = thumbnail_author_link = thumbnail_channel = thumbnail_channel_link = ''
|
||||
text = (
|
||||
link
|
||||
) = (
|
||||
img
|
||||
) = (
|
||||
thumbnail_title
|
||||
) = (
|
||||
thumbnail_author
|
||||
) = (
|
||||
thumbnail_author_link
|
||||
) = thumbnail_channel = thumbnail_channel_link = ""
|
||||
|
||||
if thumbnail.select("div.thumbnail > p") != []:
|
||||
text = thumbnail.select("div.thumbnail > p")[0]
|
||||
if thumbnail.select("div.thumbnail div.thumbnail-image"):
|
||||
link = thumbnail.select("div.thumbnail div.thumbnail-image a")[0].get("href")
|
||||
img = proxy(thumbnail.select("div.thumbnail div.thumbnail-image a img")[0].get("src"))
|
||||
thumbnail_title = thumbnail.select("div.thumbnail div.thumbnail-info h3.title a")[0].text
|
||||
thumbnail_author = thumbnail.select("div.thumbnail div.thumbnail-info span.author a")[0].text
|
||||
thumbnail_author_link = thumbnail.select("div.thumbnail div.thumbnail-info span.author a")[0].get("href")
|
||||
thumbnail_channel = thumbnail.select("div.thumbnail div.thumbnail-info span.origin a")[0].text
|
||||
thumbnail_channel_link = thumbnail.select("div.thumbnail div.thumbnail-info span.origin a")[0].get("href")
|
||||
thumbnails.append([text, link, img, thumbnail_title, thumbnail_author, thumbnail_author_link, thumbnail_channel, thumbnail_channel_link])
|
||||
link = thumbnail.select("div.thumbnail div.thumbnail-image a")[
|
||||
0
|
||||
].get("href")
|
||||
img = proxy(
|
||||
thumbnail.select("div.thumbnail div.thumbnail-image a img")[
|
||||
0
|
||||
].get("src")
|
||||
)
|
||||
thumbnail_title = thumbnail.select(
|
||||
"div.thumbnail div.thumbnail-info h3.title a"
|
||||
)[0].text
|
||||
thumbnail_author = thumbnail.select(
|
||||
"div.thumbnail div.thumbnail-info span.author a"
|
||||
)[0].text
|
||||
thumbnail_author_link = thumbnail.select(
|
||||
"div.thumbnail div.thumbnail-info span.author a"
|
||||
)[0].get("href")
|
||||
thumbnail_channel = thumbnail.select(
|
||||
"div.thumbnail div.thumbnail-info span.origin a"
|
||||
)[0].text
|
||||
thumbnail_channel_link = thumbnail.select(
|
||||
"div.thumbnail div.thumbnail-info span.origin a"
|
||||
)[0].get("href")
|
||||
thumbnails.append(
|
||||
[
|
||||
text,
|
||||
link,
|
||||
img,
|
||||
thumbnail_title,
|
||||
thumbnail_author,
|
||||
thumbnail_author_link,
|
||||
thumbnail_channel,
|
||||
thumbnail_channel_link,
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
return render_template("collection.html", data=[title, author, author_link, category, category_link, channel, channel_link, views, favorites, thumbnails])
|
||||
return render_template(
|
||||
"collection.html",
|
||||
data=[
|
||||
title,
|
||||
author,
|
||||
author_link,
|
||||
category,
|
||||
category_link,
|
||||
channel,
|
||||
channel_link,
|
||||
views,
|
||||
favorites,
|
||||
thumbnails,
|
||||
],
|
||||
)
|
||||
|
||||
except Exception:
|
||||
print_exc()
|
||||
return Response(render_template("404.html"), status=404)
|
||||
|
||||
@app.route('/<category>/<channel>/')
|
||||
|
||||
@app.route("/<category>/<channel>/")
|
||||
def route_channel_redirect(category, channel):
|
||||
if category == "circuits" or category == "workshop" or category == "craft" or category == "cooking" or category == "living" or category == "outside" or category == "teachers":
|
||||
if (
|
||||
category == "circuits"
|
||||
or category == "workshop"
|
||||
or category == "craft"
|
||||
or category == "cooking"
|
||||
or category == "living"
|
||||
or category == "outside"
|
||||
or category == "teachers"
|
||||
):
|
||||
return redirect(f"/{category}/{channel}/projects/", 307)
|
||||
else:
|
||||
return Response(render_template("404.html"), status=404)
|
||||
|
||||
@app.route('/')
|
||||
|
||||
@app.route("/")
|
||||
def route_explore():
|
||||
data = requests.get("https://www.instructables.com/")
|
||||
if data.status_code != 200:
|
||||
return Response(render_template(str(data.status_code) + ".html"), status=data.status_code)
|
||||
return Response(
|
||||
render_template(str(data.status_code) + ".html"), status=data.status_code
|
||||
)
|
||||
|
||||
soup = BeautifulSoup(data.text, "html.parser")
|
||||
|
||||
|
@ -690,22 +979,33 @@ def route_explore():
|
|||
|
||||
title = explore.select("h2")[0].text
|
||||
|
||||
circuits = explore_lists(explore.select(".home-content-explore-category-circuits")[0])
|
||||
workshop = explore_lists(explore.select(".home-content-explore-category-workshop")[0])
|
||||
circuits = explore_lists(
|
||||
explore.select(".home-content-explore-category-circuits")[0]
|
||||
)
|
||||
workshop = explore_lists(
|
||||
explore.select(".home-content-explore-category-workshop")[0]
|
||||
)
|
||||
craft = explore_lists(explore.select(".home-content-explore-category-craft")[0])
|
||||
cooking = explore_lists(explore.select(".home-content-explore-category-cooking")[0])
|
||||
living = explore_lists(explore.select(".home-content-explore-category-living")[0])
|
||||
outside = explore_lists(explore.select(".home-content-explore-category-outside")[0])
|
||||
teachers = explore_lists(explore.select(".home-content-explore-category-teachers")[0])
|
||||
teachers = explore_lists(
|
||||
explore.select(".home-content-explore-category-teachers")[0]
|
||||
)
|
||||
|
||||
return render_template("index.html", data=[title, circuits, workshop, craft, cooking, living, outside, teachers])
|
||||
return render_template(
|
||||
"index.html",
|
||||
data=[title, circuits, workshop, craft, cooking, living, outside, teachers],
|
||||
)
|
||||
|
||||
|
||||
@app.route('/proxy/')
|
||||
@app.route("/proxy/")
|
||||
def route_proxy():
|
||||
url = request.args.get("url")
|
||||
if url != None:
|
||||
if url.startswith("https://cdn.instructables.com/") or url.startswith("https://content.instructables.com/"):
|
||||
if url.startswith("https://cdn.instructables.com/") or url.startswith(
|
||||
"https://content.instructables.com/"
|
||||
):
|
||||
data = requests.get(unquote(url))
|
||||
return Response(data.content, content_type=data.headers["content-type"])
|
||||
else:
|
||||
|
@ -717,5 +1017,6 @@ def route_proxy():
|
|||
def not_found(e):
|
||||
return render_template("404.html")
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(port=8002, debug=debugmode)
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(port=args.port, host=args.listen_host, debug=debugmode)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
bs4
|
||||
requests
|
||||
flask
|
||||
selenium
|
||||
requests-html
|
||||
playwright
|
|
@ -1,7 +1,8 @@
|
|||
<footer>
|
||||
<hr>
|
||||
<center>
|
||||
<p><a href="">Source code (AGPLv3)</a></p>
|
||||
<p><a href="https://git.kumi/PrivateCoffee/indestructables">Modified Source Code (AGPLv3)</a></p>
|
||||
<p><a href="https://codeberg.org/snowcatridge10/indestructables">Original Source Code (AGPLv3)</a></p>
|
||||
<p><a href="">View privacy policy.</a></p>
|
||||
</center>
|
||||
</footer>
|
Loading…
Reference in a new issue