Add cron endpoint to update global_ibles

This commit is contained in:
Kumi 2023-06-03 09:46:28 +02:00
parent fbb0fea2d5
commit 75ec8b961f
Signed by: kumi
GPG key ID: ECBCC9082395383F

109
main.py
View file

@ -20,65 +20,33 @@ from argparse import ArgumentParser
import os
# Debug mode stays off unless the script is started with -d/--debug.
debugmode = False

if __name__ == "__main__":
    # Command-line options are only parsed when this file is run as a script,
    # so importing the module never consumes sys.argv.
    parser = ArgumentParser()
    parser.add_argument(
        "-p",
        "--port",
        default=8002,
        type=int,
        help="Port to listen on",
    )
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Enable debug mode",
    )
    parser.add_argument(
        "-l",
        "--listen-host",
        default="127.0.0.1",
        help="Host to listen on",
    )
    args = parser.parse_args()

    if args.debug:
        debugmode = True

print("Loading...")

# Module-level store for scraped entries, keyed by site path
# (the code below fills the "/projects" key).
global_ibles = {}
def proxy(src):
    """Return the local ``/proxy/`` URL for the remote resource *src*.

    *src* is converted with ``str`` and percent-encoded with ``quote`` before
    being appended as the ``url`` query parameter.
    """
    return "/proxy/?url=" + quote(str(src))
def get_instance_root_url(request):
    """Return the ``url_root`` attribute of *request*.

    Kept as a helper so callers have one place to obtain the base URL of the
    running instance from the current request object.
    """
    return request.url_root
def update_data():
playwright = sync_playwright().start()
browser = playwright.chromium.launch(headless=True)
page = browser.new_page()
playwright = sync_playwright().start()
browser = playwright.chromium.launch(headless=True)
page = browser.new_page()
channels = []
channels = []
data = requests.get(f"https://www.instructables.com/sitemap/")
data = requests.get(f"https://www.instructables.com/sitemap/")
soup = BeautifulSoup(data.text, "html.parser")
soup = BeautifulSoup(data.text, "html.parser")
main = soup.select("div.sitemap-content")[0]
main = soup.select("div.sitemap-content")[0]
groups = []
for group in main.select("div.group-section"):
groups = []
for group in main.select("div.group-section"):
channels.append(group.select("h2 a")[0].text.lower())
global_ibles = {}
global_ibles["/projects"] = []
global_ibles["/projects"] = []
page.goto("https://www.instructables.com/projects")
page.goto("https://www.instructables.com/projects")
while len(global_ibles["/projects"]) <= 0:
while len(global_ibles["/projects"]) <= 0:
for ible in page.query_selector_all(".ibleCard__QPJVm"):
link = (
ible.query_selector("a")
@ -139,11 +107,52 @@ while len(global_ibles["/projects"]) <= 0:
]
)
browser.close()
playwright.stop()
browser.close()
playwright.stop()
# Debug mode stays off unless the script is started with -d/--debug.
debugmode = False

if __name__ == "__main__":
    # Command-line options are only parsed when this file is run as a script,
    # so importing the module never consumes sys.argv.
    parser = ArgumentParser()
    parser.add_argument(
        "-p",
        "--port",
        default=8002,
        type=int,
        help="Port to listen on",
    )
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        help="Enable debug mode",
    )
    parser.add_argument(
        "-l",
        "--listen-host",
        default="127.0.0.1",
        help="Host to listen on",
    )
    args = parser.parse_args()

    if args.debug:
        debugmode = True

print("Loading...")

# Run the scrape once at startup, before the application object is created.
update_data()

print("Started!")

# Flask application, using the "templates" and "static" folders.
app = Flask(__name__, template_folder="templates", static_folder="static")
def get_instance_root_url(request):
    """Return the ``url_root`` attribute of *request*.

    Kept as a helper so callers have one place to obtain the base URL of the
    running instance from the current request object.
    """
    return request.url_root
@app.route("/cron/")
def cron():
    """Re-run ``update_data()`` on request and answer with ``"OK"``.

    The call is made synchronously, so the response is only sent after
    ``update_data()`` has returned.
    """
    # NOTE(review): confirm that update_data() declares `global global_ibles`;
    # if it only assigns the name locally, this endpoint refreshes nothing.
    # NOTE(review): the route has no access check visible here - confirm that
    # it is not reachable by untrusted clients, since each hit starts a scrape.
    update_data()
    return "OK"
def explore_lists(soup):
list_ = []
@ -407,10 +416,6 @@ def project_list(path, head, sort=""):
return render_template("projects.html", data=[head, ibles, path_])
# Flask application, using the "templates" and "static" folders.
app = Flask(__name__, template_folder="templates", static_folder="static")
@app.route("/sitemap/")
def route_sitemap():
data = requests.get(f"https://www.instructables.com/sitemap/")