From 3d1a1f6971c80fd58149fc73172907713b15d6ae Mon Sep 17 00:00:00 2001 From: snowcatridge10 Date: Sat, 3 Jun 2023 17:27:48 -0400 Subject: [PATCH 1/5] commit --- .gitignore | 6 +++- main.py | 102 +++++++++++++++++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/.gitignore b/.gitignore index e1e6f5e..085da43 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ -geckodriver.log \ No newline at end of file +geckodriver.log +venv/ +*.pyc +__pycache__/ +.vscode \ No newline at end of file diff --git a/main.py b/main.py index 9313cd4..0a0d45e 100644 --- a/main.py +++ b/main.py @@ -1,13 +1,5 @@ #!/usr/bin/env python -from flask import Flask, render_template, request, redirect, Response, stream_with_context -import requests -import re -from bs4 import BeautifulSoup -from urllib.parse import quote, unquote -from traceback import print_exc -from requests_html import HTMLSession - from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.common.by import By @@ -16,6 +8,15 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.keys import Keys from selenium.webdriver.firefox.options import Options from selenium.webdriver.chrome.options import Options + +from flask import Flask, render_template, request, redirect, Response, stream_with_context +import requests +import re +from bs4 import BeautifulSoup +from urllib.parse import quote, unquote +from traceback import print_exc +from requests_html import HTMLSession +from argparse import ArgumentParser import os debugmode = False @@ -26,6 +27,31 @@ try: except: pass +parser = ArgumentParser() +parser.add_argument( + "-p", + "--port", + default=8002, + type=int, + help="Port to listen on", +) +parser.add_argument( + "-d", + "--debug", + action="store_true", + help="Enable debug mode", +) +parser.add_argument( + "-l", + "--listen-host", + default="127.0.0.1", + help="Host to listen on", +) +args = parser.parse_args() + +if args.debug: + debugmode = True + print("Loading...") def proxy(src): @@ -59,40 +85,41 @@ global_ibles = {} global_ibles["/projects"] = [] -driver.get("https://www.instructables.com/projects") +def update_data(): + driver.get("https://www.instructables.com/projects") -while len(global_ibles["/projects"]) <= 0: - for ible in driver.find_elements(By.CLASS_NAME, "ibleCard__QPJVm"): - link = ible.find_elements(By.CSS_SELECTOR, "a")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) - img = proxy(ible.find_elements(By.CSS_SELECTOR, "img")[0].get_attribute("src")) + while len(global_ibles["/projects"]) <= 0: + for ible in driver.find_elements(By.CLASS_NAME, "ibleCard__QPJVm"): + link = ible.find_elements(By.CSS_SELECTOR, "a")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) + img = proxy(ible.find_elements(By.CSS_SELECTOR, "img")[0].get_attribute("src")) - title = ible.find_elements(By.CLASS_NAME, "title__t0fGQ")[0].text - author = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].text - author_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) + title = ible.find_elements(By.CLASS_NAME, "title__t0fGQ")[0].text + author = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].text + author_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/member/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) - channel = "TEST" - channel_link = "TEST" + channel = "TEST" + channel_link = "TEST" - for c in channels: - try: - channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].text - channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) - except: + for c in channels: try: - channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].text - channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) + channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].text + channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/" + c + "']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) except: - pass + try: + channel = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].text + channel_link = ible.find_elements(By.CSS_SELECTOR, "a[href^='/projects/']")[0].get_attribute("href").replace("https://www.instructables.com", instance_root_url) + except: + pass - stats = ible.find_elements(By.CLASS_NAME, "stats__GFKyl")[0] - views = 0 - if stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']") != []: - views = stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']")[0].text - favorites = 0 - if stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']") != []: - favorites = stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']")[0].text + stats = ible.find_elements(By.CLASS_NAME, "stats__GFKyl")[0] + views = 0 + if stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']") != []: + views = stats.find_elements(By.CSS_SELECTOR, "div[title$=' views']")[0].text + favorites = 0 + if stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']") != []: + favorites = stats.find_elements(By.CSS_SELECTOR, "div[title$=' favorites']")[0].text - global_ibles["/projects"].append([link, img, title, author, author_link, channel, channel_link, views, favorites]) + global_ibles["/projects"].append([link, img, title, author, author_link, channel, channel_link, views, favorites]) firefox_capabilities = DesiredCapabilities.FIREFOX firefox_capabilities['marionette'] = True @@ -713,9 +740,14 @@ def route_proxy(): else: return Response(render_template("400.html"), status=400) +@app.route("/cron/") +def cron(): + update_data() + return "OK" + @app.errorhandler(404) def not_found(e): return render_template("404.html") if __name__ == '__main__': - app.run(port=8002, debug=debugmode) + app.run(port=args.port, host=args.listen_host, debug=debugmode) From 499f0e9cbbe33313661a160dc08fef53d0570c01 Mon Sep 17 00:00:00 2001 From: snowcatridge10 Date: Sat, 3 Jun 2023 17:44:44 -0400 Subject: [PATCH 2/5] add cronjob text file --- cronjob.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 cronjob.txt diff --git a/cronjob.txt b/cronjob.txt new file mode 100644 index 0000000..4b1eb9d --- /dev/null +++ b/cronjob.txt @@ -0,0 +1 @@ +0 * * * * curl http://INSTANCE_URL/cron/ \ No newline at end of file From e9b29282991a0173de9f20c023583817b057b78e Mon Sep 17 00:00:00 2001 From: snowcatridge10 Date: Sat, 3 Jun 2023 18:14:29 -0400 Subject: [PATCH 3/5] Add chromedrivermanager --- main.py | 8 ++------ requirements.txt | 3 ++- static/img/magnifying-glass-solid.svg | 1 + 3 files changed, 5 insertions(+), 7 deletions(-) create mode 100644 static/img/magnifying-glass-solid.svg diff --git a/main.py b/main.py index 0a0d45e..5f92661 100644 --- a/main.py +++ b/main.py @@ -18,6 +18,7 @@ from traceback import print_exc from requests_html import HTMLSession from argparse import ArgumentParser import os +from webdriver_manager.chrome import ChromeDriverManager debugmode = False @@ -62,12 +63,7 @@ instance_root_url = "http://127.0.0.1:8002" chrome_options = Options() chrome_options.add_argument("--headless") -if os.name == 'nt': - # Windows - driver = webdriver.Chrome('./chromedriver.exe', options=chrome_options) -else: - # Linux - driver = webdriver.Chrome('./chromedriver', options=chrome_options) +driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options) channels = [] diff --git a/requirements.txt b/requirements.txt index 56aa84d..635179a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ bs4 requests flask selenium -requests_html \ No newline at end of file +requests_html +webdriver-manager \ No newline at end of file diff --git a/static/img/magnifying-glass-solid.svg b/static/img/magnifying-glass-solid.svg new file mode 100644 index 0000000..ee25f3c --- /dev/null +++ b/static/img/magnifying-glass-solid.svg @@ -0,0 +1 @@ + \ No newline at end of file From ee274a7df7163f6473cd1d9b8a84292cd063a69e Mon Sep 17 00:00:00 2001 From: snowcatridge10 Date: Sat, 3 Jun 2023 18:31:55 -0400 Subject: [PATCH 4/5] Add privacy policy --- main.py | 4 ++++ templates/footer.html | 4 ++-- templates/privacypolicy.html | 23 +++++++++++++++++++++++ templates/privacypolicy.txt | 1 + 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 templates/privacypolicy.html create mode 100644 templates/privacypolicy.txt diff --git a/main.py b/main.py index 5f92661..07e6836 100644 --- a/main.py +++ b/main.py @@ -741,6 +741,10 @@ def cron(): update_data() return "OK" +@app.route("/privacypolicy/") +def privacypolicy(): + return render_template("privacypolicy.html") + @app.errorhandler(404) def not_found(e): return render_template("404.html") diff --git a/templates/footer.html b/templates/footer.html index 918efc7..817d237 100644 --- a/templates/footer.html +++ b/templates/footer.html @@ -1,7 +1,7 @@ \ No newline at end of file diff --git a/templates/privacypolicy.html b/templates/privacypolicy.html new file mode 100644 index 0000000..67eef22 --- /dev/null +++ b/templates/privacypolicy.html @@ -0,0 +1,23 @@ + + + + + Privacy Policy - Indestructables + + + {% include "style.html" %} + + + + + {% include "header.html" %} +
+
+

Privacy Policy

+
+

{% include "privacypolicy.txt" %}

+
+ {% include "footer.html" %} + + + \ No newline at end of file diff --git a/templates/privacypolicy.txt b/templates/privacypolicy.txt new file mode 100644 index 0000000..7905154 --- /dev/null +++ b/templates/privacypolicy.txt @@ -0,0 +1 @@ +Nothing here, yet! \ No newline at end of file From 2798786f510adc836d74021c4e107c8f42027ffa Mon Sep 17 00:00:00 2001 From: snowcatridge10 Date: Sat, 3 Jun 2023 19:16:16 -0400 Subject: [PATCH 5/5] update data on startup --- main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.py b/main.py index 07e6836..725b9f8 100644 --- a/main.py +++ b/main.py @@ -117,6 +117,8 @@ def update_data(): global_ibles["/projects"].append([link, img, title, author, author_link, channel, channel_link, views, favorites]) +update_data() + firefox_capabilities = DesiredCapabilities.FIREFOX firefox_capabilities['marionette'] = True firefox_capabilities['binary'] = "C:/Program Files/Mozilla Firefox/firefox.exe"