#!/usr/bin/env python3
"""Download magazine issues from app.myreadit.com with a PhantomJS browser.

Every issue ID (or reader URL) given on the command line is opened in the web
reader; its page images are passed to ``helpers.downloadPage`` and, unless
disabled, the result is handed to ``helpers.makePDF``.
"""

import argparse
import multiprocessing
import os
import shutil
import time
import urllib.error
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys

import helpers
import helpers.scripts
# NOTE(review): this module must provide riuser()/ripass(); presumably a
# project-local module that shadows the packaging library of the same name.
import setuptools

_LOGIN_URL = "https://app.myreadit.com/login"
# Reader URL template: issue ID, then the page number used as URL fragment.
_READER_URL = "https://app.myreadit.com/reader/%s#%i"


def status(driver):
    """Return True if the current page source does not contain 'Login'.

    This is the script's heuristic for "logged in". Any page without that
    word counts as logged in - presumably including a freshly started
    browser that has not loaded anything yet.
    """
    return 'Login' not in driver.page_source


def loadPage(url, driver, period=5, init=False):
    """Navigate ``driver`` to ``url`` and report whether it ended up there.

    Nothing is loaded if the browser is already at ``url``. Otherwise the
    session is (re-)established via ``login`` first, unless ``init`` is true
    or ``status`` already reports a logged-in page.

    Args:
        url: Address to open.
        driver: Selenium WebDriver instance.
        period: Seconds to sleep after requesting the page.
        init: Skip the login check (used while loading the login page itself).

    Returns:
        True if ``driver.current_url`` equals ``url`` afterwards.

    Raises:
        LoginError: If a required login attempt fails.
    """
    if driver.current_url != url:
        if not (init or status(driver)):
            login(driver)
        driver.get(url)
        # Fixed wait to give the page time to load.
        time.sleep(period)
    return driver.current_url == url


# NOTE: the credential defaults are evaluated once, when this module is
# imported. That is kept on purpose so riuser()/ripass() run only once per
# process; whether they are interactive is not visible from this file.
def loginHandler(driver, user=setuptools.riuser(), password=setuptools.ripass()):
    """Submit the login form and return the resulting ``status(driver)``.

    Args:
        driver: Selenium WebDriver instance.
        user: Value typed into the ``email`` field.
        password: Value typed into the ``password`` field.

    Returns:
        True if the page shown after submitting no longer contains 'Login'.
    """
    loadPage(_LOGIN_URL, driver, 3, True)
    driver.find_element_by_name("email").send_keys(user)
    password_field = driver.find_element_by_name("password")
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)
    # Fixed wait for the form submission to take effect.
    time.sleep(3)
    return status(driver)


class LoginError(Exception):
    """Raised by ``login`` when the login attempt does not succeed."""


def login(driver):
    """Ensure the session is logged in, logging in if necessary.

    Returns:
        True when ``status`` reports a logged-in page, either already or
        after ``loginHandler`` ran.

    Raises:
        LoginError: If ``loginHandler`` reports failure.
    """
    if status(driver) or loginHandler(driver):
        return True
    raise LoginError("Login failed.")


def pageHandler(driver, id, page):
    """Download the image(s) shown at reader position ``page`` of issue ``id``.

    The element ``page1`` is always downloaded as ``page``. If an element
    ``page2`` exists as well, it is downloaded as ``page + 1``.

    Args:
        driver: Selenium WebDriver instance.
        id: Issue identifier used in the reader URL.
        page: Page number to open; must support ``int(page)`` and ``page + 1``.
    """
    url = _READER_URL % (id, int(page))
    # NOTE: retries without limit until the browser reports the requested URL.
    while not loadPage(url, driver):
        pass

    first = driver.find_element_by_id("page1").get_attribute("src")
    helpers.downloadPage(first, id, page, helpers.JPG)

    # Only the lookup is guarded: a missing second image is expected, whereas
    # a failed download should surface just like it does for the first image.
    try:
        second = driver.find_element_by_id("page2")
    except NoSuchElementException:
        return
    helpers.downloadPage(second.get_attribute("src"), id, page + 1, helpers.JPG)


def imgHandler(driver, id):
    """Download all pages of an image-based issue and return the page count.

    The count is the number of elements with class ``pageNumber`` on the
    currently loaded reader page. Position 1 is fetched on its own, after
    that every even position (``pageHandler`` also picks up the following
    page when present).
    """
    count = len(driver.find_elements_by_class_name("pageNumber"))
    for position in [1, *range(2, count + 1, 2)]:
        pageHandler(driver, id, position)
    return count


def canvasHandler(driver, id):
    """Print diagnostics for a canvas-based issue, then always raise.

    Canvas-based issues are not supported; the output is meant to be sent to
    the maintainer.

    Raises:
        Exception: Always, after the diagnostics were attempted.
    """
    try:
        # The issue ID has to be filled into the reader URL before the URL is
        # substituted into the script template.
        # NOTE(review): assumes helpers.scripts.monitor has exactly one %s
        # placeholder for that URL - confirm against helpers/scripts.
        script = helpers.scripts.monitor % (_READER_URL % (id, 1))
        urls = driver.execute_script(script)
        print(driver.page_source)
        print(urls)
        print(type(urls))
    except Exception as e:
        # Diagnostics are best-effort; report the problem and carry on to the
        # "not supported" error below.
        print(e)
    raise Exception(
        "Sorry, this type of magazine is not supported yet. "
        "Please send the output above to fread@kumi.email."
    )


def hasCanvas(driver, id):
    """Return True if the reader page of issue ``id`` has exactly one canvas.

    Raises:
        ValueError: If the first reader page of the issue cannot be loaded.
    """
    if not loadPage(_READER_URL % (id, 1), driver):
        raise ValueError("Issue %s does not exist." % id)
    # Plain comparison instead of ``assert``: assertions are removed when
    # Python runs with -O, which would make this always return True.
    try:
        return len(driver.find_elements_by_tag_name("canvas")) == 1
    except WebDriverException:
        return False


def magazineHandler(driver, id, makepdf=True):
    """Download issue ``id`` and optionally pass it to ``helpers.makePDF``.

    The issue is skipped with a notice when ``helpers.makeDir(id)`` returns a
    false value. If anything fails afterwards, the path ``id`` is removed
    again with ``shutil.rmtree`` and the exception is re-raised.

    Args:
        driver: Selenium WebDriver instance.
        id: Issue identifier; also used as the path that is cleaned up.
        makepdf: Call ``helpers.makePDF`` after downloading.
    """
    if not helpers.makeDir(id):
        print("[NOTICE] Skipping issue %s - already exists." % id)
        return

    try:
        # loginHandler is called directly rather than login(): login() skips
        # the form whenever status() is true, which would apply to any page
        # that merely lacks the word 'Login'.
        loginHandler(driver)
        if hasCanvas(driver, id):
            page = canvasHandler(driver, id)
        else:
            page = imgHandler(driver, id)
        if makepdf:
            helpers.makePDF(id, page, helpers.JPG)
    except Exception:
        # Do not leave a partial download behind.
        shutil.rmtree(id)
        raise


def main():
    """Parse the command line and download every requested issue."""
    parser = argparse.ArgumentParser()
    parser.add_argument("id", help="ID of the magazine to be downloaded. May be a URL.", nargs="+")
    ids = parser.parse_args().id

    caps = webdriver.DesiredCapabilities().PHANTOMJS.copy()
    # GhostDriver reads page settings from "phantomjs.page.settings.*".
    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"
    driver = webdriver.PhantomJS(desired_capabilities=caps, service_args=["--web-security=no"])

    try:
        for issue in ids:
            # Accept full reader URLs: keep the last path segment and drop
            # any "#page" fragment.
            magazineHandler(driver, issue.split("/")[-1].split("#")[0])
    finally:
        # Always shut the browser process down, even after an error.
        driver.quit()


if __name__ == "__main__":
    main()