From bebd8e01baf4b306e82a25f89740fad6708cd2ea Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 21 Jul 2017 20:29:34 +0200 Subject: [PATCH] Add debug functions for canvas based magazines, some optimizations --- helpers/scripts.py | 10 ++++++++ readit.py | 58 +++++++++++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 helpers/scripts.py diff --git a/helpers/scripts.py b/helpers/scripts.py new file mode 100644 index 0000000..652cda9 --- /dev/null +++ b/helpers/scripts.py @@ -0,0 +1,10 @@ +monitor = '''var page = require('webpage').create(); +var urls = []; + +page.onResourceRequested = function(request) { + urls.push(JSON.stringify(request, undefined, 4)); +}; + +page.open("%s"); + +return urls;''' diff --git a/readit.py b/readit.py index f0fb954..6b58d2c 100755 --- a/readit.py +++ b/readit.py @@ -4,7 +4,7 @@ from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException import argparse, multiprocessing, urllib.request, urllib.error, urllib.parse, time, os -import helpers, setuptools +import helpers, helpers.scripts, setuptools def status(driver): if 'Login' not in driver.page_source: @@ -13,15 +13,16 @@ def status(driver): return False def loadPage(url, driver, period=5,init=False): - if not (init or status(driver)): - login(driver) - driver.get(url) - time.sleep(period) + if not driver.current_url == url: + if not (init or status(driver)): + login(driver) + driver.get(url) + time.sleep(period) return True if driver.current_url == url else False def loginHandler(driver, user = setuptools.riuser(), password = setuptools.ripass()): - loadPage("https://app.myreadit.com/login",driver,3,True) + loadPage("https://app.myreadit.com/login", driver, 3, True) curfield = driver.find_element_by_name("email") curfield.send_keys(user) @@ -43,9 +44,6 @@ def login(driver): return True def pageHandler(driver, id, page): - if "/ %i" % (int(page) - 1) in driver.page_source or "/ %i" % (int(page) - 2) in driver.page_source: - return False - while not loadPage("https://app.myreadit.com/reader/%s#%i" % (id, int(page)), driver): pass @@ -55,21 +53,43 @@ def pageHandler(driver, id, page): try: img2 = driver.find_element_by_id("page2").get_attribute("src") helpers.downloadPage(img2, id, page + 1, helpers.JPG) - return 2 - except: - return 1 - + pass +def imgHandler(driver, id): + count = len(driver.find_elements_by_class_name("pageNumber")) + + for i in [1] + list(range(2, count + 1, 2)): + pageHandler(driver, id, i) + + return count + +def canvasHandler(driver, id): + try: + script = helpers.scripts.monitor % "https://app.myreadit.com/reader/%s#1" + urls = driver.execute_script(script) + print(driver.page_source) + print(urls) + print(type(urls)) + except Exception as e: + print(e) + + print("Sorry, this type of magazine is not supported yet. Please send the output above to fread@kumi.email.") + exit(1) + +def hasCanvas(driver, id): + loadPage("https://app.myreadit.com/reader/%s#1" % id, driver) + + try: + assert len(driver.find_elements_by_tag_name("canvas")) == 1 + return True + except: + return False + def magazineHandler(driver, id, makepdf = True): if helpers.makeDir(id): loginHandler(driver) - page = 0 - val = pageHandler(driver, id, 1) - - while val: - page += val - val = pageHandler(driver, id, page + 1) + page = canvasHandler(driver, id) if hasCanvas(driver, id) else imgHandler(driver, id) if makepdf: helpers.makePDF(id, page, helpers.JPG)