Add debug functions for canvas-based magazines, plus some optimizations

This commit is contained in:
Klaus-Uwe Mitterer 2017-07-21 20:29:34 +02:00
parent ca340815b7
commit bebd8e01ba
2 changed files with 49 additions and 19 deletions

10
helpers/scripts.py Normal file
View file

@ -0,0 +1,10 @@
# JavaScript template for monitoring resource requests. The single "%s"
# placeholder is replaced with the URL to open. The script creates a page via
# require('webpage'), appends every requested resource to `urls` as JSON
# indented by 4 spaces, opens the URL and returns `urls`.
# NOTE(review): require('webpage') looks like the PhantomJS API, where
# page.open is presumably asynchronous -- `urls` may be returned before any
# request has been recorded. Confirm against the driver in use.
monitor = '''var page = require('webpage').create();
var urls = [];
page.onResourceRequested = function(request) {
urls.push(JSON.stringify(request, undefined, 4));
};
page.open("%s");
return urls;'''

View file

@ -4,7 +4,7 @@ from selenium import webdriver
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
import argparse, multiprocessing, urllib.request, urllib.error, urllib.parse, time, os import argparse, multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
import helpers, setuptools import helpers, helpers.scripts, setuptools
def status(driver): def status(driver):
if '<a class="btn btn-primary clickable btn-login">Login</a>' not in driver.page_source: if '<a class="btn btn-primary clickable btn-login">Login</a>' not in driver.page_source:
@ -13,15 +13,16 @@ def status(driver):
return False return False
def loadPage(url, driver, period=5,init=False): def loadPage(url, driver, period=5,init=False):
if not (init or status(driver)): if not driver.current_url == url:
login(driver) if not (init or status(driver)):
driver.get(url) login(driver)
time.sleep(period) driver.get(url)
time.sleep(period)
return True if driver.current_url == url else False return True if driver.current_url == url else False
def loginHandler(driver, user = setuptools.riuser(), password = setuptools.ripass()): def loginHandler(driver, user = setuptools.riuser(), password = setuptools.ripass()):
loadPage("https://app.myreadit.com/login",driver,3,True) loadPage("https://app.myreadit.com/login", driver, 3, True)
curfield = driver.find_element_by_name("email") curfield = driver.find_element_by_name("email")
curfield.send_keys(user) curfield.send_keys(user)
@ -43,9 +44,6 @@ def login(driver):
return True return True
def pageHandler(driver, id, page): def pageHandler(driver, id, page):
if "/ %i" % (int(page) - 1) in driver.page_source or "/ %i" % (int(page) - 2) in driver.page_source:
return False
while not loadPage("https://app.myreadit.com/reader/%s#%i" % (id, int(page)), driver): while not loadPage("https://app.myreadit.com/reader/%s#%i" % (id, int(page)), driver):
pass pass
@ -55,21 +53,43 @@ def pageHandler(driver, id, page):
try: try:
img2 = driver.find_element_by_id("page2").get_attribute("src") img2 = driver.find_element_by_id("page2").get_attribute("src")
helpers.downloadPage(img2, id, page + 1, helpers.JPG) helpers.downloadPage(img2, id, page + 1, helpers.JPG)
return 2
except: except:
return 1 pass
def imgHandler(driver, id):
    """Process all pages of an image-based magazine.

    The page count is the number of elements carrying the ``pageNumber``
    class. ``pageHandler`` is called for page 1 and then for every second
    page from 2 up to and including the page count.

    Args:
        driver: Browser driver offering ``find_elements_by_class_name``.
        id: Identifier of the magazine, passed through to ``pageHandler``.

    Returns:
        The number of ``pageNumber`` elements found.
    """
    total = len(driver.find_elements_by_class_name("pageNumber"))

    # Page 1 is always requested, even when no page markers were found.
    pageHandler(driver, id, 1)
    for first_of_pair in range(2, total + 1, 2):
        pageHandler(driver, id, first_of_pair)

    return total
def canvasHandler(driver, id):
    """Print debug information for a canvas-based magazine, then exit.

    Canvas-rendered magazines are not supported yet. This fills the
    ``helpers.scripts.monitor`` template with the reader URL of magazine
    ``id``, runs it through ``driver.execute_script`` and prints the page
    source, the script result and the result's type, so the user can send
    the output to the maintainer.

    Args:
        driver: Browser driver offering ``execute_script`` and ``page_source``.
        id: Identifier of the magazine, inserted into the reader URL.

    Raises:
        SystemExit: Always, with exit status 1, once the output is printed.
    """
    try:
        # Build the reader URL first: formatting the template with the raw
        # pattern would leave a literal "%s" where the magazine id belongs.
        url = "https://app.myreadit.com/reader/%s#1" % id
        script = helpers.scripts.monitor % url
        urls = driver.execute_script(script)
        print(driver.page_source)
        print(urls)
        print(type(urls))
    except Exception as e:
        # Best effort: the error text is itself part of the debug output.
        print(e)
    print("Sorry, this type of magazine is not supported yet. Please send the output above to fread@kumi.email.")
    # Raise SystemExit directly; the bare exit() helper is installed by the
    # site module and is not guaranteed to be available.
    raise SystemExit(1)
def hasCanvas(driver, id):
    """Report whether the reader page of magazine ``id`` uses one canvas.

    Loads the first reader page through ``loadPage`` and then counts the
    ``canvas`` elements in the document.

    Args:
        driver: Browser driver offering ``find_elements_by_tag_name``.
        id: Identifier of the magazine, inserted into the reader URL.

    Returns:
        True when exactly one ``canvas`` element is found; False otherwise,
        including when the element lookup raises.
    """
    loadPage("https://app.myreadit.com/reader/%s#1" % id, driver)
    try:
        # A plain comparison instead of ``assert``: assertions are stripped
        # under ``python -O``, which would make this always return True.
        return len(driver.find_elements_by_tag_name("canvas")) == 1
    except Exception:
        # Lookup failures mean "no canvas"; KeyboardInterrupt and SystemExit
        # are deliberately not swallowed.
        return False
def magazineHandler(driver, id, makepdf = True): def magazineHandler(driver, id, makepdf = True):
if helpers.makeDir(id): if helpers.makeDir(id):
loginHandler(driver) loginHandler(driver)
page = 0 page = canvasHandler(driver, id) if hasCanvas(driver, id) else imgHandler(driver, id)
val = pageHandler(driver, id, 1)
while val:
page += val
val = pageHandler(driver, id, page + 1)
if makepdf: if makepdf:
helpers.makePDF(id, page, helpers.JPG) helpers.makePDF(id, page, helpers.JPG)