Add debug functions for canvas-based magazines and some optimizations

This commit is contained in:
Klaus-Uwe Mitterer 2017-07-21 20:29:34 +02:00
parent ca340815b7
commit bebd8e01ba
2 changed files with 49 additions and 19 deletions

10
helpers/scripts.py Normal file
View file

@ -0,0 +1,10 @@
# JavaScript template used for debugging canvas-based magazines.
# The script creates a `webpage` object, records every resource request as
# pretty-printed JSON (4-space indent) in `urls`, opens the URL substituted for
# the single "%s" placeholder, and returns the collected list.
# NOTE(review): `require('webpage')` / `page.open()` look like the PhantomJS
# API, where open() is asynchronous — `urls` may still be empty when it is
# returned; confirm before relying on the result.
monitor = '''var page = require('webpage').create();
var urls = [];
page.onResourceRequested = function(request) {
urls.push(JSON.stringify(request, undefined, 4));
};
page.open("%s");
return urls;'''

View file

@ -4,7 +4,7 @@ from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import argparse, multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
import helpers, setuptools
import helpers, helpers.scripts, setuptools
def status(driver):
if '<a class="btn btn-primary clickable btn-login">Login</a>' not in driver.page_source:
@ -13,15 +13,16 @@ def status(driver):
return False
# Navigate `driver` to `url`, logging in first when needed, wait `period`
# seconds, and report whether the browser ended up on `url`.
# NOTE(review): this listing comes from a diff view whose +/- markers and
# indentation were lost; the login/get/sleep sequence appears twice, presumably
# once as the removed body and once as the added body nested under the
# current_url check — confirm against the repository before editing.
def loadPage(url, driver, period=5,init=False):
# Log in unless this is the initial (login page) load or status() is truthy.
if not (init or status(driver)):
login(driver)
driver.get(url)
# Fixed wait after navigation.
time.sleep(period)
# Checks whether the browser is already on the requested URL.
if not driver.current_url == url:
if not (init or status(driver)):
login(driver)
driver.get(url)
time.sleep(period)
# True only when the browser's current URL equals the requested one.
return True if driver.current_url == url else False
def loginHandler(driver, user = setuptools.riuser(), password = setuptools.ripass()):
loadPage("https://app.myreadit.com/login",driver,3,True)
loadPage("https://app.myreadit.com/login", driver, 3, True)
curfield = driver.find_element_by_name("email")
curfield.send_keys(user)
@ -43,9 +44,6 @@ def login(driver):
return True
def pageHandler(driver, id, page):
if "/ %i" % (int(page) - 1) in driver.page_source or "/ %i" % (int(page) - 2) in driver.page_source:
return False
while not loadPage("https://app.myreadit.com/reader/%s#%i" % (id, int(page)), driver):
pass
@ -55,21 +53,43 @@ def pageHandler(driver, id, page):
try:
img2 = driver.find_element_by_id("page2").get_attribute("src")
helpers.downloadPage(img2, id, page + 1, helpers.JPG)
return 2
except:
return 1
pass
def imgHandler(driver, id):
    """Download an image-based magazine and return its page count.

    The page count is the number of elements with class ``pageNumber`` on
    the page currently loaded in ``driver``. ``pageHandler`` is invoked for
    page 1 and then for every even page up to the count.

    Args:
        driver: Selenium WebDriver positioned on the reader page.
        id: Magazine identifier passed through to ``pageHandler``.

    Returns:
        The number of ``pageNumber`` elements found.
    """
    total = len(driver.find_elements_by_class_name("pageNumber"))

    # Page 1 is requested on its own, followed by pages 2, 4, 6, ...
    targets = [1]
    targets.extend(range(2, total + 1, 2))

    for number in targets:
        pageHandler(driver, id, number)

    return total
def canvasHandler(driver, id):
    """Print debug information for a canvas-based magazine, then exit.

    Canvas magazines are not supported yet, so this only gathers output the
    user can send in: the page source, the value returned by the monitor
    script, and that value's type.

    Args:
        driver: Selenium WebDriver-like object providing ``execute_script``
            and ``page_source``.
        id: Magazine identifier interpolated into the reader URL.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    try:
        # The magazine id must be filled into the reader URL before the URL
        # is substituted into the script template; otherwise the script is
        # asked to open an address containing a literal "%s".
        url = "https://app.myreadit.com/reader/%s#1" % id
        script = helpers.scripts.monitor % url
        urls = driver.execute_script(script)
        print(driver.page_source)
        print(urls)
        print(type(urls))
    except Exception as e:
        # Best-effort debugging aid: report the failure and fall through to
        # the "not supported" message instead of dying with a traceback.
        print(e)
    print("Sorry, this type of magazine is not supported yet. Please send the output above to fread@kumi.email.")
    # Raise SystemExit directly; the ``exit`` builtin is injected by the
    # ``site`` module and is not guaranteed to be present.
    raise SystemExit(1)
def hasCanvas(driver, id):
    """Return True when the reader page for ``id`` holds exactly one canvas.

    Loads the first reader page of the magazine through ``loadPage`` and
    counts the ``<canvas>`` elements reported by the driver.

    Args:
        driver: Selenium WebDriver-like object.
        id: Magazine identifier interpolated into the reader URL.

    Returns:
        True if exactly one ``canvas`` element is found; False otherwise,
        including when querying the driver raises an error.
    """
    loadPage("https://app.myreadit.com/reader/%s#1" % id, driver)
    try:
        # Plain comparison instead of ``assert``: assertions are removed under
        # ``python -O``, which would make this function always return True.
        return len(driver.find_elements_by_tag_name("canvas")) == 1
    except Exception:
        # Best-effort probe: a failing lookup means a canvas cannot be
        # confirmed. KeyboardInterrupt/SystemExit are left to propagate.
        return False
def magazineHandler(driver, id, makepdf = True):
if helpers.makeDir(id):
loginHandler(driver)
page = 0
val = pageHandler(driver, id, 1)
while val:
page += val
val = pageHandler(driver, id, page + 1)
page = canvasHandler(driver, id) if hasCanvas(driver, id) else imgHandler(driver, id)
if makepdf:
helpers.makePDF(id, page, helpers.JPG)