Add debug functions for canvas-based magazines, some optimizations
This commit is contained in:
parent
ca340815b7
commit
bebd8e01ba
2 changed files with 49 additions and 19 deletions
10
helpers/scripts.py
Normal file
10
helpers/scripts.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
# JavaScript source kept as a Python %-format template: the single %s inside
# page.open("...") is the placeholder for the URL to load.
# The script registers an onResourceRequested callback that appends every
# request, serialized with JSON.stringify (4-space indent), to `urls`, then
# opens the page and returns `urls`.
# NOTE(review): require('webpage') looks like the PhantomJS webpage module --
# confirm the driver that executes this script provides it.
# NOTE(review): `return urls` runs right after page.open(); if open() is
# asynchronous the list may still be empty at that point -- verify.
# NOTE(review): the lines consisting only of `|` characters look like
# diff-viewer residue rather than JavaScript -- confirm against the real file.
monitor = '''var page = require('webpage').create();
|
||||
var urls = [];
|
||||
|
||||
page.onResourceRequested = function(request) {
|
||||
urls.push(JSON.stringify(request, undefined, 4));
|
||||
};
|
||||
|
||||
page.open("%s");
|
||||
|
||||
return urls;'''
|
48
readit.py
48
readit.py
|
@ -4,7 +4,7 @@ from selenium import webdriver
|
|||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
import argparse, multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
|
||||
import helpers, setuptools
|
||||
import helpers, helpers.scripts, setuptools
|
||||
|
||||
def status(driver):
|
||||
if '<a class="btn btn-primary clickable btn-login">Login</a>' not in driver.page_source:
|
||||
|
@ -13,6 +13,7 @@ def status(driver):
|
|||
return False
|
||||
|
||||
def loadPage(url, driver, period=5,init=False):
|
||||
if not driver.current_url == url:
|
||||
if not (init or status(driver)):
|
||||
login(driver)
|
||||
driver.get(url)
|
||||
|
@ -21,7 +22,7 @@ def loadPage(url, driver, period=5,init=False):
|
|||
return True if driver.current_url == url else False
|
||||
|
||||
def loginHandler(driver, user = setuptools.riuser(), password = setuptools.ripass()):
|
||||
loadPage("https://app.myreadit.com/login",driver,3,True)
|
||||
loadPage("https://app.myreadit.com/login", driver, 3, True)
|
||||
|
||||
curfield = driver.find_element_by_name("email")
|
||||
curfield.send_keys(user)
|
||||
|
@ -43,9 +44,6 @@ def login(driver):
|
|||
return True
|
||||
|
||||
def pageHandler(driver, id, page):
|
||||
if "/ %i" % (int(page) - 1) in driver.page_source or "/ %i" % (int(page) - 2) in driver.page_source:
|
||||
return False
|
||||
|
||||
while not loadPage("https://app.myreadit.com/reader/%s#%i" % (id, int(page)), driver):
|
||||
pass
|
||||
|
||||
|
@ -55,21 +53,43 @@ def pageHandler(driver, id, page):
|
|||
try:
|
||||
img2 = driver.find_element_by_id("page2").get_attribute("src")
|
||||
helpers.downloadPage(img2, id, page + 1, helpers.JPG)
|
||||
return 2
|
||||
|
||||
except:
|
||||
return 1
|
||||
pass
|
||||
|
||||
def imgHandler(driver, id):
    """Download every page of an image-based magazine.

    Counts the elements with class ``pageNumber`` on the currently loaded
    page and calls ``pageHandler`` for page 1 and then for every even page
    number up to that count.

    Args:
        driver: WebDriver-like object used to query the reader page.
        id: Magazine identifier passed through to ``pageHandler``.

    Returns:
        The number of ``pageNumber`` elements found.
    """
    total = len(driver.find_elements_by_class_name("pageNumber"))

    # Page 1 is requested on its own; after that only even page numbers
    # are requested (presumably because pageHandler fetches a two-page
    # spread per call -- confirm against pageHandler).
    starts = [1]
    starts.extend(range(2, total + 1, 2))

    for number in starts:
        pageHandler(driver, id, number)

    return total
|
||||
|
||||
def canvasHandler(driver, id):
    """Print debug information for a canvas-based magazine, then exit.

    This magazine type is not supported yet, so the function only gathers
    diagnostics: it runs the ``helpers.scripts.monitor`` script for the
    magazine's first reader page and prints the page source, the script's
    return value and the type of that value.

    Args:
        driver: WebDriver-like object providing ``execute_script`` and
            ``page_source``.
        id: Magazine identifier inserted into the reader URL.

    Raises:
        SystemExit: Always, with exit status 1, after the "not supported"
            notice has been printed.
    """
    try:
        # Substitute the magazine id before building the script; otherwise
        # the script would open a URL containing a literal "%s".
        reader_url = "https://app.myreadit.com/reader/%s#1" % id
        script = helpers.scripts.monitor % reader_url
        urls = driver.execute_script(script)
        print(driver.page_source)
        print(urls)
        print(type(urls))
    except Exception as e:
        # Best-effort diagnostics: report the failure and still show the
        # notice below instead of crashing with a traceback.
        print(e)

    print("Sorry, this type of magazine is not supported yet. Please send the output above to fread@kumi.email.")
    # Raise SystemExit directly: the exit() helper is injected by the site
    # module and is not guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(1)
|
||||
|
||||
def hasCanvas(driver, id):
    """Report whether the magazine's first reader page uses a canvas.

    Loads the first reader page for ``id`` via ``loadPage`` and checks
    whether the page contains exactly one ``<canvas>`` element.

    Args:
        driver: WebDriver-like object used to load and query the page.
        id: Magazine identifier inserted into the reader URL.

    Returns:
        True if exactly one ``canvas`` element is found, False otherwise
        (including when querying the page fails).
    """
    loadPage("https://app.myreadit.com/reader/%s#1" % id, driver)

    try:
        canvases = driver.find_elements_by_tag_name("canvas")
    except Exception:
        # Keep the original best-effort behaviour for lookup failures, but
        # let KeyboardInterrupt / SystemExit propagate.
        return False

    # Plain comparison instead of `assert`: assertions are removed when
    # Python runs with -O, which would make this always return True.
    return len(canvases) == 1
|
||||
|
||||
def magazineHandler(driver, id, makepdf = True):
|
||||
if helpers.makeDir(id):
|
||||
loginHandler(driver)
|
||||
page = 0
|
||||
val = pageHandler(driver, id, 1)
|
||||
|
||||
while val:
|
||||
page += val
|
||||
val = pageHandler(driver, id, page + 1)
|
||||
page = canvasHandler(driver, id) if hasCanvas(driver, id) else imgHandler(driver, id)
|
||||
|
||||
if makepdf:
|
||||
helpers.makePDF(id, page, helpers.JPG)
|
||||
|
|
Loading…
Reference in a new issue