2017-02-08 23:27:02 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
from selenium import webdriver
|
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
|
from selenium.common.exceptions import NoSuchElementException
|
|
|
|
import argparse, multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
|
2017-07-21 18:29:34 +00:00
|
|
|
import helpers, helpers.scripts, setuptools
|
2017-02-08 23:27:02 +00:00
|
|
|
|
|
|
|
def status(driver):
    """Report whether the current page looks like a logged-in session.

    The check is purely textual: the session counts as logged in when the
    markup of the site's "Login" button is absent from ``driver.page_source``.
    Any page that lacks this markup is therefore reported as logged in.

    Args:
        driver: Object exposing a ``page_source`` string (a Selenium driver).

    Returns:
        bool: True if the login button markup is not in the page source,
        False otherwise.
    """
    login_button = '<a class="btn btn-primary clickable btn-login">Login</a>'
    return login_button not in driver.page_source
|
|
|
|
|
|
|
|
def loadPage(url, driver, period=5, init=False):
    """Navigate the driver to ``url`` unless it is already there.

    When the driver is on a different URL, the session is first checked with
    ``status()`` and ``login()`` is called if it does not look logged in.
    That check is skipped when ``init`` is true. The page is then requested
    and the function sleeps for ``period`` seconds.

    Args:
        url: Address to open.
        driver: Selenium driver used for navigation.
        period: Seconds to sleep after requesting the page.
        init: If true, skip the login check before navigating.

    Returns:
        bool: True if ``driver.current_url`` equals ``url`` afterwards.
    """
    if driver.current_url != url:
        # Only verify the session when this is not the initial (login) load.
        if not init and not status(driver):
            login(driver)

        driver.get(url)
        time.sleep(period)

    return driver.current_url == url
|
|
|
|
|
|
|
|
def loginHandler(driver, user=setuptools.riuser(), password=setuptools.ripass()):
    """Submit the login form and report whether the session looks logged in.

    Opens the login page, types ``user`` into the field named "email" and
    ``password`` into the field named "password", presses RETURN in the
    password field, waits three seconds and returns ``status(driver)``.

    Note that the default credentials are evaluated once, when this function
    is defined, not on every call.

    Args:
        driver: Selenium driver to operate on.
        user: Value typed into the e-mail field.
        password: Value typed into the password field.

    Returns:
        bool: Result of ``status(driver)`` after submitting the form.
    """
    loadPage("https://app.myreadit.com/login", driver, 3, True)

    driver.find_element_by_name("email").send_keys(user)

    password_field = driver.find_element_by_name("password")
    password_field.send_keys(password)
    # Submit the form from the password field.
    password_field.send_keys(Keys.RETURN)

    time.sleep(3)
    return status(driver)
|
|
|
|
|
|
|
|
class LoginError(Exception):
    """Signals that logging in to the site did not succeed."""
|
|
|
|
|
|
|
|
def login(driver):
    """Make sure the session is logged in, logging in if necessary.

    Args:
        driver: Selenium driver to check and, if needed, log in with.

    Returns:
        bool: Always True when the function returns.

    Raises:
        LoginError: If the session is not logged in and ``loginHandler()``
            also reports failure.
    """
    # Already logged in: nothing to do.
    if status(driver):
        return True

    if loginHandler(driver):
        return True

    raise LoginError("Login failed.")
|
|
|
|
|
|
|
|
def pageHandler(driver, id, page):
    """Download the image(s) shown by one reader view of an issue.

    Loads the reader URL for ``page``, downloads the image of the element
    with id "page1" as page ``page`` and, if an element with id "page2" is
    present, downloads its image as page ``page + 1``.

    Args:
        driver: Selenium driver used to open the reader.
        id: Issue identifier inserted into the reader URL.
        page: Page number to open (converted with ``int()`` for the URL).

    Raises:
        NoSuchElementException: If the "page1" element is missing.
        Exception: Whatever ``helpers.downloadPage`` raises; download errors
            are no longer silently discarded for the second image.
    """
    url = "https://app.myreadit.com/reader/%s#%i" % (id, int(page))

    # loadPage() returns False while the browser is not on the requested
    # URL, so keep retrying until the reader view is actually open.
    while not loadPage(url, driver):
        pass

    img = driver.find_element_by_id("page1").get_attribute("src")
    helpers.downloadPage(img, id, page, helpers.JPG)

    # The second image is optional. Only the lookup is guarded, so that
    # unrelated errors (and KeyboardInterrupt) are not swallowed.
    try:
        img2 = driver.find_element_by_id("page2").get_attribute("src")
    except NoSuchElementException:
        return

    # An element without a source has nothing to download.
    if img2:
        helpers.downloadPage(img2, id, page + 1, helpers.JPG)
|
|
|
|
|
|
|
|
def imgHandler(driver, id):
    """Download all pages of an image-based issue.

    The number of pages is taken from the count of elements with class
    "pageNumber" on the current page. ``pageHandler()`` is called for page 1
    and then for every even page number up to that count.

    Args:
        driver: Selenium driver currently showing the reader.
        id: Issue identifier passed on to ``pageHandler()``.

    Returns:
        int: The number of "pageNumber" elements found.
    """
    total = len(driver.find_elements_by_class_name("pageNumber"))

    # Page 1 is always handled on its own, followed by the even pages.
    pageHandler(driver, id, 1)
    for first_of_pair in range(2, total + 1, 2):
        pageHandler(driver, id, first_of_pair)

    return total
|
|
|
|
|
|
|
|
def canvasHandler(driver, id):
    """Print diagnostics for a canvas-based issue, then terminate.

    Canvas-based issues are not supported. This function runs the
    ``helpers.scripts.monitor`` script, formatted with the reader URL of the
    issue, prints the page source and the script's result (or the exception
    that occurred), asks the user to report the output and exits with
    status 1.

    Args:
        driver: Selenium driver used to execute the script.
        id: Issue identifier inserted into the reader URL.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    try:
        # The URL template must be filled with the issue id before it is
        # inserted into the script; otherwise a literal "%s" is injected.
        reader_url = "https://app.myreadit.com/reader/%s#1" % id
        script = helpers.scripts.monitor % reader_url
        urls = driver.execute_script(script)
        print(driver.page_source)
        print(urls)
        print(type(urls))
    except Exception as e:
        # Diagnostic path: show the failure so the user can report it.
        print(e)

    print("Sorry, this type of magazine is not supported yet. Please send the output above to fread@kumi.email.")
    # The exit() helper comes from the site module and may be unavailable;
    # raising SystemExit directly works without it.
    raise SystemExit(1)
|
2017-07-16 10:40:19 +00:00
|
|
|
|
2017-07-21 18:29:34 +00:00
|
|
|
def hasCanvas(driver, id):
    """Tell whether the reader for an issue contains exactly one canvas.

    Loads the first reader page of the issue and counts the ``<canvas>``
    elements on it.

    Args:
        driver: Selenium driver used to open the reader.
        id: Issue identifier inserted into the reader URL.

    Returns:
        bool: True if exactly one ``canvas`` element is found, else False.
    """
    loadPage("https://app.myreadit.com/reader/%s#1" % id, driver)

    # A plain comparison instead of assert-in-try: assert statements are
    # removed under "python -O", which would make this always return True.
    return len(driver.find_elements_by_tag_name("canvas")) == 1
|
|
|
|
|
2017-02-08 23:27:02 +00:00
|
|
|
def magazineHandler(driver, id, makepdf=True):
    """Download one issue and optionally build a PDF from its pages.

    If ``helpers.makeDir(id)`` returns a falsy value the issue is skipped
    with a notice (presumably because its directory already exists, as the
    message states). Otherwise the login form is submitted, the issue is
    downloaded via ``canvasHandler()`` or ``imgHandler()`` depending on
    ``hasCanvas()``, and ``helpers.makePDF`` is called when ``makepdf`` is
    true.

    Args:
        driver: Selenium driver used for the whole download.
        id: Issue identifier.
        makepdf: Whether to call ``helpers.makePDF`` after downloading.
    """
    if not helpers.makeDir(id):
        print("[NOTICE] Skipping issue %s - already exists." % id)
        return

    loginHandler(driver)

    # Pick the download strategy that matches how the reader renders pages.
    if hasCanvas(driver, id):
        page = canvasHandler(driver, id)
    else:
        page = imgHandler(driver, id)

    if makepdf:
        helpers.makePDF(id, page, helpers.JPG)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command line: one or more issue ids (or reader URLs) to download.
    parser = argparse.ArgumentParser()
    parser.add_argument("id", help="ID of the magazine to be downloaded. May be a URL.", nargs="+")
    ids = parser.parse_args().id

    caps = webdriver.DesiredCapabilities().PHANTOMJS.copy()
    # PhantomJS page settings are passed under the "phantomjs.page.settings."
    # prefix; with the previous misspelt key the user agent was ignored.
    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"

    driver = webdriver.PhantomJS(desired_capabilities=caps, service_args=["--web-security=no"])

    try:
        for issue in ids:
            # Reduce a full reader URL to its last path segment and drop
            # any "#page" fragment; a bare id passes through unchanged.
            use = issue.split("/")[-1].split("#")[0]
            magazineHandler(driver, use)
    finally:
        # Always shut the browser down, even when a download fails.
        driver.quit()
|