From 5e59a800b2b4997e8a943c7099f4fd06339b32e2 Mon Sep 17 00:00:00 2001
From: Klaus-Uwe Mitterer
Date: Wed, 19 Jul 2017 16:11:13 +0200
Subject: [PATCH] Use PhantomJS instead of Firefox, update URLs, login immediately upon launch

---
Review notes (this section is ignored by `git am`):
 * The user-agent capability key is spelled "phantomjs.page.settings.userAgent".
   The previous "phantoms." spelling looked like a typo that would leave the
   custom user agent unset -- confirm against the GhostDriver capability docs.
 * The post-image blob hash on the "index" line predates that one-word fix.
 * Line breaks were reconstructed from the hunk headers; indentation width and
   blank-line positions are assumed (4 spaces) -- verify against handler.py.

 handler.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/handler.py b/handler.py
index c0dcf9d..7d5214e 100755
--- a/handler.py
+++ b/handler.py
@@ -4,12 +4,11 @@ from bs4 import BeautifulSoup
 from selenium import webdriver
 from selenium.webdriver.common.keys import Keys
 from selenium.common.exceptions import NoSuchElementException
-from xvfbwrapper import Xvfb
 import multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
 import dbtools, phototools, setuptools
 
 def status(driver):
-    if "/main/login.php" not in driver.page_source:
+    if "Unauthorised Access" not in driver.page_source and "/main/login.php" not in driver.page_source and len(driver.page_source) > 100:
         return True
     else:
         return False
@@ -21,8 +20,8 @@ def loadPage(url, driver, period=5,init=False):
     time.sleep(period)
 
 def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
-    loadPage("https://www.planetromeo.com/",driver,3,True)
-    loadPage("https://www.planetromeo.com/main/login.php",driver,3,True)
+    loadPage("https://classic.planetromeo.com/",driver,3,True)
+    loadPage("https://classic.planetromeo.com/main/login.php",driver,3,True)
 
     curfield = driver.find_element_by_name("username")
     curfield.send_keys(user)
@@ -47,7 +46,7 @@ def messageID(url):
     return url.split("=")[-1]
 
 def quickshareHandler(driver, url, sender):
-    nurl = "https://www.planetromeo.com/" + url if "planetromeo.com" not in url else url
+    nurl = "https://classic.planetromeo.com/" + url if "planetromeo.com" not in url else url
     loadPage(nurl)
 
     juha = BeautifulSoup(driver.page_source, "html5lib")
@@ -55,7 +54,7 @@ def quickshareHandler(driver, url, sender):
     links = juha.findAll("a")
     for link in links:
         try:
-            purl = "https://www.planetromeo.com/" + link["data-pic"] if "planetromeo.com" not in link["data-pic"] else link["data-pic"]
+            purl = "https://classic.planetromeo.com/" + link["data-pic"] if "planetromeo.com" not in link["data-pic"] else link["data-pic"]
             phototools.processURL(purl, sender, shutup=True)
         except:
             pass
@@ -64,9 +63,9 @@ def quickshareHandler(driver, url, sender):
 
 def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = dbtools.dbHelper()):
     if mode == 0:
-        loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver)
+        loadPage("https://classic.planetromeo.com/msg/?id=" + mid, driver)
     else:
-        loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver)
+        loadPage("https://classic.planetromeo.com/msg/?type=sent&id=" + mid, driver)
 
     juha = BeautifulSoup(driver.page_source, "html5lib")
     text = juha.select("div.msg div")[0]
@@ -120,20 +119,22 @@ def pageHandler(driver, db = dbtools.dbHelper()):
 
 def siteHandler(driver, mode = 0, p = 0, db = dbtools.dbHelper()):
     if mode == 0:
-        loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=all&seite=" + str(p), driver)
+        loadPage("https://classic.planetromeo.com/mitglieder/messages/uebersicht.php?view=all&seite=" + str(p), driver)
     else:
-        loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=sent&seite=" + str(p), driver)
+        loadPage("https://classic.planetromeo.com/mitglieder/messages/uebersicht.php?view=sent&seite=" + str(p), driver)
 
     if pageHandler(driver, db):
         siteHandler(driver, mode, p+1, db)
 
 def mainHandler(driver, db):
+    loginHandler(driver)
     siteHandler(driver, 0, db=db)
     siteHandler(driver, 1, db=db)
 
 if __name__ == "__main__":
-    with Xvfb() as xvfb:
-        db = dbtools.dbHelper()
-        driver = webdriver.Firefox()
-        mainHandler(driver, db)
-        driver.close()
+    db = dbtools.dbHelper()
+    caps = webdriver.DesiredCapabilities().PHANTOMJS.copy()
+    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"
+    driver = webdriver.PhantomJS(desired_capabilities=caps)
+    mainHandler(driver, db)
+    driver.close()