#!/usr/bin/env python3
"""Archive PlanetRomeo (classic site) messages and attached pictures.

A PhantomJS-driven Selenium session logs in, walks the "all" and "sent"
message overviews page by page, and stores every message that is not yet
known to the database together with the pictures linked from it.

NOTE(review): ``setuptools`` is used here for ``user()``, ``password()`` and
``unescapeText()``; it is presumably a project-local module that shadows the
PyPI package of the same name -- confirm.
"""
import logging
import multiprocessing
import os
import time
import urllib.error
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys

import dbtools
import phototools
import setuptools

logger = logging.getLogger(__name__)

BASE_URL = "https://classic.planetromeo.com/"

# Lazily created fallback used when a handler is called without ``db``.
_shared_db = None


def _resolve_db(db):
    """Return *db*, or a lazily created shared ``dbtools.dbHelper()``.

    The handlers used to declare ``db=dbtools.dbHelper()`` as a default
    argument, which built several helper instances at import time. Resolving
    the default at call time keeps the signatures backward-compatible without
    that import-time side effect.
    """
    global _shared_db
    if db is not None:
        return db
    if _shared_db is None:
        _shared_db = dbtools.dbHelper()
    return _shared_db


def _absolute_url(url):
    """Return *url* unchanged if it already names the site, else join it to BASE_URL."""
    if "planetromeo.com" in url:
        return url
    # urljoin avoids the double slash that plain concatenation produced for
    # root-relative links such as "/quickshare/...".
    return urllib.parse.urljoin(BASE_URL, url)


def status(driver):
    """Return True if the currently loaded page looks like a logged-in page.

    A page counts as logged in when it contains neither the
    "Unauthorised Access" marker nor a reference to the login form and is
    longer than 100 characters.
    """
    # Read the page once; every access to page_source asks the browser again.
    source = driver.page_source
    return (
        "Unauthorised Access" not in source
        and "/main/login.php" not in source
        and len(source) > 100
    )


def loadPage(url, driver, period=5, init=False):
    """Navigate *driver* to *url* and wait *period* seconds.

    Unless *init* is true, the session is checked first and ``login`` is
    called when the current page does not look logged in.

    Raises:
        LoginError: if a required re-login fails.
    """
    if not (init or status(driver)):
        login(driver)
    driver.get(url)
    time.sleep(period)


def loginHandler(driver, user=None, password=None):
    """Submit the login form and return whether the session is logged in.

    *user* and *password* default to ``setuptools.user()`` and
    ``setuptools.password()``, looked up when the function is called.
    """
    if user is None:
        user = setuptools.user()
    if password is None:
        password = setuptools.password()

    # init=True skips the session check, which would otherwise recurse into login.
    loadPage(BASE_URL, driver, 3, True)
    loadPage(BASE_URL + "main/login.php", driver, 3, True)

    field = driver.find_element_by_name("username")
    field.send_keys(user)
    field = driver.find_element_by_name("passwort")
    field.send_keys(password)
    field.send_keys(Keys.RETURN)
    time.sleep(3)
    return status(driver)


class LoginError(Exception):
    """Raised when logging in does not produce a logged-in page."""


def login(driver):
    """Ensure *driver* is logged in; return True or raise LoginError."""
    if not (status(driver) or loginHandler(driver)):
        raise LoginError("Login failed.")
    return True


def messageID(url):
    """Return the text after the last ``=`` in *url* (the message id parameter)."""
    return url.split("=")[-1]


def quickshareHandler(driver, url, sender):
    """Open a quickshare page and hand every ``data-pic`` link to phototools.

    Failures on individual pictures are logged at debug level and skipped so
    that one broken picture does not stop the rest.
    """
    # The original call omitted ``driver`` and therefore always raised TypeError.
    loadPage(_absolute_url(url), driver)
    soup = BeautifulSoup(driver.page_source, "html5lib")
    for link in soup.findAll("a"):
        pic = link.get("data-pic")
        if not pic:
            continue
        try:
            phototools.processURL(_absolute_url(pic), sender, shutup=True)
        except Exception:
            logger.debug("Skipping quickshare picture %r", pic, exc_info=True)


def messageHandler(sender, recipient, mid, date, driver, mode=0, db=None):
    """Fetch one message, store it, and process the pictures linked from it.

    Args:
        sender: sender id stored with the message and passed to phototools.
        recipient: recipient id stored with the message.
        mid: message id as used in the message URL.
        date: creation date string stored as ``created_at``.
        driver: Selenium driver used to load the message page.
        mode: 0 loads the message from the inbox view, anything else from
            the "sent" view.
        db: database helper; a shared ``dbtools.dbHelper()`` is used if omitted.

    Raises:
        IndexError: if the page contains no ``div.msg div`` element.
    """
    db = _resolve_db(db)
    if mode == 0:
        loadPage(BASE_URL + "msg/?id=" + mid, driver)
    else:
        loadPage(BASE_URL + "msg/?type=sent&id=" + mid, driver)

    soup = BeautifulSoup(driver.page_source, "html5lib")
    text = soup.select("div.msg div")[0]
    # NOTE(review): this SQL is built by string formatting, so a quote in the
    # message text breaks the statement (and permits injection). Switch to a
    # parameterized query if dbHelper.executeQuery supports one -- its
    # signature is not visible from this file.
    db.executeQuery(
        "INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');"
        % (mid, setuptools.unescapeText(text.text).strip(), sender, recipient, date)
    )
    db.commit()

    for link in soup.findAll("a"):
        # Anchors without href used to raise KeyError and silently abort the
        # processing of all remaining links.
        href = link.get("href")
        if not href:
            continue
        try:
            if "/pix/popup.php/" in href:
                phototools.processURL(href, sender)
                try:
                    db.executeQuery(
                        "INSERT INTO photos(mid, pid) VALUES('%s', '%s');"
                        % (mid, phototools.parseurl(href).split('/')[-1])
                    )
                except Exception:
                    logger.debug(
                        "Could not record photo %r for message %s",
                        href, mid, exc_info=True,
                    )
            if "/quickshare/" in href:
                quickshareHandler(driver, href, sender)
        except Exception:
            # Attachments are best effort: keep going with the next link.
            logger.warning(
                "Failed to process link %r of message %s",
                href, mid, exc_info=True,
            )
    # Photo rows are inserted after the commit above; persist them as well.
    db.commit()


def pageHandler(driver, db=None):
    """Store all not-yet-known messages listed on the current overview page.

    The direction (received vs. sent) is derived from whether the current URL
    contains "sent". Rows that lack the expected cells or link are skipped.

    Returns:
        True if at least one message on the page was not yet in the database
        (the caller then continues with the next page), otherwise False.
    """
    db = _resolve_db(db)
    soup = BeautifulSoup(driver.page_source, "html5lib")
    mode = 1 if "sent" in driver.current_url else 0
    found_new = False

    # The first table row is skipped (presumably the header row).
    for row in soup.select("table.messageCenter tr")[1:]:
        try:
            cells = row.findAll("td")
            user = cells[1].string
            anchor = cells[2].find("a")
            if anchor is None or not anchor.get("href"):
                continue
            mid = messageID(anchor["href"])
            date = cells[3].string
            if db.checkID(mid):
                continue
            found_new = True
            if mode == 0:
                messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
            else:
                messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
        except IndexError:
            # Malformed row, or a message page without a body: skip it.
            continue
    return found_new


def siteHandler(driver, mode=0, p=0, db=None):
    """Walk the message overview starting at page *p* until nothing new appears.

    Args:
        driver: Selenium driver.
        mode: 0 for the "all" view, anything else for the "sent" view.
        p: index of the first overview page to load.
        db: database helper; a shared ``dbtools.dbHelper()`` is used if omitted.
    """
    db = _resolve_db(db)
    view = "all" if mode == 0 else "sent"
    overview = BASE_URL + "mitglieder/messages/uebersicht.php?view=" + view + "&seite="
    # Iterative instead of recursive so long histories cannot hit the
    # interpreter's recursion limit.
    while True:
        loadPage(overview + str(p), driver)
        if not pageHandler(driver, db):
            break
        p += 1


def mainHandler(driver, db):
    """Log in, then archive the "all" view followed by the "sent" view."""
    loginHandler(driver)
    siteHandler(driver, 0, db=db)
    siteHandler(driver, 1, db=db)


def main():
    """Create the database helper and a PhantomJS driver, then run the archive."""
    db = dbtools.dbHelper()
    caps = webdriver.DesiredCapabilities().PHANTOMJS.copy()
    # The capability prefix is "phantomjs."; the former "phantoms." key was
    # ignored, so the custom user agent was never applied.
    caps["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) "
        "Gecko/20100101 Firefox/54.0"
    )
    driver = webdriver.PhantomJS(desired_capabilities=caps)
    try:
        mainHandler(driver, db)
    finally:
        # quit() also ends the browser process, even when scraping failed.
        driver.quit()


if __name__ == "__main__":
    main()