#!/usr/bin/env python3
"""Copy PlanetRomeo inbox and sent messages into the project database.

A Selenium-driven Firefox session logs in, walks the paginated message
overview pages, opens every message whose id ``dbtools`` does not know yet
and inserts it into the ``messages`` table. Photo links found inside a
message are handed to ``phototools.processURL``.
"""
import logging
import multiprocessing
import os
import time
import urllib.error
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# The code below calls setuptools.user(), setuptools.password() and
# setuptools.unescapeText(), so whichever module this import resolves to
# must provide them.
import dbtools
import phototools
import setuptools

logger = logging.getLogger(__name__)

# Ids of the messages stored by messageHandler() in this process.
currentRun = []


def status(driver):
    """Return True if the current page source does not mention the login URL."""
    return "/main/login.php" not in driver.page_source


def loadPage(url, driver, period=5, init=False):
    """Navigate ``driver`` to ``url`` and then sleep ``period`` seconds.

    Unless ``init`` is true, the login state is checked first and login()
    is invoked when the current page looks like the login page.

    Raises:
        LoginError: propagated from login() when logging in fails.
    """
    if not (init or status(driver)):
        login(driver)
    driver.get(url)
    time.sleep(period)


def loginHandler(driver, user=None, password=None):
    """Submit the login form and report whether the session looks logged in.

    Args:
        driver: Selenium WebDriver instance to operate.
        user: account name; ``setuptools.user()`` when omitted.
        password: account password; ``setuptools.password()`` when omitted.

    Returns:
        The result of status(driver) after the form was submitted.
    """
    # Resolve the credentials at call time; as default-argument expressions
    # they would be evaluated once, when the module is imported.
    if user is None:
        user = setuptools.user()
    if password is None:
        password = setuptools.password()

    # init=True keeps loadPage() from calling login() again recursively.
    loadPage("https://www.planetromeo.com/", driver, 3, True)
    loadPage("https://www.planetromeo.com/main/login.php", driver, 3, True)

    # find_element(By.NAME, ...) is accepted by old and new Selenium
    # releases; the find_element_by_name shortcut was removed in Selenium 4.
    field = driver.find_element(By.NAME, "username")
    field.send_keys(user)
    field = driver.find_element(By.NAME, "passwort")
    field.send_keys(password)
    field.send_keys(Keys.RETURN)
    time.sleep(3)
    return status(driver)


class LoginError(Exception):
    """Raised by login() when the session could not be logged in."""


def login(driver):
    """Ensure ``driver`` is logged in, logging in when necessary.

    Returns:
        True on success.

    Raises:
        LoginError: if status() is false and loginHandler() also fails.
    """
    if not (status(driver) or loginHandler(driver)):
        raise LoginError("Login failed.")
    return True


def messageID(url):
    """Return the part of ``url`` that follows its last ``=``."""
    return url.split("=")[-1]


def messageHandler(sender, recipient, mid, date, driver, mode=0, db=None):
    """Open one message, store it and process the photo links it contains.

    Args:
        sender: value written to the ``sender_id`` column; also passed to
            ``phototools.processURL``.
        recipient: value written to the ``recipient_id`` column.
        mid: message id (string) used in the URL and as the row id.
        date: value written to the ``created_at`` column.
        driver: Selenium WebDriver instance.
        mode: 0 loads the plain message URL, anything else the
            ``type=sent`` variant.
        db: helper exposing ``executeQuery`` and ``commit``; a new
            ``dbtools.dbHelper()`` is created when omitted.

    Raises:
        IndexError: if the page has no ``div.msg div`` element.
    """
    if db is None:
        db = dbtools.dbHelper()

    if mode == 0:
        loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver)
    else:
        loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver)

    juha = BeautifulSoup(driver.page_source, "html5lib")
    text = juha.select("div.msg div")[0]

    # NOTE(review): this statement is assembled by string formatting, so a
    # quote character in the message text ends up inside the SQL. Switch to
    # a parameterised query if dbtools.executeQuery supports one - its
    # signature is not visible from this file.
    db.executeQuery(
        "INSERT INTO messages(id, text, sender_id, recipient_id, created_at) "
        "VALUES('%s', '%s', '%s', '%s', '%s');"
        % (
            mid,
            setuptools.unescapeText(text.string or "").strip(),
            sender,
            recipient,
            date,
        )
    )
    db.commit()

    # Photo processing is best effort. Failures are handled per link so one
    # bad link does not prevent the remaining photos from being processed.
    for link in juha.find_all("a", href=True):
        href = link["href"]
        if "/pix/popup.php/" not in href:
            continue
        try:
            phototools.processURL(href, sender)
        except Exception:
            logger.warning(
                "Could not process photo link %s of message %s",
                href, mid, exc_info=True,
            )

    currentRun.append(mid)


def pageHandler(driver, db=None):
    """Store every not-yet-known message listed on the current overview page.

    The page counts as the "sent" view when the current URL contains
    ``sent``; the configured user is then the sender, otherwise the
    recipient.

    Args:
        driver: Selenium WebDriver currently showing an overview page.
        db: helper exposing ``checkID`` (plus what messageHandler() needs);
            a new ``dbtools.dbHelper()`` is created when omitted.

    Returns:
        True if at least one message was stored, otherwise False.
    """
    if db is None:
        db = dbtools.dbHelper()

    # Parse the overview and read the URL before messageHandler() navigates
    # the driver away from this page.
    juha = BeautifulSoup(driver.page_source, "html5lib")
    mode = 1 if "sent" in driver.current_url else 0

    # NOTE(review): the original indentation of the counter was lost; this
    # counts newly stored messages only - confirm against the intended
    # stop condition of siteHandler().
    count = 0
    # The first table row is skipped.
    for msg in juha.select("table.messageCenter tr")[1:]:
        try:
            data = msg.find_all("td")
            user = data[1].string
            mid = messageID(data[2].find("a")["href"])
            date = data[3].string
        except (IndexError, KeyError, TypeError):
            # Row without the expected cells or without a message link.
            continue

        if db.checkID(mid):
            continue

        try:
            if mode == 0:
                messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
            else:
                messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
        except IndexError:
            # messageHandler() found no message body on the page; skip it.
            continue
        count += 1

    return count > 0


def siteHandler(driver, mode=0, p=0, db=None):
    """Walk the overview pages from page ``p`` until pageHandler() is false.

    Args:
        driver: Selenium WebDriver instance.
        mode: 0 requests ``view=all``, anything else ``view=sent``.
        p: number of the first page to request (``seite`` parameter).
        db: database helper handed to pageHandler(); a new
            ``dbtools.dbHelper()`` is created when omitted.
    """
    if db is None:
        db = dbtools.dbHelper()

    view = "all" if mode == 0 else "sent"
    base = "https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view="
    # Iterate instead of recursing so a long mailbox cannot exhaust the
    # interpreter's recursion limit.
    while True:
        loadPage(base + view + "&seite=" + str(p), driver)
        if not pageHandler(driver, db):
            return
        p += 1


def mainHandler():
    """Crawl the ``all`` view and then the ``sent`` view.

    Uses the module-level ``driver`` and ``db`` created in the ``__main__``
    section below.
    """
    # NOTE(review): when run as a multiprocessing target this only works if
    # the child inherits these globals (fork start method) - confirm for
    # the platforms this script has to run on.
    siteHandler(driver, db=db)
    siteHandler(driver, 1, db=db)


if __name__ == "__main__":
    db = dbtools.dbHelper()
    driver = webdriver.Firefox()
    try:
        if loginHandler(driver):
            p = multiprocessing.Process(target=mainHandler)
            p.start()
            # Give the crawl at most 1500 seconds, then stop it.
            p.join(1500)
            if p.is_alive():
                p.terminate()
                p.join()
    finally:
        # Close the browser even if login or the crawl raised.
        driver.close()