From be09282609d2e2f8d6e3548c1c295c0aa83397c1 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Sun, 7 Aug 2016 01:46:20 +0200 Subject: [PATCH] Loads of changes. Filling the database seems to work now. --- dbtools/__init__.py | 15 ++++++- messages.py | 101 +++++++++++++++++++++++++++++--------------- 2 files changed, 80 insertions(+), 36 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index a3c1f30..a0a49ec 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -63,13 +63,24 @@ class dbObject: except: return False - def getLatestMessage(db): - db.executeQuery("SELECT max(id) FROM messages") + def getLatestMessage(db, mode = 0, user = setuptools.user()): + if mode == 0: + db.executeQuery("SELECT max(id) FROM messages WHERE recipient_id='%s'" % user) + else: + db.executeQuery("SELECT max(id) FROM messages WHERE sender_id='%s'" % user) try: return int(db.getNext()[0]) except: return 0 + def checkID(db, mid): + db.executeQuery("SELECT * FROM messages WHERE id=%s" % mid) + try: + db.getNext()[0] + return True + except: + return False + def dbHelper(): if setuptools.dbtype() == SQLITE: return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) diff --git a/messages.py b/messages.py index 5446aac..1c044b0 100755 --- a/messages.py +++ b/messages.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 -from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException import urllib.request, urllib.error, urllib.parse, time, os -import setuptools +import dbtools, setuptools + +currentRun = [] def status(driver): if "/main/login.php" not in driver.page_source: @@ -13,14 +15,15 @@ def status(driver): else: return False -def loadPage(url,period=5,init=False, driver=driver): +def loadPage(url, driver, period=5,init=False): if not (init or status(driver)): - login() + login(driver) driver.get(url) time.sleep(period) -def loginHandler(user = setuptools.user, password = setuptools.password, driver = driver): - loadPage("http://www.planetromeo.com/",10,True,driver) +def loginHandler(driver, user = setuptools.user(), password = setuptools.password()): + loadPage("https://www.planetromeo.com/",driver,3,True) + loadPage("https://www.planetromeo.com/main/login.php",driver,3,True) curfield = driver.find_element_by_name("username") curfield.send_keys(user) @@ -29,59 +32,89 @@ def loginHandler(user = setuptools.user, password = setuptools.password, driver curfield.send_keys(password) curfield.send_keys(Keys.RETURN) - time.sleep(10) + time.sleep(3) - return status() + return status(driver) class LoginError(Exception): pass -def login(): - if not (status() or loginHandler()): +def login(driver): + if not (status(driver) or loginHandler(driver)): raise LoginError("Login failed.") return True def messageID(url): - return url.split("=")[1] + return url.split("=")[-1] -def messageHandler(mid, driver): - loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver=driver) +def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = dbtools.dbHelper()): + global currentRun + if mode == 0: + loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver) + else: + loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver) + juha = BeautifulSoup(driver.page_source, "html5lib") + text = juha.select("div.msg div")[0] + + db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % (mid, setuptools.unescapeText(text.string or "").strip(), sender, recipient, date)) + db.commit() try: - links = driver.find_elements_by_partial_link_text('pix/popup.php/') + links = juha.findAll("a") for link in links: - phototools.processURL(link.get_attribute('href'), mid) - except NoSuchElementException as e: - pass + if "/pix/popup.php/" in link["href"]: + phototools.processURL(link["href"], sender) + except: + pass -def pageHandler(driver): - webpage = driver.page_source - links = BeautifulSoup(webpage).findAll('a') + currentRun += [mid] + +def pageHandler(driver, db = dbtools.dbHelper()): + global currentRun count = 0 + juha = BeautifulSoup(driver.page_source, "html5lib") - for l in links: - url = l['href'] - if "/msg/?id=" in url: - count += 1 - mid = messageID(url) - if mid <= dbtools.getLatestMessage() - return False - messageHandler(mid, driver) + mode = 0 + if "sent" in driver.current_url: + mode = 1 + + try: + for msg in juha.select("table.messageCenter tr")[1:]: + try: + data = msg.findAll('td') + user = data[1].string + mid = messageID(data[2].find("a")["href"]) + date = data[3].string + if not db.checkID(mid): + if mode == 1: + messageHandler(user, setuptools.user(), mid, date, driver, mode, db) + else: + messageHandler(setuptools.user(), user, mid, date, driver, mode, db) + count += 1 + except IndexError: + pass + except IndexError: + return False if count == 0: return False return True -def siteHandler(p = 0, driver = driver): - loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?seite=" + str(p), driver=driver) - if pageHandler(driver): - siteHandler(p+1, driver) +def siteHandler(driver, mode = 0, p = 0, db = dbtools.dbHelper()): + if mode == 0: + loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=all&seite=" + str(p), driver) + else: + loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=sent&seite=" + str(p), driver) + if pageHandler(driver, db): + siteHandler(driver, mode, p+1, db) if __name__ == "__main__": + db = dbtools.dbHelper() driver = webdriver.Firefox() - if login(driver): - siteHandler(driver=driver) + if loginHandler(driver): + siteHandler(driver, db=db) + siteHandler(driver, 1, db=db) print("KTHXBAI")