Loads of changes. Filling the database seems to work now.

This commit is contained in:
Klaus-Uwe Mitterer 2016-08-07 01:46:20 +02:00
parent 59fe1a23f4
commit be09282609
2 changed files with 80 additions and 36 deletions

View file

@ -63,13 +63,24 @@ class dbObject:
except: except:
return False return False
def getLatestMessage(db, mode=0, user=None):
    """Return the highest stored message id for *user*, or 0 if there is none.

    db   -- database object providing ``executeQuery`` and ``getNext``.
            NOTE(review): the diff hunk header suggests this lives under
            ``class dbObject`` with ``db`` playing the role of ``self``;
            confirm against the full file.
    mode -- 0 filters on ``recipient_id``; any other value filters on
            ``sender_id``.
    user -- user id to filter on. When omitted, ``setuptools.user()`` is
            looked up at call time (not at import time, as a function-call
            default would be).
    """
    if user is None:
        user = setuptools.user()
    column = "recipient_id" if mode == 0 else "sender_id"
    # NOTE(review): the value is interpolated straight into the SQL text.
    # Switch to bound parameters if executeQuery supports them.
    db.executeQuery("SELECT max(id) FROM messages WHERE %s='%s'" % (column, user))
    try:
        return int(db.getNext()[0])
    except Exception:
        # Best effort: no row, a NULL max(id), or a failing fetch all mean
        # "nothing stored yet".
        return 0
def checkID(db, mid):
    """Return True when a row with id *mid* exists in ``messages``.

    db  -- database object providing ``executeQuery`` and ``getNext``.
    mid -- message id; it is interpolated unquoted into the query.

    Any failure while fetching the first row (no row, empty row, fetch
    error) is reported as False.
    """
    # NOTE(review): mid is placed into the SQL text as-is. Use bound
    # parameters if executeQuery supports them.
    db.executeQuery("SELECT * FROM messages WHERE id=%s" % mid)
    try:
        db.getNext()[0]
    except Exception:
        return False
    return True
def dbHelper(): def dbHelper():
if setuptools.dbtype() == SQLITE: if setuptools.dbtype() == SQLITE:
return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) return dbObject(dbtype=SQLITE, path=setuptools.dbpath())

View file

@ -1,11 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from BeautifulSoup import BeautifulSoup from bs4 import BeautifulSoup
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
import urllib.request, urllib.error, urllib.parse, time, os import urllib.request, urllib.error, urllib.parse, time, os
import setuptools import dbtools, setuptools
currentRun = []
def status(driver): def status(driver):
if "/main/login.php" not in driver.page_source: if "/main/login.php" not in driver.page_source:
@ -13,14 +15,15 @@ def status(driver):
else: else:
return False return False
def loadPage(url, driver, period=5, init=False):
    """Navigate *driver* to *url* and wait *period* seconds.

    url    -- address to open.
    driver -- browser driver; must be passed explicitly (there is no
              module-level driver to fall back on).
    period -- seconds to sleep after issuing the request.
    init   -- when true, skip the session check. The login routine itself
              uses this so that loading the login page does not trigger
              another login.

    Unless *init* is set, ``status(driver)`` is consulted first and
    ``login(driver)`` is called when it reports no active session.
    """
    if not (init or status(driver)):
        login(driver)
    driver.get(url)
    time.sleep(period)
def loginHandler(user = setuptools.user, password = setuptools.password, driver = driver): def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
loadPage("http://www.planetromeo.com/",10,True,driver) loadPage("https://www.planetromeo.com/",driver,3,True)
loadPage("https://www.planetromeo.com/main/login.php",driver,3,True)
curfield = driver.find_element_by_name("username") curfield = driver.find_element_by_name("username")
curfield.send_keys(user) curfield.send_keys(user)
@ -29,59 +32,89 @@ def loginHandler(user = setuptools.user, password = setuptools.password, driver
curfield.send_keys(password) curfield.send_keys(password)
curfield.send_keys(Keys.RETURN) curfield.send_keys(Keys.RETURN)
time.sleep(10) time.sleep(3)
return status() return status(driver)
class LoginError(Exception):
    """Raised when logging in to the site does not succeed."""
def login(driver):
    """Make sure *driver* holds a logged-in session.

    Returns True when ``status(driver)`` already reports a session or when
    ``loginHandler(driver)`` succeeds; raises LoginError otherwise.
    """
    if status(driver) or loginHandler(driver):
        return True
    raise LoginError("Login failed.")
def messageID(url):
    """Extract the message id from a message link.

    The ``id`` query parameter is used when the URL has one, so links that
    carry further parameters after the id (``?id=1&view=all``) still yield
    the id. URLs without an ``id`` parameter fall back to the text after
    the last ``=``.
    """
    query = urllib.parse.urlparse(url).query
    ids = urllib.parse.parse_qs(query).get("id")
    if ids:
        return ids[-1]
    return url.split("=")[-1]
def messageHandler(sender, recipient, mid, date, driver, mode=0, db=None):
    """Open one message, store it in the database and process its photo links.

    sender, recipient -- user ids written to ``sender_id`` / ``recipient_id``.
    mid    -- message id as a string (it is concatenated into the URL).
    date   -- value written to ``created_at``.
    driver -- browser driver used to load the message page.
    mode   -- 0 loads the plain message URL; any other value loads the
              ``type=sent`` variant.
    db     -- database object; a helper is created on demand when omitted
              (instead of opening a connection at import time).

    Raises IndexError when the page contains no ``div.msg div`` element.
    """
    if db is None:
        db = dbtools.dbHelper()

    if mode == 0:
        loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver)
    else:
        loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver)

    juha = BeautifulSoup(driver.page_source, "html5lib")
    text = juha.select("div.msg div")[0]
    body = setuptools.unescapeText(text.string or "").strip()

    # NOTE(review): the message text comes from the website and is
    # interpolated into the SQL statement; a single quote in a message will
    # break (or alter) the query. Switch to bound parameters if
    # executeQuery supports them.
    db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % (mid, body, sender, recipient, date))
    db.commit()

    # Photo handling stays best effort, but failures are reported instead of
    # being swallowed, and one bad link no longer skips the remaining ones.
    # NOTE(review): `phototools` is not among the imports visible at the top
    # of this file; if it really is missing, every call below fails with
    # NameError.
    for link in juha.findAll("a"):
        href = link.get("href") or ""
        if "/pix/popup.php/" not in href:
            continue
        try:
            phototools.processURL(href, sender)
        except Exception as err:
            print("Photo processing failed for message %s: %r" % (mid, err))

    currentRun.append(mid)
def pageHandler(driver, db=None):
    """Store every not-yet-known message listed on the current overview page.

    driver -- browser driver currently showing a message overview page.
    db     -- database object; a helper is created on demand when omitted
              (instead of opening a connection at import time).

    Returns True when at least one new message was stored, False otherwise.
    """
    if db is None:
        db = dbtools.dbHelper()

    # Snapshot the listing first: messageHandler navigates the driver away.
    juha = BeautifulSoup(driver.page_source, "html5lib")
    mode = 1 if "sent" in driver.current_url else 0

    count = 0
    # The first table row is skipped (presumably the header row).
    for msg in juha.select("table.messageCenter tr")[1:]:
        data = msg.findAll("td")
        try:
            user = data[1].string
            mid = messageID(data[2].find("a")["href"])
            date = data[3].string
        except (IndexError, TypeError, KeyError):
            # Too few cells, no link, or a link without href: not a message row.
            continue

        if db.checkID(mid):
            continue

        # Mode 1 is the "sent" view, so the listed user is the recipient and
        # the configured account is the sender; mode 0 is the reverse. This
        # matches getLatestMessage, where mode 0 filters on recipient_id.
        # NOTE(review): the committed code passed these two the other way
        # round; confirm against existing database rows.
        if mode == 1:
            sender, recipient = setuptools.user(), user
        else:
            sender, recipient = user, setuptools.user()

        try:
            messageHandler(sender, recipient, mid, date, driver, mode, db)
        except IndexError:
            # The message page had no body element; skip this message.
            continue

        # NOTE(review): indentation was lost in the source view; counting
        # only newly stored messages is assumed here, which makes paging
        # stop at the first page without anything new.
        count += 1

    return count > 0
def siteHandler(driver, mode=0, p=0, db=None):
    """Walk the message overview pages, starting at page *p*.

    driver -- browser driver used for navigation.
    mode   -- 0 walks the ``view=all`` listing; any other value walks
              ``view=sent``.
    p      -- index of the first page to load.
    db     -- database object; a helper is created on demand when omitted
              (instead of opening a connection at import time).

    Pages are loaded one after another until pageHandler returns False.
    A loop is used rather than recursion so that long mailboxes cannot hit
    the interpreter's recursion limit.
    """
    if db is None:
        db = dbtools.dbHelper()

    view = "all" if mode == 0 else "sent"
    base = "https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=" + view + "&seite="
    while True:
        loadPage(base + str(p), driver)
        if not pageHandler(driver, db):
            break
        p += 1
if __name__ == "__main__":
    # Script entry point: log in, then crawl the received listing (mode 0)
    # followed by the sent listing (mode 1), sharing one database object.
    db = dbtools.dbHelper()
    driver = webdriver.Firefox()
    try:
        if loginHandler(driver):
            siteHandler(driver, db=db)
            siteHandler(driver, 1, db=db)
    finally:
        # Always shut the browser down, even when the crawl fails, so no
        # Firefox process is left behind.
        driver.quit()
    print("KTHXBAI")