Loads of changes. Filling the database seems to work now.
This commit is contained in:
parent
59fe1a23f4
commit
be09282609
2 changed files with 80 additions and 36 deletions
|
@ -63,13 +63,24 @@ class dbObject:
|
|||
except:
|
||||
return False
|
||||
|
||||
def getLatestMessage(db, mode = 0, user = None):
    """Return the highest message id stored for ``user``, or 0 if none is found.

    db   -- database object; executeQuery() and getNext() are called on it.
    mode -- 0 filters on recipient_id, any other value on sender_id.
    user -- value to match against that column. When omitted it is taken
            from setuptools.user() at call time (a default written as
            ``user = setuptools.user()`` would be frozen at import time).
    """
    if user is None:
        user = setuptools.user()

    column = "recipient_id" if mode == 0 else "sender_id"
    # NOTE(review): the statement is still assembled by string formatting because
    # executeQuery()'s signature is not visible here; switch to bound parameters
    # if it supports them.
    db.executeQuery("SELECT max(id) FROM messages WHERE %s='%s'" % (column, user))

    try:
        return int(db.getNext()[0])
    except Exception:
        # Nothing usable came back (no row, or a value int() cannot convert).
        # ``Exception`` rather than a bare except so Ctrl-C still propagates.
        return 0
|
||||
|
||||
def checkID(db, mid):
    """Return True if the messages table yields a row for id ``mid``, else False.

    db  -- database object; executeQuery() and getNext() are called on it.
    mid -- message id, interpolated unquoted into the query.
    """
    # NOTE(review): ``mid`` is formatted straight into the SQL text; callers
    # must pass a numeric id. Use bound parameters if executeQuery() allows it.
    db.executeQuery("SELECT * FROM messages WHERE id=%s" % mid)
    try:
        db.getNext()[0]
    except Exception:
        # No row to index into. ``Exception`` instead of a bare except so
        # KeyboardInterrupt/SystemExit are not swallowed.
        return False
    return True
|
||||
|
||||
def dbHelper():
|
||||
if setuptools.dbtype() == SQLITE:
|
||||
return dbObject(dbtype=SQLITE, path=setuptools.dbpath())
|
||||
|
|
101
messages.py
101
messages.py
|
@ -1,11 +1,13 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
import urllib.request, urllib.error, urllib.parse, time, os
|
||||
import setuptools
|
||||
import dbtools, setuptools
|
||||
|
||||
currentRun = []
|
||||
|
||||
def status(driver):
|
||||
if "/main/login.php" not in driver.page_source:
|
||||
|
@ -13,14 +15,15 @@ def status(driver):
|
|||
else:
|
||||
return False
|
||||
|
||||
def loadPage(url, driver, period=5, init=False):
    """Navigate ``driver`` to ``url`` and then sleep for ``period`` seconds.

    url    -- address passed to driver.get().
    driver -- browser driver object; also handed to status() and login().
    period -- seconds to sleep after the navigation call.
    init   -- when true, the logged-in check is skipped (loginHandler() uses
              this so that loading the login page does not recurse into login()).

    Raises whatever login() raises when a re-login is attempted and fails.
    """
    if not init and not status(driver):
        login(driver)
    driver.get(url)
    time.sleep(period)
|
||||
|
||||
def loginHandler(user = setuptools.user, password = setuptools.password, driver = driver):
|
||||
loadPage("http://www.planetromeo.com/",10,True,driver)
|
||||
def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
|
||||
loadPage("https://www.planetromeo.com/",driver,3,True)
|
||||
loadPage("https://www.planetromeo.com/main/login.php",driver,3,True)
|
||||
|
||||
curfield = driver.find_element_by_name("username")
|
||||
curfield.send_keys(user)
|
||||
|
@ -29,59 +32,89 @@ def loginHandler(user = setuptools.user, password = setuptools.password, driver
|
|||
curfield.send_keys(password)
|
||||
|
||||
curfield.send_keys(Keys.RETURN)
|
||||
time.sleep(10)
|
||||
time.sleep(3)
|
||||
|
||||
return status()
|
||||
return status(driver)
|
||||
|
||||
class LoginError(Exception):
    """Raised by login() when the session could not be authenticated."""
|
||||
|
||||
def login(driver):
    """Make sure ``driver`` holds a logged-in session.

    Returns True when status(driver) already reports a session or when
    loginHandler(driver) succeeds; loginHandler() is only tried if the
    status check fails.

    Raises LoginError if both checks come back false.
    """
    if status(driver):
        return True
    if loginHandler(driver):
        return True
    raise LoginError("Login failed.")
|
||||
|
||||
def messageID(url):
    """Return the text after the last '=' in ``url``.

    Taking the last segment (not the second one) keeps this correct for
    URLs with several parameters such as ``...?type=sent&id=123``.
    If ``url`` contains no '=', it is returned unchanged.
    """
    return url.rsplit("=", 1)[-1]
|
||||
|
||||
def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = None):
    """Open one message page, store the message in the database and process its photo links.

    sender, recipient -- values written to sender_id / recipient_id; sender is
                         also passed to phototools.processURL().
    mid    -- message id as a string (it is concatenated into the page URL).
    date   -- value written to created_at.
    driver -- browser driver used to load the page.
    mode   -- 0 loads the plain message URL, anything else the ``type=sent`` one.
    db     -- database object; when omitted, dbtools.dbHelper() is called at
              call time (not once at import time).

    Raises IndexError if the page contains no ``div.msg div`` element.
    Appends ``mid`` to the module-level ``currentRun`` list.
    """
    if db is None:
        db = dbtools.dbHelper()

    if mode == 0:
        loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver)
    else:
        loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver)
    juha = BeautifulSoup(driver.page_source, "html5lib")
    text = juha.select("div.msg div")[0]

    body = setuptools.unescapeText(text.string or "").strip()
    # The statement is built by string formatting, so an apostrophe in the
    # scraped text would terminate the literal early. Double every single
    # quote (standard SQL escaping) before interpolating.
    # NOTE(review): prefer bound parameters if executeQuery() supports them.
    values = tuple(str(v).replace("'", "''") for v in (mid, body, sender, recipient, date))
    db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % values)
    db.commit()

    # href=True skips anchors without an href, so one such anchor no longer
    # aborts the handling of every remaining link.
    for link in juha.findAll("a", href=True):
        href = link["href"]
        if "/pix/popup.php/" not in href:
            continue
        try:
            phototools.processURL(href, sender)
        except Exception as e:
            # Photo download stays best-effort, but the failure is reported
            # instead of being silently dropped.
            # NOTE(review): no import of ``phototools`` is visible in this
            # file's import block; a NameError would surface here.
            print("Could not process photo %s: %s" % (href, e))

    currentRun.append(mid)
|
||||
|
||||
def pageHandler(driver, db = None):
    """Store every not-yet-known message listed on the overview page currently loaded in ``driver``.

    driver -- browser driver; page_source and current_url are read from it.
    db     -- database object providing checkID(); when omitted,
              dbtools.dbHelper() is called at call time.

    Returns True if at least one new message was stored, otherwise False
    (siteHandler() uses this to decide whether to fetch the next page).
    """
    if db is None:
        db = dbtools.dbHelper()

    count = 0
    juha = BeautifulSoup(driver.page_source, "html5lib")

    # The "sent" overview is recognised by its URL.
    mode = 1 if "sent" in driver.current_url else 0

    # First row is skipped -- presumably the table header; verify against the page.
    for msg in juha.select("table.messageCenter tr")[1:]:
        try:
            data = msg.findAll('td')
            user = data[1].string
            mid = messageID(data[2].find("a")["href"])
            date = data[3].string
        except (IndexError, KeyError, TypeError):
            # Row without the expected cells or without a message link.
            continue

        if db.checkID(mid):
            continue

        try:
            if mode == 1:
                # Sent view: the account owner wrote the message, the listed
                # user received it. This matches getLatestMessage(), where
                # mode 1 filters on sender_id.
                messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
            else:
                messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
        except IndexError:
            # messageHandler() found no message body on the page; skip it.
            continue
        count += 1

    return count > 0
|
||||
|
||||
def siteHandler(driver, mode = 0, p = 0, db = None):
    """Walk the message overview pages starting at page ``p`` until one yields nothing new.

    driver -- browser driver used for loading pages.
    mode   -- 0 requests ``view=all``, anything else ``view=sent``.
    p      -- page number to start from.
    db     -- database object passed on to pageHandler(); when omitted,
              dbtools.dbHelper() is called at call time.

    Pages are visited in a loop rather than by recursion, so a long message
    history cannot exhaust the interpreter's recursion limit.
    """
    if db is None:
        db = dbtools.dbHelper()

    view = "all" if mode == 0 else "sent"
    base = "https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=" + view + "&seite="
    while True:
        loadPage(base + str(p), driver)
        if not pageHandler(driver, db):
            break
        p += 1
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: log in, archive the "all" view (mode 0) and then
    # the "sent" view (mode 1) into one shared database object.
    db = dbtools.dbHelper()
    driver = webdriver.Firefox()
    try:
        if loginHandler(driver):
            siteHandler(driver, db=db)
            siteHandler(driver, 1, db=db)
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()

    print("KTHXBAI")
|
||||
|
||||
|
|
Loading…
Reference in a new issue