2016-08-06 13:03:39 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2016-08-06 23:46:20 +00:00
|
|
|
from bs4 import BeautifulSoup
|
2016-08-06 13:03:39 +00:00
|
|
|
from selenium import webdriver
|
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
|
from selenium.common.exceptions import NoSuchElementException
|
2016-08-07 19:59:23 +00:00
|
|
|
import multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
|
2016-08-07 15:39:42 +00:00
|
|
|
import dbtools, phototools, setuptools
|
2016-08-06 23:46:20 +00:00
|
|
|
|
|
|
|
# Ids of the messages stored during this run; messageHandler appends to it.
currentRun = []

# Counters for newly stored received / sent messages; pageHandler bumps them.
rec, snt = 0, 0
|
2016-08-06 13:03:39 +00:00
|
|
|
|
|
|
|
def status(driver):
    """Report whether the browser session looks logged in.

    The check is purely textual: the session counts as logged in when the
    HTML of the currently loaded page does not contain the path
    ``/main/login.php``.

    Args:
        driver: Object exposing the current page's HTML as ``page_source``
            (a Selenium WebDriver in this script).

    Returns:
        bool: True when ``/main/login.php`` is absent from the page source,
        False when it is present.
    """
    return "/main/login.php" not in driver.page_source
|
|
|
|
|
2016-08-06 23:46:20 +00:00
|
|
|
def loadPage(url, driver, period=5, init=False):
    """Navigate the browser to *url*, then pause for *period* seconds.

    Unless *init* is true, the session is checked with ``status`` first and
    ``login`` is invoked when that check fails, so the page is requested
    with an authenticated session.

    Args:
        url: Address handed to ``driver.get``.
        driver: Browser driver used for the navigation.
        period: Seconds to sleep after issuing the navigation.
        init: When true, skip the login check entirely (the login routine
            itself loads pages this way).
    """
    must_authenticate = not init and not status(driver)
    if must_authenticate:
        login(driver)

    driver.get(url)
    time.sleep(period)
|
|
|
|
|
2016-08-06 23:46:20 +00:00
|
|
|
def loginHandler(driver, user=None, password=None):
    """Submit the login form and report whether the session is logged in.

    Loads the site root and then the login page (each followed by a
    3 second pause, with the login check disabled), types the credentials
    into the form fields named ``username`` and ``passwort``, submits with
    RETURN, waits another 3 seconds and returns ``status(driver)``.

    Args:
        driver: Selenium WebDriver to operate.
        user: Account name. When None, ``setuptools.user()`` is called at
            call time (previously the default was evaluated once, when the
            function was defined).
        password: Account password. When None, ``setuptools.password()`` is
            called at call time.

    Returns:
        bool: Result of ``status(driver)`` after the form was submitted.
    """
    # Local import: ``find_element(By.NAME, ...)`` is available in both old
    # and current Selenium releases, unlike ``find_element_by_name``.
    from selenium.webdriver.common.by import By

    if user is None:
        user = setuptools.user()
    if password is None:
        password = setuptools.password()

    loadPage("https://www.planetromeo.com/", driver, 3, True)
    loadPage("https://www.planetromeo.com/main/login.php", driver, 3, True)

    username_field = driver.find_element(By.NAME, "username")
    username_field.send_keys(user)

    # "passwort" is the literal name of the form field on the login page.
    password_field = driver.find_element(By.NAME, "passwort")
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)

    # Give the site time to process the submission before checking.
    time.sleep(3)

    return status(driver)
|
2016-08-06 13:03:39 +00:00
|
|
|
|
|
|
|
class LoginError(Exception):
    """Signals that logging in to the site did not succeed."""
|
|
|
|
|
2016-08-06 23:46:20 +00:00
|
|
|
def login(driver):
    """Make sure the session is logged in, logging in if necessary.

    Args:
        driver: Browser driver whose session is checked.

    Returns:
        bool: Always True when the function returns normally.

    Raises:
        LoginError: If the session is not logged in and ``loginHandler``
            also reports failure.
    """
    # Already authenticated: nothing to do.
    if status(driver):
        return True

    # Not authenticated: attempt one login.
    if loginHandler(driver):
        return True

    raise LoginError("Login failed.")
|
|
|
|
|
|
|
|
def messageID(url):
    """Return the part of *url* that follows its last ``=``.

    When *url* contains no ``=`` at all, the whole string is returned.
    """
    _head, _sep, tail = url.rpartition("=")
    return tail
|
|
|
|
|
|
|
|
def messageHandler(sender, recipient, mid, date, driver, mode=0, db=None):
    """Open one message, store it in the database and process its pictures.

    Args:
        sender: Value written to the ``sender_id`` column; also passed to
            ``phototools.processURL`` for every picture link found.
        recipient: Value written to the ``recipient_id`` column.
        mid: Message id (string); appended to the message URL and used as
            the row's ``id``.
        date: Value written to the ``created_at`` column.
        driver: Selenium WebDriver used to load the message page.
        mode: 0 loads the message via ``/msg/?id=``; any other value loads
            it via ``/msg/?type=sent&id=``.
        db: Database helper providing ``executeQuery`` and ``commit``. When
            None, a new ``dbtools.dbHelper()`` is created for this call
            (previously one was created when the function was defined).

    Raises:
        IndexError: If the page contains no element matching
            ``div.msg div``. ``pageHandler`` relies on this to skip such
            messages, so it is intentionally not caught here.
    """
    if db is None:
        db = dbtools.dbHelper()

    if mode == 0:
        message_url = "https://www.planetromeo.com/msg/?id=" + mid
    else:
        message_url = "https://www.planetromeo.com/msg/?type=sent&id=" + mid
    loadPage(message_url, driver)

    juha = BeautifulSoup(driver.page_source, "html5lib")
    text = juha.select("div.msg div")[0]

    # NOTE(review): this statement is assembled by string interpolation from
    # scraped message text, so a quote character in a message can break or
    # alter the query. Switch to a parameterised query once the
    # dbtools.executeQuery interface is confirmed to support parameters.
    db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % (mid, setuptools.unescapeText(text.text).strip(), sender, recipient, date))
    db.commit()

    # Only anchors that actually carry an href are considered, so an anchor
    # without one can no longer abort the handling of the remaining links.
    for link in juha.find_all("a", href=True):
        href = link["href"]
        if "/pix/popup.php/" not in href:
            continue
        try:
            phototools.processURL(href, sender)
        except Exception:
            # Picture handling stays best-effort: a failure on one picture
            # must neither lose the stored message nor skip other pictures.
            continue

    currentRun.append(mid)
|
2016-08-06 13:03:39 +00:00
|
|
|
|
2016-08-06 23:46:20 +00:00
|
|
|
def pageHandler(driver, db=None):
    """Store every not-yet-known message listed on the current overview page.

    Parses the rows of ``table.messageCenter`` (skipping the first row) from
    the page currently loaded in *driver*. For each row the second cell
    supplies the other user, the link in the third cell supplies the message
    id and the fourth cell supplies the date. Messages whose id is not yet
    known to ``db.checkID`` are passed to ``messageHandler``; the module
    counters ``rec`` / ``snt`` are incremented accordingly.

    The page is treated as the "sent" view when the current URL contains
    ``sent``; in that case the configured user is the sender, otherwise the
    recipient.

    Args:
        driver: Selenium WebDriver showing a message overview page.
        db: Database helper providing ``checkID``. When None, a new
            ``dbtools.dbHelper()`` is created for this call (previously one
            was created when the function was defined).

    Returns:
        bool: True if at least one new message was stored from this page,
        False otherwise. ``siteHandler`` stops paging on False.
    """
    global rec, snt

    if db is None:
        db = dbtools.dbHelper()

    juha = BeautifulSoup(driver.page_source, "html5lib")
    mode = 1 if "sent" in driver.current_url else 0

    count = 0
    for row in juha.select("table.messageCenter tr")[1:]:
        cells = row.find_all("td")
        # Rows that do not have the four expected cells are not message rows.
        if len(cells) < 4:
            continue

        # A row without a usable link cannot yield a message id; skip it
        # instead of failing on a missing anchor or href.
        anchor = cells[2].find("a", href=True)
        if anchor is None:
            continue

        user = cells[1].string
        mid = messageID(anchor["href"])
        date = cells[3].string

        try:
            if db.checkID(mid):
                continue
            if mode == 0:
                messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
                rec += 1
            else:
                messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
                snt += 1
        except IndexError:
            # messageHandler raises IndexError when the message body cannot
            # be located; such messages are skipped and not counted.
            continue

        count += 1

    return count > 0
|
|
|
|
|
2016-08-06 23:46:20 +00:00
|
|
|
def siteHandler(driver, mode=0, p=0, db=None):
    """Walk the message overview pages, starting at page *p*.

    Each page is loaded and handed to ``pageHandler``; paging continues with
    the next page number for as long as ``pageHandler`` returns True. The
    walk is a loop rather than one recursive call per page, so a long
    mailbox cannot run into the interpreter's recursion limit.

    Args:
        driver: Selenium WebDriver used to load the pages.
        mode: 0 walks the ``view=all`` overview, any other value walks the
            ``view=sent`` overview.
        p: Page number (the ``seite`` query parameter) to start from.
        db: Database helper passed on to ``pageHandler``. When None, a new
            ``dbtools.dbHelper()`` is created for this call (previously one
            was created when the function was defined).
    """
    if db is None:
        db = dbtools.dbHelper()

    view = "all" if mode == 0 else "sent"
    overview_url = (
        "https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view="
        + view
        + "&seite="
    )

    page = p
    while True:
        loadPage(overview_url + str(page), driver)
        if not pageHandler(driver, db):
            break
        page += 1
|
2016-08-06 13:03:39 +00:00
|
|
|
|
2016-08-07 19:59:23 +00:00
|
|
|
def mainHandler(received=None, sent=None):
    """Run the received pass and then the sent pass over the mailbox.

    This function is the target of the worker process started below. The
    module counters ``rec`` and ``snt`` are incremented inside that worker
    process, so the parent process never sees them change; the optional
    shared values are how the totals get back to the parent.

    NOTE(review): ``driver`` and ``db`` are read from module globals that
    are only assigned under ``if __name__ == "__main__"``, so the worker
    depends on inheriting them from the parent (fork start method) -
    confirm for platforms that use another start method.

    Args:
        received: Optional shared integer (e.g. ``multiprocessing.Value``)
            that receives ``rec`` once the received pass has finished.
        sent: Optional shared integer that receives ``snt`` once the sent
            pass has finished.
    """
    siteHandler(driver, db=db)
    if received is not None:
        received.value = rec

    siteHandler(driver, 1, db=db)
    if sent is not None:
        sent.value = snt


if __name__ == "__main__":
    db = dbtools.dbHelper()
    driver = webdriver.Firefox()

    # Shared counters written by the worker. They are created without a lock
    # because there is a single writer and the parent reads only after the
    # worker has ended; this also means a terminated worker can never leave
    # a lock held.
    received = multiprocessing.Value("i", 0, lock=False)
    sent = multiprocessing.Value("i", 0, lock=False)

    try:
        if loginHandler(driver):
            worker = multiprocessing.Process(target=mainHandler, args=(received, sent))
            worker.start()
            # Allow the run at most 1500 seconds, then stop it forcibly.
            worker.join(1500)
            if worker.is_alive():
                worker.terminate()
                worker.join()
    finally:
        # Close the browser window even if the run above raised.
        driver.close()

    # A pass that was cut short by the timeout reports 0, because its total
    # is only published once the pass completes.
    print("Received", received.value, "messages")
    print("Sent", sent.value, "messages")
|