sbtools/handler.py

141 lines
4.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
2016-08-07 15:39:42 +00:00
import dbtools, phototools, setuptools
def status(driver):
    """Report whether the driver currently shows an authenticated page.

    The page counts as authenticated when its source does not contain the
    text "Unauthorised Access", does not contain "/main/login.php", and is
    longer than 100 characters.

    Args:
        driver: Object exposing a ``page_source`` attribute (a Selenium
            WebDriver everywhere in this file).

    Returns:
        bool: True when all three checks pass, False otherwise.
    """
    # Take one snapshot of the page: re-reading ``page_source`` for every
    # check is redundant and could compare against different page states.
    source = driver.page_source
    return (
        "Unauthorised Access" not in source
        and "/main/login.php" not in source
        and len(source) > 100
    )
def loadPage(url, driver, period=5, init=False):
    """Navigate ``driver`` to ``url`` and then wait ``period`` seconds.

    Unless ``init`` is true, the session is checked with ``status`` first and
    ``login`` is called when that check fails.

    Args:
        url: Address passed to ``driver.get``.
        driver: Object exposing ``get`` (and ``page_source`` for the check).
        period: Seconds to sleep after the navigation.
        init: When true, skip the session check entirely.
    """
    needs_login = not init and not status(driver)
    if needs_login:
        login(driver)

    driver.get(url)
    time.sleep(period)
def loginHandler(driver, user=None, password=None):
    """Submit the login form and report whether the session is authenticated.

    Loads the site root and then the login page (both with the session check
    disabled), types the credentials into the "username" and "passwort"
    fields, submits with RETURN, waits three seconds and returns
    ``status(driver)``.

    Args:
        driver: Selenium WebDriver used for the login.
        user: Login name; when None, ``setuptools.user()`` is called.
        password: Password; when None, ``setuptools.password()`` is called.

    Returns:
        bool: Result of ``status(driver)`` after the form was submitted.
    """
    # Resolve credentials at call time. As default-argument expressions they
    # were evaluated once at import, freezing whatever the helpers returned
    # then and making a plain import of this module depend on them.
    if user is None:
        user = setuptools.user()
    if password is None:
        password = setuptools.password()

    # init=True: these loads must not trigger login() themselves.
    loadPage("https://classic.planetromeo.com/", driver, 3, True)
    loadPage("https://classic.planetromeo.com/main/login.php", driver, 3, True)

    username_field = driver.find_element_by_name("username")
    username_field.send_keys(user)

    password_field = driver.find_element_by_name("passwort")
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)

    # Give the site time to process the submission before checking.
    time.sleep(3)
    return status(driver)
class LoginError(Exception):
    """Raised by ``login`` when the session could not be authenticated."""
def login(driver):
    """Make sure ``driver`` holds an authenticated session.

    Returns True straight away when ``status`` already reports success;
    otherwise runs ``loginHandler`` and returns True if that succeeds.

    Raises:
        LoginError: If the session is not authenticated and ``loginHandler``
            reports failure.
    """
    if status(driver):
        return True
    if loginHandler(driver):
        return True
    raise LoginError("Login failed.")
def messageID(url):
    """Return the part of ``url`` after its last "=".

    When ``url`` contains no "=", the whole string is returned.
    """
    _head, _sep, tail = url.rpartition("=")
    return tail
2017-01-30 23:52:56 +00:00
def quickshareHandler(driver, url, sender):
    """Open a quickshare page and hand every linked picture to phototools.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Quickshare address; prefixed with the site root when it does not
            already contain "planetromeo.com".
        sender: Passed through to ``phototools.processURL``.
    """
    base = "https://classic.planetromeo.com/"
    page_url = url if "planetromeo.com" in url else base + url

    # loadPage requires the driver; calling it with the URL alone raised
    # TypeError on every invocation.
    loadPage(page_url, driver)
    soup = BeautifulSoup(driver.page_source, "html5lib")

    for link in soup.findAll("a"):
        # Only anchors carrying a data-pic attribute reference a picture.
        pic = link.get("data-pic")
        if not pic:
            continue
        pic_url = pic if "planetromeo.com" in pic else base + pic
        try:
            phototools.processURL(pic_url, sender, shutup=True)
        except Exception:
            # Best effort, as before: one picture failing must not stop the
            # rest. Unlike a bare except, KeyboardInterrupt and SystemExit
            # still propagate.
            continue
def messageHandler(sender, recipient, mid, date, driver, mode=0, db=None):
    """Load one message, store it, and process the pictures it links to.

    Args:
        sender: Value stored as ``sender_id`` and passed to phototools.
        recipient: Value stored as ``recipient_id``.
        mid: Message ID (string); appended to the message URL and stored.
        date: Value stored as ``created_at``.
        driver: Selenium WebDriver used to load the message page.
        mode: 0 loads ``/msg/?id=``; any other value loads the
            ``type=sent`` variant.
        db: Database helper exposing ``executeQuery`` and ``commit``; when
            None, a new ``dbtools.dbHelper()`` is created for this call.

    Raises:
        IndexError: If the page has no ``div.msg div`` element.
            ``pageHandler`` in this file catches it and skips the message.
    """
    # Create the helper lazily. As a default-argument expression it was
    # instantiated once at import time and shared by every call.
    if db is None:
        db = dbtools.dbHelper()

    if mode == 0:
        loadPage("https://classic.planetromeo.com/msg/?id=" + mid, driver)
    else:
        loadPage("https://classic.planetromeo.com/msg/?type=sent&id=" + mid, driver)

    soup = BeautifulSoup(driver.page_source, "html5lib")
    body = soup.select("div.msg div")[0]

    # NOTE(review): this statement is built by string interpolation from
    # scraped text, so a quote in a message can break or alter the query.
    # Switch to bound parameters once dbHelper.executeQuery is confirmed to
    # accept them.
    db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % (mid, setuptools.unescapeText(body.text).strip(), sender, recipient, date))
    db.commit()

    photo_rows_added = False
    for link in soup.findAll("a"):
        # Anchors without href used to raise KeyError into a bare except
        # around the whole loop, silently dropping every later link.
        href = link.get("href")
        if not href:
            continue
        try:
            if "/pix/popup.php/" in href:
                phototools.processURL(href, sender)
                try:
                    db.executeQuery("INSERT INTO photos(mid, pid) VALUES('%s', '%s');" % (mid, phototools.parseurl(href).split('/')[-1]))
                    photo_rows_added = True
                except Exception:
                    # Best effort, as before: a failed insert is ignored.
                    pass
            if "/quickshare/" in href:
                quickshareHandler(driver, href, sender)
        except Exception:
            # Best effort per link: one failing attachment must not prevent
            # the remaining links from being processed.
            continue

    # The photo rows are written after the commit above; commit again so
    # they do not depend on some later, unrelated commit.
    if photo_rows_added:
        db.commit()
def pageHandler(driver, db=None):
    """Process the message overview page currently loaded in ``driver``.

    Every row of ``table.messageCenter`` after the first is read. Rows whose
    message ID is not yet known to ``db.checkID`` are passed to
    ``messageHandler``. The page counts as a "sent" overview when the
    driver's current URL contains "sent"; in that case the configured user
    is the sender and the row's user the recipient, otherwise the reverse.

    Args:
        driver: Selenium WebDriver showing an overview page.
        db: Database helper exposing ``checkID``; when None, a new
            ``dbtools.dbHelper()`` is created for this call.

    Returns:
        bool: True when at least one row had an ID unknown to the database,
        False otherwise (including when the table has no data rows).
    """
    # Create the helper lazily instead of once at import time.
    if db is None:
        db = dbtools.dbHelper()

    # Parse first: messageHandler navigates the driver away from this page.
    soup = BeautifulSoup(driver.page_source, "html5lib")
    mode = 1 if "sent" in driver.current_url else 0

    found_new = False
    for row in soup.select("table.messageCenter tr")[1:]:
        cells = row.findAll("td")
        try:
            user = cells[1].string
            anchor = cells[2].find("a")
            date = cells[3].string
        except IndexError:
            # Row with fewer cells than a message row; skip it.
            continue

        # A row without a message link used to raise an uncaught TypeError
        # and abort the whole run; skip it like other malformed rows.
        href = anchor.get("href") if anchor is not None else None
        if not href:
            continue

        mid = messageID(href)
        if db.checkID(mid):
            continue

        found_new = True
        try:
            if mode == 0:
                messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
            else:
                messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
        except IndexError:
            # messageHandler raises IndexError when the message page lacks
            # the expected body element; skip that message, as before.
            pass

    return found_new
def siteHandler(driver, mode=0, p=0, db=None):
    """Walk the message overview pages starting at page ``p``.

    Each page is loaded and passed to ``pageHandler``; the walk moves on to
    the next page for as long as ``pageHandler`` returns True.

    Args:
        driver: Selenium WebDriver used to load the pages.
        mode: 0 walks the ``view=all`` overview; any other value walks
            ``view=sent``.
        p: Page number to start from (the ``seite`` query parameter).
        db: Database helper passed to ``pageHandler``; when None, a new
            ``dbtools.dbHelper()`` is created for this call.
    """
    # Create the helper lazily instead of once at import time.
    if db is None:
        db = dbtools.dbHelper()

    if mode == 0:
        overview = "https://classic.planetromeo.com/mitglieder/messages/uebersicht.php?view=all&seite="
    else:
        overview = "https://classic.planetromeo.com/mitglieder/messages/uebersicht.php?view=sent&seite="

    # Iterate instead of recursing once per page, so a long mailbox cannot
    # run into Python's recursion limit.
    while True:
        loadPage(overview + str(p), driver)
        if not pageHandler(driver, db):
            break
        p += 1
2017-01-30 23:52:56 +00:00
2017-05-01 21:50:13 +00:00
def mainHandler(driver, db):
    """Log in, then run ``siteHandler`` for mode 0 and for mode 1.

    Args:
        driver: Selenium WebDriver shared by all steps.
        db: Database helper forwarded to ``siteHandler``.
    """
    loginHandler(driver)
    # Mode 0 first, then mode 1 - the same order as two explicit calls.
    for mode in (0, 1):
        siteHandler(driver, mode, db=db)
if __name__ == "__main__":
    # Script entry point: run a full scrape with a PhantomJS driver.
    db = dbtools.dbHelper()

    caps = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    # The capability prefix is "phantomjs.", not "phantoms."; with the
    # misspelled key the custom user agent was never applied.
    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"
    driver = webdriver.PhantomJS(desired_capabilities=caps)

    try:
        mainHandler(driver, db)
    finally:
        # quit() ends the session and stops the PhantomJS process even when
        # the scrape raised; close() only closes the current window.
        driver.quit()