oebb_py/workers/conn.py

235 lines
7.4 KiB
Python
Raw Normal View History

2017-09-21 12:48:05 +00:00
from bs4 import BeautifulSoup
import datetime
import pytz
import threading
import queue
import sys
2017-09-21 12:48:05 +00:00
import workers.val
from classes import *
def getStation(name):
    """Resolve a station name to the first match reported by the validator.

    Every candidate yielded by ``workers.val.validateName`` is collected
    before the first one is returned, so an empty result raises
    ``IndexError``.
    """
    candidates = list(workers.val.validateName(name))
    return candidates[0]
2017-09-21 12:48:05 +00:00
def _nodeText(node):
    """Return the stripped ``.string`` of a parsed tag, or None if unavailable.

    ``node`` may be None (element not found) and ``node.string`` may be None
    (the tag has no single text child); both cases yield None instead of
    raising ``AttributeError``.
    """
    if node is None or node.string is None:
        return None
    return node.string.strip()


def _spanOrOwnText(box):
    """Return the text of ``box``'s first <span>, falling back to ``box`` itself.

    Returns None when ``box`` is missing or the resulting text is empty.
    """
    if box is None:
        return None
    return _nodeText(box.find("span") or box) or None


def getService(sid, lines, q, eq = None):
    """Parse one leg of a connection and put it on ``q`` as ``(sid, Service)``.

    sid   -- key stored alongside the service in ``q``.
    lines -- parsed table rows of this leg; row 0 is read as the departure
             row and row 1 as the arrival row.
    q     -- queue receiving the ``(sid, Service)`` tuple.
    eq    -- optional queue; on failure it receives ``sys.exc_info()``
             before the exception is re-raised.
    """
    try:
        dep = lines[0]
        arr = lines[1]

        # Departure row.
        depst = list(workers.val.validateName(dep.find("td", { "class": "station" }).findAll("a")[0].string))[0]
        depdate = dep.find("td", { "class": "date" }).string.strip() or None
        deptime = dep.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
        depprogbox = dep.find("span", { "class": "prognosis" })
        depprog = _nodeText(depprogbox.find("span")) if depprogbox is not None else None
        depplatbox = dep.find("td", { "class": "platform" })
        depplat = _nodeText(depplatbox.find("span")) if depplatbox is not None else None
        product = dep.find("img", { "class": "product" })
        walk = product.get("src") == "/img/vs_oebb/fuss_pic.gif"
        name = "Walk" if walk else product.get("alt")

        # Arrival row. Optional cells are read through the same tolerant
        # helper so a missing prognosis/platform cell yields None, matching
        # how the departure row is handled.
        arrst = list(workers.val.validateName(arr.find("td", { "class": "station" }).findAll("a")[0].string))[0]
        arrdate = _spanOrOwnText(arr.find("td", { "class": "date" })) or depdate
        arrtime = arr.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
        arrprog = _spanOrOwnText(arr.find("span", { "class": "prognosis" }))
        arrplat = _spanOrOwnText(arr.find("td", { "class": "platform" }))

        # Only the arrival date is given: take the day before as departure date.
        if arrdate and not depdate:
            arrdts = datetime.datetime.strptime(arrdate, "%d.%m.%Y")
            depdts = arrdts - datetime.timedelta(days=1)
            depdate = datetime.datetime.strftime(depdts, "%d.%m.%Y")

        dest = None
        if not (walk or depdate):
            # Best effort: look the date (and destination) up on the page
            # linked from the product cell. Any failure here is deliberately
            # ignored; the placeholder date below covers it.
            try:
                purl = dep.find("td", { "class": "product" }).find("a").get("href")
                psource = HTTPClient().get(purl).text
                zuppa = BeautifulSoup(psource, "html5lib")
                blocks = zuppa.findAll("div", { "class": "block" })
                depdate = blocks[1].text.strip()
                arrdate = depdate
                dest = list(workers.val.validateName(blocks[2].text.split(":")[1].strip()))[0]
            except Exception:
                pass
            if not depdate:
                # Placeholder date used when no date could be determined.
                depdate = "01.01.2000"
                arrdate = depdate

        # NOTE(review): a walk leg with no date in either row still reaches
        # strptime with depdate None and raises ValueError -- confirm that
        # walk rows always carry a date.
        depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), "%d.%m.%Y %H:%M")
        arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), "%d.%m.%Y %H:%M")

        # "pünktlich" is replaced by the scheduled time itself.
        depprog = deptime if depprog == "pünktlich" else depprog
        arrprog = arrtime if arrprog == "pünktlich" else arrprog

        svc = Service(name, depst, depts, arrst, arrts, dest, depplat, depprog, arrplat, arrprog)
        q.put((sid, svc))
    except Exception:
        if eq is not None:
            eq.put(sys.exc_info())
        raise
def getDetails(cid, url, q, via = [], eq = None):
    """Fetch the detail page of connection ``cid`` and put it on ``q``.

    The page at ``url`` is parsed, every group of three ``tpDetails`` rows
    (after skipping the first row) is handed to ``getService`` on its own
    thread, and the resulting services are added to a ``Connection`` in
    the order of their row offsets. The result is put on ``q`` as
    ``(cid, Connection)``. Nothing is queued when the page has no
    ``trC0-<cid>`` row.

    On any exception ``sys.exc_info()`` is put on ``eq`` (when given) and
    the exception is re-raised.
    """
    try:
        page = BeautifulSoup(HTTPClient().get(url).text, "html5lib")
        block = page.find("tr", id="trC0-%i" % cid)
        if not block:
            return

        # Ticket purchase URL extraction is currently disabled.

        conn = Connection(True)
        for stop in via:
            conn.addVia(stop)

        rows = block.findAll("tr", { "class": "tpDetails" })[1:]
        pending = queue.PriorityQueue()
        jobs = []
        for offset in range(0, len(rows), 3):
            job = threading.Thread(
                target=getService,
                args=(offset, rows[offset:offset + 3], pending, eq),
                daemon = True,
            )
            job.start()
            jobs.append(job)
        for job in jobs:
            job.join()

        oneday = datetime.timedelta(days=1)
        latest = None  # latest arrival seen so far
        while not pending.empty():
            _, svc = pending.get()
            if not latest or svc.arrtime > latest:
                latest = svc.arrtime
            elif svc.deptime < latest:
                # The service is dated before the latest arrival: re-date it
                # onto that arrival's calendar day, keeping its clock times.
                midnight = datetime.datetime(latest.year, latest.month, latest.day)
                newdep = midnight + datetime.timedelta(hours=svc.deptime.hour, minutes=svc.deptime.minute)
                newarr = midnight + datetime.timedelta(hours=svc.arrtime.hour, minutes=svc.arrtime.minute)
                if newdep < latest:
                    # Still departs before the latest arrival: move to the next day.
                    newdep += oneday
                    newarr += oneday
                if newdep > newarr:
                    # Arrival clock time is earlier than departure: it is on the following day.
                    newarr += oneday
                svc.deptime = newdep
                svc.arrtime = newarr
            conn.addService(svc)

        q.put((cid, conn))
    except BaseException:
        if eq:
            eq.put(sys.exc_info())
        raise
def connRequest(frm, to, count = 3, time = None, mode = False, details = False, via = None):
    """Query the timetable and yield one ``Connection`` per result.

    frm, to -- station objects; ``extid`` is used in the query when set,
               otherwise ``name``.
    count   -- number of connections requested.
    time    -- datetime of the journey; defaults to the current time at
               the moment the request is made (not at import time).
    mode    -- when true, ``&timesel=arrive`` is appended to the query.
    details -- when true, every connection's detail page is fetched via
               ``getDetails`` (one thread each); otherwise connections are
               built from the overview rows only.
    via     -- optional sequence of intermediate station objects.

    Raises ``ValueError`` when the response lacks the
    ``GO_conViewMode=outward`` marker. In details mode, the first exception
    recorded by a worker thread is re-raised with its original traceback.
    """
    if time is None:
        time = datetime.datetime.now()
    if via is None:
        via = []

    outdate = time.strftime("%d.%m.%Y")
    outtime = time.strftime("%H:%M")
    url = "http://fahrplan.oebb.at/bin/query.exe/dn?start=1&S=%s&Z=%s&REQ0JourneyDate=%s&time=%s&REQ0HafasNumCons0=%s%s" % (
        frm.extid or frm.name,
        to.extid or to.name,
        outdate,
        outtime,
        count,
        "&timesel=arrive" if mode else "",
    )
    for pos, stop in enumerate(via, 1):
        url += "&REQ0JourneyStops%i.0G=%s&REQ0JourneyStops%i.0A=1" % (pos, stop.extid or stop.name, pos)

    source = HTTPClient().get(url).text
    if "GO_conViewMode=outward" not in source:
        raise ValueError("No connection found.")
    juha = BeautifulSoup(source, "html5lib")

    if details:
        # Links to the per-connection detail views.
        conns = []
        for a in juha.findAll("a"):
            href = a.get("href")
            if href and "GO_conViewMode" in href:
                conns.append(href)

        threads = []
        eq = queue.Queue()
        q = queue.PriorityQueue()
        for i, link in enumerate(conns):
            t = threading.Thread(target=getDetails, args=(i, link, q, via, eq), daemon = True)
            t.start()
            threads.append(t)
        for t in threads:
            t.join()

        if not eq.empty():
            exc = eq.get()
            raise exc[1].with_traceback(exc[2])

        # Entries are (index, Connection); the priority queue returns them by index.
        while not q.empty():
            yield q.get()[1]
    else:
        for i in range(count):
            det = juha.find("tr", id="trOverviewC0-%i" % i)
            if not det:
                break

            stations = det.find("td", { "class": "station" }).findAll("div")
            depst = getStation(stations[0].text.strip())
            arrst = getStation(stations[-1].text.strip())

            # A second date string is only present when arrival is on another day.
            dates = list(det.find("td", { "class": "date" }).strings)
            depdate = dates[0]
            arrdate = dates[1] if len(dates) > 1 else depdate

            times = det.find("div", { "class": "planed" }).text.split()
            deptime = times[0]
            arrtime = times[2]

            depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), "%d.%m.%Y %H:%M")
            arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), "%d.%m.%Y %H:%M")
            name = "/".join([img.get("title") for img in det.findAll("img", { "class": "product" })])

            # NOTE(review): the keyword is spelled ``currdep`` exactly as in
            # the original call -- verify against the Service signature.
            svc = Service(name, depst, depts, arrst, arrts, currdep = None, curarr = None)

            conn = Connection(details)
            for vst in via:
                conn.addVia(vst)
            conn.addService(svc)
            yield conn
def worker(frm, to, count = 3, time = None, mode = False, details = False, json = False, via = None):
    """Look up connections between two stations and serialise them.

    frm, to -- station names, resolved with ``getStation``.
    count   -- number of connections requested.
    time    -- datetime of the journey; defaults to the current time in
               the Europe/Vienna zone at the moment of the call (not at
               import time).
    mode    -- passed on to ``connRequest``; when true the resulting list
               is also reversed.
    details -- passed on to ``connRequest``.
    json    -- when true the result is a JSON document, otherwise XML.
    via     -- optional iterable of intermediate station names.

    Returns the serialised document as a string.
    """
    if time is None:
        time = datetime.datetime.now(pytz.timezone("Europe/Vienna"))

    origin = getStation(frm)
    destination = getStation(to)
    stops = [getStation(vst) for vst in via] if via else []

    conns = list(connRequest(origin, destination, count, time, mode, details, stops))
    if mode:
        conns = conns[::-1]

    if json:
        entries = [conn.json(2, i) for i, conn in enumerate(conns)]
        parts = ["{\n\"connections\": [\n"]
        if entries:
            # Entries are comma-separated; the last one is followed by a bare newline.
            parts.append(",\n".join(entries))
            parts.append("\n")
        parts.append(" ]\n}")
    else:
        parts = ["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<connections>\n"]
        parts.extend(conn.xml(1, i) + "\n" for i, conn in enumerate(conns))
        parts.append("</connections>")

    return "".join(parts)