"""Query the OEBB online timetable (fahrplan.oebb.at) and render connections.

``worker`` is the string-in/string-out entry point; ``connRequest`` performs
the HTTP request and yields ``Connection`` objects; ``getDetails`` and
``getService`` run in threads to parse the per-connection detail pages.
"""

import datetime
import queue
import sys
import threading

import pytz
from bs4 import BeautifulSoup

import workers.val
# Kept last, as in the original import order, so that anything it exports
# keeps overriding the names imported above.  HTTPClient, Service and
# Connection are not defined in this file and are expected to come from here.
from classes import *

_DATE_FMT = "%d.%m.%Y"
_DATETIME_FMT = "%d.%m.%Y %H:%M"
# Date used for a non-walk service when no date could be determined at all;
# getDetails() later moves such services onto the day of the preceding one.
_PLACEHOLDER_DATE = "01.01.2000"
_WALK_ICON = "/img/vs_oebb/fuss_pic.gif"
_ON_TIME = "pünktlich"


def _parse_timestamp(date_str, time_str):
    """Combine a ``dd.mm.YYYY`` date and an ``HH:MM`` time into a datetime."""
    return datetime.datetime.strptime(
        "%s %s" % (date_str, time_str), _DATETIME_FMT
    )


def _span_or_self_text(cell):
    """Return the stripped string of *cell*'s first ``<span>``, or of *cell*
    itself when it contains no ``<span>``."""
    return (cell.find("span") or cell).string.strip()


def getStation(name):
    """Return the first result of ``workers.val.validateName(name)``.

    Raises:
        IndexError: if the validator yields no result.
    """
    return list(workers.val.validateName(name))[0]


def getService(sid, lines, q, eq=None):
    """Parse one service (one leg of a connection) and put it on *q*.

    Args:
        sid: Key stored alongside the service; ``getDetails`` uses a
            ``PriorityQueue`` so services come back ordered by this key.
        lines: Three consecutive table rows: departure row, arrival row and a
            third row that is not parsed.
        q: Queue receiving the tuple ``(sid, Service)``.
        eq: Optional queue that receives ``sys.exc_info()`` if parsing fails.

    Raises:
        Exception: any parsing error is recorded in *eq* (when given) and then
            re-raised.
    """
    try:
        # The third row is unused, but indexing it keeps rejecting incomplete
        # row groups exactly as before.
        dep, arr, _details = lines[0], lines[1], lines[2]

        # --- departure row ---
        depst = getStation(
            dep.find("td", {"class": "station"}).findAll("a")[0].string
        )
        depdate = dep.find("td", {"class": "date"}).string.strip() or None
        deptime = (
            dep.find("td", {"class": "timeValue"}).find("span").string.split()[1]
        )

        dep_prog_cell = dep.find("span", {"class": "prognosis"})
        dep_prog_span = (
            dep_prog_cell.find("span") if dep_prog_cell is not None else None
        )
        depprog = (
            dep_prog_span.string.strip() if dep_prog_span is not None else None
        )

        dep_plat_cell = dep.find("td", {"class": "platform"})
        dep_plat_span = (
            dep_plat_cell.find("span") if dep_plat_cell is not None else None
        )
        depplat = (
            dep_plat_span.string.strip() if dep_plat_span is not None else None
        )

        product_img = dep.find("img", {"class": "product"})
        walk = product_img.get("src") == _WALK_ICON
        name = "Walk" if walk else product_img.get("alt")

        # --- arrival row ---
        arrst = getStation(
            arr.find("td", {"class": "station"}).findAll("a")[0].string
        )
        arrdate = (
            _span_or_self_text(arr.find("td", {"class": "date"})) or depdate
        )
        arrtime = (
            arr.find("td", {"class": "timeValue"}).find("span").string.split()[1]
        )
        arrprog = (
            _span_or_self_text(arr.find("span", {"class": "prognosis"})) or None
        )
        arrplat = (
            _span_or_self_text(arr.find("td", {"class": "platform"})) or None
        )

        # Only the arrival date is known: the departure is taken to be on the
        # previous day.
        if arrdate and not depdate:
            arr_day = datetime.datetime.strptime(arrdate, _DATE_FMT)
            dep_day = arr_day - datetime.timedelta(days=1)
            depdate = dep_day.strftime(_DATE_FMT)

        dest = None

        if not (walk or depdate):
            # Best effort: read date and destination from the product page.
            # Any failure here simply leaves the values found so far.
            try:
                purl = dep.find("td", {"class": "product"}).find("a").get("href")
                psource = HTTPClient().get(purl).text
                zuppa = BeautifulSoup(psource, "html5lib")
                blocks = zuppa.findAll("div", {"class": "block"})
                depdate = blocks[1].text.strip()
                arrdate = depdate
                dest = getStation(blocks[2].text.split(":")[1].strip())
            except Exception:
                pass

        if not walk and not depdate:
            depdate = _PLACEHOLDER_DATE
            arrdate = depdate

        # NOTE(review): a walk row without any date still reaches this point
        # with depdate None, which makes strptime raise - confirm walk rows
        # always carry a date.
        depts = _parse_timestamp(depdate, deptime)
        arrts = _parse_timestamp(arrdate, arrtime)

        # "pünktlich" (on time) is replaced by the scheduled time itself.
        if depprog == _ON_TIME:
            depprog = deptime
        if arrprog == _ON_TIME:
            arrprog = arrtime

        svc = Service(
            name, depst, depts, arrst, arrts, dest,
            depplat, depprog, arrplat, arrprog,
        )
        q.put((sid, svc))

    except Exception:
        if eq is not None:
            eq.put(sys.exc_info())
        raise


def getDetails(cid, url, q, via=None, eq=None):
    """Fetch one connection's detail page and put the parsed result on *q*.

    The page at *url* is searched for the row ``tr#trC0-<cid>``; if it is
    missing the function returns without putting anything on *q*.  Otherwise
    every group of three ``tr.tpDetails`` rows (the first such row is skipped)
    is parsed by ``getService`` in its own thread.

    Args:
        cid: Connection index; used in the row id and as the sort key on *q*.
        url: Address of the detail page.
        q: Queue receiving the tuple ``(cid, Connection)``.
        via: Optional iterable of via stations added to the connection.
        eq: Optional queue that receives ``sys.exc_info()`` on failure; it is
            also handed to the ``getService`` threads.

    Raises:
        Exception: any error is recorded in *eq* (when given) and re-raised.
    """
    try:
        ssource = HTTPClient().get(url).text
        suppe = BeautifulSoup(ssource, "html5lib")
        cont = suppe.find("tr", id="trC0-%i" % cid)

        if cont is None:
            return

        # Disabled ticket-link extraction, kept for reference:
        # buyurl = None
        #
        # for url in cont.findAll("a"):
        #     if url.get("href") and "https://tickets.oebb.at/de/ticket/ticket?" in url.get("href"):
        #         buyurl = url.get("href")

        conn = Connection(True)

        for vst in via or ():
            conn.addVia(vst)

        lines = cont.findAll("tr", {"class": "tpDetails"})[1:]
        threads = []
        iq = queue.PriorityQueue()

        for start in range(0, len(lines), 3):
            t = threading.Thread(
                target=getService,
                args=(start, lines[start:start + 3], iq, eq),
                daemon=True,
            )
            t.start()
            threads.append(t)

        for t in threads:
            t.join()

        # wdate is the latest arrival seen so far.  A service that departs
        # before it (for example one dated with the placeholder date) is moved
        # onto wdate's calendar day, keeping its time of day.
        # NOTE(review): wdate is not advanced after such a move - confirm
        # whether that is intended.
        wdate = None

        while not iq.empty():
            svc = iq.get()[1]

            if wdate is None or svc.arrtime > wdate:
                wdate = svc.arrtime
            elif svc.deptime < wdate:
                midnight = datetime.datetime(wdate.year, wdate.month, wdate.day)
                new_dep = midnight + datetime.timedelta(
                    hours=svc.deptime.hour, minutes=svc.deptime.minute
                )
                new_arr = midnight + datetime.timedelta(
                    hours=svc.arrtime.hour, minutes=svc.arrtime.minute
                )

                # Time of day earlier than the previous arrival: next day.
                if new_dep < wdate:
                    new_dep += datetime.timedelta(days=1)
                    new_arr += datetime.timedelta(days=1)

                # Arrival before departure: the leg crosses midnight.
                if new_dep > new_arr:
                    new_arr += datetime.timedelta(days=1)

                svc.deptime = new_dep
                svc.arrtime = new_arr

            conn.addService(svc)

        q.put((cid, conn))

    except Exception:
        if eq is not None:
            eq.put(sys.exc_info())
        raise


def connRequest(frm, to, count=3, time=None, mode=False, details=False, via=None):
    """Request connections between two stations and yield ``Connection`` objects.

    This is a generator: the HTTP request is only sent once iteration starts.

    Args:
        frm: Origin station; its ``extid`` is used in the query, falling back
            to ``name`` when ``extid`` is falsy.
        to: Destination station, handled like *frm*.
        count: Number of connections requested (and, without *details*, the
            maximum number of overview rows read).
        time: ``datetime`` of the journey.  Defaults to the current local time
            *at call time* (previously the default was frozen at import).
        mode: When true, *time* is sent as the desired arrival time
            (``timesel=arrive``).
        details: When true, each connection's detail page is fetched in its
            own thread; otherwise one summary service per overview row is
            built.
        via: Optional list of via stations, handled like *frm*.

    Yields:
        ``Connection`` objects.

    Raises:
        ValueError: if the response does not contain any connection.
        Exception: in *details* mode, the first error recorded by a worker
            thread is re-raised with its original traceback.
    """
    if time is None:
        time = datetime.datetime.now()
    if via is None:
        via = []

    outdate = time.strftime(_DATE_FMT)
    outtime = time.strftime("%H:%M")

    url = "http://fahrplan.oebb.at/bin/query.exe/dn?start=1&S=%s&Z=%s&REQ0JourneyDate=%s&time=%s&REQ0HafasNumCons0=%s%s" % (
        frm.extid or frm.name,
        to.extid or to.name,
        outdate,
        outtime,
        count,
        # Was "×el=arrive": the "&times" prefix had been turned into the
        # multiplication sign, which dropped the parameter separator.
        "&timesel=arrive" if mode else "",
    )

    url += "".join(
        "&REQ0JourneyStops%i.0G=%s&REQ0JourneyStops%i.0A=1"
        % (pos, stop.extid or stop.name, pos)
        for pos, stop in enumerate(via, start=1)
    )

    source = HTTPClient().get(url).text

    if "GO_conViewMode=outward" not in source:
        raise ValueError("No connection found.")

    juha = BeautifulSoup(source, "html5lib")

    if details:
        conns = []

        for a in juha.findAll("a"):
            href = a.get("href")
            if href and "GO_conViewMode" in href:
                conns.append(href)

        threads = []
        eq = queue.Queue()
        q = queue.PriorityQueue()

        for i, href in enumerate(conns):
            t = threading.Thread(
                target=getDetails, args=(i, href, q, via, eq), daemon=True
            )
            t.start()
            threads.append(t)

        for t in threads:
            t.join()

        if not eq.empty():
            exc = eq.get()
            raise exc[1].with_traceback(exc[2])

        while not q.empty():
            yield q.get()[1]

    else:
        for i in range(count):
            det = juha.find("tr", id="trOverviewC0-%i" % i)

            if det is None:
                break

            stations = det.find("td", {"class": "station"}).findAll("div")
            depst = getStation(stations[0].text.strip())
            arrst = getStation(stations[-1].text.strip())

            dates = list(det.find("td", {"class": "date"}).strings)
            depdate = dates[0]
            # A single date string means arrival is on the departure day.
            arrdate = dates[1] if len(dates) > 1 else depdate

            times = det.find("div", {"class": "planed"}).text.split()
            deptime = times[0]
            arrtime = times[2]

            # Live prognosis is not evaluated in the overview mode.
            curdep = None
            curarr = None

            depts = _parse_timestamp(depdate, deptime)
            arrts = _parse_timestamp(arrdate, arrtime)

            name = "/".join(
                [img.get("title") for img in det.findAll("img", {"class": "product"})]
            )

            # ticketurl = det.find("td", { "class": "fares" }).find("a").get("href")

            # NOTE(review): keyword spelled "currdep" next to "curarr" -
            # verify against Service's signature.
            svc = Service(name, depst, depts, arrst, arrts, currdep=curdep, curarr=curarr)

            conn = Connection(details)

            for vst in via:
                conn.addVia(vst)

            conn.addService(svc)

            yield conn


def worker(frm, to, count=3, time=None, mode=False, details=False, json=False, via=None):
    """Look up connections by station name and return them as one string.

    Args:
        frm: Origin station name, resolved through ``getStation``.
        to: Destination station name, resolved through ``getStation``.
        count: Number of connections to request.
        time: ``datetime`` of the journey.  Defaults to the current time in
            the ``Europe/Vienna`` zone *at call time* (previously the default
            was frozen at import).
        mode: When true, *time* is the desired arrival time and the result
            order is reversed.
        details: Passed through to ``connRequest``.
        json: When true, the ``json()`` rendering of each connection is used
            instead of ``xml()``.
        via: Optional iterable of via station names.

    Returns:
        The concatenated renderings as a ``str``.

    Raises:
        Whatever ``getStation`` and ``connRequest`` raise.
    """
    if time is None:
        time = datetime.datetime.now(pytz.timezone("Europe/Vienna"))

    conns = list(
        connRequest(
            getStation(frm),
            getStation(to),
            count,
            time,
            mode,
            details,
            [getStation(vst) for vst in via] if via else [],
        )
    )

    if mode:
        conns = conns[::-1]

    if json:
        body = ",\n".join(conn.json(2, i) for i, conn in enumerate(conns))
        if conns:
            body += "\n"
        return "{ \"connections\": [ " + body + " ]\n}"

    # NOTE(review): the XML branch emits neither a prolog nor an enclosing
    # root element here - confirm that this is intended.
    return " " + "".join(conn.xml(1, i) + "\n" for i, conn in enumerate(conns))