twitools/getdates.py

115 lines
2.1 KiB
Python
Raw Normal View History

2015-04-21 22:16:42 +00:00
#!/usr/bin/env python3
import tools
import sys, datetime
2015-04-21 22:16:42 +00:00
def dateArgs(argv=None):
    """Parse command-line style arguments into search strings and options.

    Recognised flags, each consuming the argument that follows it:
      -f DATE   start of the date range, parsed with tools.getDate
      -t DATE   end of the date range, parsed with tools.getDate
      -a DAYS   size of the running-average window, an integer
    Every other argument is collected, in order, as a search string.

    Args:
        argv: Sequence of argument strings. When None (the default),
            sys.argv[1:] is read at call time rather than being frozen
            when the module is imported.

    Returns:
        A tuple (strings, fr, to, av): the list of search strings, the
        parsed "from" and "to" dates (None when not given) and the
        running-average window (0 when not given).

    Raises:
        ValueError: if a flag is the last argument and therefore has no
            value, if the -a value is not an integer, or if the "to" date
            lies before the "from" date.
    """
    if argv is None:
        argv = sys.argv[1:]
    strings = []
    fr = None
    to = None
    av = 0
    pending = None  # flag still waiting for its value, or None
    for arg in argv:
        if pending is None:
            if arg in ("-f", "-t", "-a"):
                pending = arg
            else:
                strings.append(arg)
            continue
        # The argument after a flag is always taken as that flag's value,
        # even if it looks like another flag.
        if pending == "-a":
            try:
                av = int(arg)
            except ValueError:
                raise ValueError("Number of days for running average must be an integer.")
        elif pending == "-f":
            fr = tools.getDate(arg)
        else:
            to = tools.getDate(arg)
        pending = None
    if pending in ("-f", "-t"):
        raise ValueError("Date missing.")
    if pending == "-a":
        # Previously a trailing "-a" was silently ignored, unlike -f/-t.
        raise ValueError("Number of days for running average missing.")
    if to is not None and fr is not None and to < fr:
        raise ValueError("From date must be before To date.")
    return strings, fr, to, av
def queryBuilder(date, string=""):
    """Build the SQL text that counts one day's tweets containing a string.

    Args:
        date: Day to match against the first characters of the
            ``timestamp`` column; formatted with str().
        string: Text that must occur in the lower-cased tweet text. The
            default empty string produces the pattern '%%', i.e. no
            restriction on the text.

    Returns:
        The query as a single SQL string.

    Both values end up inside single-quoted SQL literals, so embedded
    single quotes are doubled; otherwise a search term such as "don't"
    would terminate the literal early and yield malformed (or injectable)
    SQL. LIKE wildcards (% and _) inside ``string`` are passed through
    unchanged.
    """
    def quote(value):
        # Standard SQL escaping of a quote inside a string literal.
        return str(value).replace("'", "''")

    return "SELECT COUNT(*) FROM tweets WHERE SUBSTR(timestamp,0,11) = '%s' AND LOWER(text) LIKE '%%%s%%'" % (quote(date), quote(string))
2015-04-21 22:16:42 +00:00
def dateList(fr, to):
    """Return one single-element row per day from fr to to, inclusive.

    Each row is a list holding the day formatted as 'YYYY-MM-DD'; later
    processing appends further columns to these rows. An empty list is
    returned when ``to`` lies before ``fr``.
    """
    one_day = datetime.timedelta(days=1)
    # Adding a day before subtracting makes the end date inclusive.
    day_count = (to + one_day - fr).days
    rows = []
    for offset in range(day_count):
        current = fr + datetime.timedelta(days=offset)
        rows.append([current.strftime('%Y-%m-%d')])
    return rows
2015-04-28 19:56:42 +00:00
def avg(list):
    """Return the arithmetic mean of the values, truncated towards zero.

    The parameter keeps its original name (which shadows the builtin
    ``list`` inside this function) so keyword callers keep working.
    Raises ZeroDivisionError when the sequence is empty.
    """
    total = sum(list)
    count = len(list)
    return int(total / count)
def fillAverage(cur, av):
    """Append to every row the running average of its current last column.

    For each row, in order, the row's last value is averaged with the
    last values of up to ``av - 1`` preceding rows, and the result is
    appended to the row as a new last column.

    Args:
        cur: List of rows (lists); modified in place.
        av: Window size in rows; must be at least 1.

    Returns:
        The same ``cur`` list, after modification.

    Raises:
        ValueError: if ``av`` is smaller than 1. Such a value would make
            the window slice below empty or misaligned, which previously
            surfaced as an unexplained ZeroDivisionError from avg().
    """
    if av < 1:
        raise ValueError("Number of days for running average must be at least 1.")
    window = []
    for day in cur:
        # Newest value first; the slice drops values older than the window.
        window = ([day[-1]] + window)[:av]
        day.append(avg(window))
    return cur
def fillList(db, string, cur, av):
    """Append one query result column (and optionally its average) per row.

    For every row in ``cur`` the query built by queryBuilder from the
    row's first element and ``string`` is executed through
    ``db.executeQuery``, and the items of the first result row are
    appended to the row in place. When ``av`` is not 0 the rows are then
    passed through fillAverage with that window size.

    Returns the resulting list of rows.
    """
    for row in cur:
        query = queryBuilder(row[0], string)
        first_result = list(db.executeQuery(query))[0]
        row.extend(first_result)
    if av == 0:
        return cur
    return fillAverage(cur, av)
def getHeaders(strings, av):
    """Return the header row, wrapped in a list, for the result table.

    Without averaging (``av`` equal to 0) the columns are "Date",
    "Tweets" and one column per search string. With averaging, "Tweets"
    and every search string are each followed by an "Average ..." column.
    """
    row = ["Date", "Tweets"]
    if av == 0:
        row.extend(strings)
    else:
        row.append("Average")
        for term in strings:
            row.extend((term, "Average " + term))
    return [row]
2015-10-10 22:10:57 +00:00
def getTweetsByDate(strings=None, fr=None, to=None, av=0, path=tools.dbpath(), headers=False):
    """Build a per-day table of tweet counts, overall and per search string.

    Args:
        strings: Iterable of search strings; each adds a count column.
            None (the default) means no search strings.
        fr: First day of the range. When None it is taken from
            ``db.getFLDate()``.
        to: Last day of the range (inclusive). When None it is taken
            from ``db.getFLDate(1)``.
        av: Running-average window; 0 disables the average columns.
        path: Database path handed to ``tools.dbHelper``. Note that the
            default is computed once, when this function is defined.
        headers: When true, a header row is placed before the data rows.

    Returns:
        A list of rows. Each data row starts with the day as
        'YYYY-MM-DD', followed by the count of all tweets and then one
        count per search string; with ``av`` non-zero every count is
        followed by its running average.
    """
    # A None default avoids sharing one mutable list between calls, and
    # list() lets callers pass any iterable (e.g. a tuple).
    terms = [] if strings is None else list(strings)
    db = tools.dbHelper(path)
    if fr is None:
        fr = db.getFLDate()
    if to is None:
        to = db.getFLDate(1)
    cur = dateList(fr, to)
    # The empty string matches every tweet and so yields the total column.
    for string in [""] + terms:
        cur = fillList(db, string, cur, av)
    if headers:
        cur = getHeaders(terms, av) + cur
    return cur
2015-04-21 22:16:42 +00:00
if __name__ == "__main__":
    # Script entry point: parse the command line, build the table with a
    # header row and print it as CSV.
    parsed_args = dateArgs()
    table = getTweetsByDate(*parsed_args, headers = True)
    tools.printCSV(table)