2015-04-21 22:16:42 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import tools
|
|
|
|
|
2015-04-24 21:33:42 +00:00
|
|
|
import sys, datetime
|
2015-04-21 22:16:42 +00:00
|
|
|
|
2015-04-27 21:43:36 +00:00
|
|
|
def getDate(date):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.datetime``.

    Args:
        date: Date string in ``%Y-%m-%d`` format.

    Returns:
        The ``datetime.datetime`` produced by ``strptime`` for that day.

    Raises:
        ValueError: If ``date`` does not match the expected format. The
            parser's own error is attached as ``__cause__``.
    """
    try:
        return datetime.datetime.strptime(date, '%Y-%m-%d')
    except ValueError as err:
        # Swap in a user-facing message but keep the parser's error as the
        # explicit cause so the offending value is still visible in tracebacks.
        raise ValueError("Dates must be in YYYY-MM-DD format.") from err
|
2015-04-22 22:57:24 +00:00
|
|
|
|
2015-04-27 21:43:36 +00:00
|
|
|
def _parseInt(value):
    """Return ``value`` as an int, or None when it is not an integer."""
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            return None
    return None


def dateArgs(argv=None):
    """Split command-line style arguments into search strings and options.

    Recognised flags:
        ``-f DATE``  from-date, parsed with ``getDate``.
        ``-t DATE``  to-date, parsed with ``getDate``.
        ``-a N``     integer stored as ``av``. When the argument after ``-a``
                     is not an integer, ``-a`` is ignored and that argument
                     is handled like any other.

    Every other argument is collected as a search string.

    Args:
        argv: Sequence of arguments. Defaults to ``sys.argv[1:]``, read at
            call time.

    Returns:
        A tuple ``(strings, fr, to, av)``: the list of search strings, the
        from/to dates (``None`` when not given) and the ``-a`` value
        (``0`` when not given).

    Raises:
        ValueError: If ``-f`` or ``-t`` is the last argument, if a date is
            malformed, or if the to-date lies before the from-date.
    """
    if argv is None:
        argv = sys.argv[1:]

    flags = ("-f", "-t", "-a")
    strings = []
    fr = None
    to = None
    av = 0
    pending = None  # flag still waiting for its value, if any

    for arg in argv:
        if pending == "-f":
            fr = getDate(arg)
            pending = None
            continue
        if pending == "-t":
            to = getDate(arg)
            pending = None
            continue
        if pending == "-a":
            pending = None
            number = _parseInt(arg)
            if number is not None:
                av = number
                continue
            # Not a number: "-a" had no value, so treat this argument
            # like any other (it may itself be a flag).

        if arg in flags:
            pending = arg
        else:
            strings.append(arg)

    if pending in ("-f", "-t"):
        raise ValueError("Date missing.")

    if to is not None and fr is not None and to < fr:
        raise ValueError("From date must be before To date.")

    return strings, fr, to, av
|
2015-04-22 22:57:24 +00:00
|
|
|
|
|
|
|
|
2015-04-27 21:43:36 +00:00
|
|
|
def queryBuilder(date, string=""):
    """Build the SQL that counts one day's tweets containing ``string``.

    Args:
        date: Day to count, compared against ``SUBSTR(timestamp,0,11)``.
        string: Substring matched with ``LIKE`` against ``LOWER(text)``.
            The default empty string yields the pattern ``'%%'``.

    Returns:
        The complete ``SELECT COUNT(*)`` statement as a string.
    """
    def quote(value):
        # Double single quotes so a value containing "'" cannot terminate
        # the SQL string literal it is embedded in.
        return str(value).replace("'", "''")

    # NOTE(review): LIKE wildcards (% and _) inside ``string`` are passed
    # through unchanged; confirm whether callers rely on that.
    return (
        "SELECT COUNT(*) FROM tweets "
        "WHERE SUBSTR(timestamp,0,11) = '{}' "
        "AND LOWER(text) LIKE '%{}%'"
    ).format(quote(date), quote(string))
|
2015-04-22 22:57:24 +00:00
|
|
|
|
2015-04-21 22:16:42 +00:00
|
|
|
|
2015-04-27 21:43:36 +00:00
|
|
|
def getFLDate(db, val = 0):
    """Return the first or last tweet date found in the ``tweets`` table.

    Args:
        db: Object whose ``executeQuery(sql)`` result is indexed as
            ``[row][column]``.
        val: ``0`` selects the smallest date (``MIN``); any other value
            selects the largest (``MAX``).

    Returns:
        The selected date, parsed by ``getDate``.
    """
    aggregate = "MIN" if val == 0 else "MAX"
    query = "SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % aggregate
    rows = db.executeQuery(query)
    # Only the first column of the first row is used.
    firstValue = rows[0][0]
    return getDate(str(firstValue))
|
2015-04-22 22:57:24 +00:00
|
|
|
|
|
|
|
|
2015-04-27 21:43:36 +00:00
|
|
|
def dateList(fr, to):
    """List every day from ``fr`` to ``to`` inclusive.

    Args:
        fr: First day (a ``datetime`` value).
        to: Last day (a ``datetime`` value).

    Returns:
        A list of one-element lists, each holding a ``YYYY-MM-DD`` string.
        The list is empty when ``to`` lies before ``fr``.
    """
    oneDay = datetime.timedelta(days=1)
    # Adding one day to the end makes the range inclusive of ``to``.
    dayCount = (to + oneDay - fr).days

    rows = []
    for offset in range(dayCount):
        current = fr + datetime.timedelta(days=offset)
        rows.append([current.strftime('%Y-%m-%d')])
    return rows
|
|
|
|
|
|
|
|
|
|
|
|
def fillList(db, string, cur, av):
    """Append one count column for ``string`` to every row of ``cur``.

    Args:
        db: Object providing ``executeQuery(sql)``.
        string: Search string handed to ``queryBuilder``.
        cur: List of rows whose first element is the date; each row is
            extended in place.
        av: Accepted but not used here.

    Returns:
        The same ``cur`` object that was passed in.
    """
    for row in cur:
        query = queryBuilder(row[0], string)
        # The first result row is appended to the date's row.
        firstResult = db.executeQuery(query)[0]
        row += firstResult

    return cur
|
2015-04-22 22:57:24 +00:00
|
|
|
|
|
|
|
|
2015-04-27 21:43:36 +00:00
|
|
|
def getHeaders(strings, av):
    """Build the header row for the result table.

    Args:
        strings: Search strings; each one becomes a column title.
        av: Accepted but not used here.

    Returns:
        A list holding a single row: ``"Date"``, ``"Tweets"``, then the
        search strings in order.
    """
    header = ["Date", "Tweets"]
    header.extend(strings)
    return [header]
|
|
|
|
|
|
|
|
|
|
|
|
def getTweetsByDate(strings=(), fr=None, to=None, av=0, path=tools.config.dbpath, headers=False):
    """Count tweets per day, in total and per search string.

    Args:
        strings: Iterable of search strings; each adds one count column.
        fr: First day to report. When ``None``, ``getFLDate(db)`` is used.
        to: Last day to report. When ``None``, ``getFLDate(db, 1)`` is used.
        av: Passed through to ``fillList`` and ``getHeaders``.
        path: Database path handed to ``tools.dbHelper``.
        headers: When true, the row from ``getHeaders`` is placed first.

    Returns:
        A list of rows. Each row starts with the date, followed by the count
        for the empty search string and then one count per entry in
        ``strings``.
    """
    # Materialise once so any iterable (tuple, generator, ...) can be used
    # both for the queries and for the header row.
    strings = list(strings)

    db = tools.dbHelper(path)

    if fr is None:
        fr = getFLDate(db)
    if to is None:
        to = getFLDate(db, 1)

    cur = dateList(fr, to)

    # The empty string is queried first; its count fills the column that
    # getHeaders titles "Tweets".
    for string in [""] + strings:
        cur = fillList(db, string, cur, av)

    if headers:
        cur = getHeaders(strings, av) + cur

    return cur
|
2015-04-21 22:16:42 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Parse the command line, collect the per-day counts with a header row,
    # and print the table as CSV.
    parsedArgs = dateArgs()
    table = getTweetsByDate(*parsedArgs, headers = True)
    tools.printCSV(table)
|