Prepare getdates for running average calculation
Include days with 0 tweets in getdates output
This commit is contained in:
parent
d854411884
commit
ca332951fa
1 changed files with 67 additions and 46 deletions
113
getdates.py
113
getdates.py
|
@ -4,74 +4,95 @@ import tools
|
|||
|
||||
import sys, datetime
|
||||
|
||||
def getDate(date):
    """Parse a date string in YYYY-MM-DD format.

    Returns the ``datetime.datetime`` produced by ``strptime``.
    Raises ValueError("Dates must be in YYYY-MM-DD format.") if the
    string does not match the format.
    """
    try:
        return datetime.datetime.strptime(date, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Dates must be in YYYY-MM-DD format.")


def checkDate(date):
    """Return True if *date* is a valid YYYY-MM-DD string, else False.

    Boolean variant of getDate(); both names are kept because both are
    referenced elsewhere in this file.
    """
    try:
        getDate(date)
    except ValueError:
        return False
    return True
|
||||
|
||||
def dateArgs(argv = sys.argv[1:]):
    """Parse command-line style arguments into query parameters.

    Recognised flags:
        -f DATE   from date (YYYY-MM-DD), parsed with getDate()
        -t DATE   to date (YYYY-MM-DD), parsed with getDate()
        -a [N]    integer value stored in ``av``; the number is optional

    Every other argument is collected as a search string.

    Returns a tuple ``(strings, fr, to, av)`` where ``fr`` and ``to`` are
    the parsed dates or None when not given, and ``av`` defaults to 0.

    Raises ValueError if -f or -t is the last argument ("Date missing."),
    if a date is malformed (raised by getDate), or if the To date lies
    before the From date.
    """
    strings = []
    fr = None
    to = None
    av = 0

    # Parser state: 0 = flag or search string expected, 1 = date for -f,
    # 2 = date for -t, 3 = optional number for -a.
    mode = 0

    for arg in argv:
        if mode == 3:
            # -a only looks at the argument that directly follows it.
            mode = 0
            try:
                # Command-line arguments are always strings, so an
                # isinstance(arg, int) test alone can never match them.
                number = arg if isinstance(arg, int) else int(str(arg))
            except ValueError:
                number = None
            if number is not None:
                av = number
                continue
            # Not a number: handle it as an ordinary argument below.

        if mode == 0:
            if arg == "-f":
                mode = 1
            elif arg == "-t":
                mode = 2
            elif arg == "-a":
                mode = 3
            else:
                strings.append(arg)
        elif mode == 1:
            fr = getDate(arg)
            mode = 0
        else:
            to = getDate(arg)
            mode = 0

    if mode in (1, 2):
        raise ValueError("Date missing.")

    if to is not None and fr is not None and to < fr:
        raise ValueError("From date must be before To date.")

    return strings, fr, to, av
|
||||
|
||||
|
||||
def queryBuilder(strings = [], fr = "", to = ""):
    """Build one SQL query returning per-day tweet counts.

    The result has a 'Date' column, a 'Tweets' column with the total
    count for that day, and one extra count column per entry in
    *strings* (tweets whose lowercased text contains the lowercased
    search string). The column alias is the search string as given.

    fr / to are optional inclusive date bounds given as strings; an empty
    string means "no bound".

    Returns the query as a string.
    """
    day = "SUBSTR(t.timestamp,0,11)"

    def quote(value):
        # Double single quotes so a value cannot terminate the SQL literal.
        return value.replace("'", "''")

    columns = [
        "%s AS 'Date'" % day,
        "(SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = %s) AS 'Tweets'" % day,
    ]
    for string in strings:
        columns.append(
            "(SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = %s AND LOWER(e.text) LIKE '%%%s%%') AS '%s'"
            % (day, quote(string.lower()), quote(string))
        )

    # The separating space before FROM was missing in the concatenation.
    query = "SELECT " + ", ".join(columns) + " FROM tweets t "

    conditions = []
    if fr != "":
        conditions.append("%s >= '%s'" % (day, quote(fr)))
    if to != "":
        conditions.append("%s <= '%s'" % (day, quote(to)))
    if conditions:
        query += "WHERE " + " AND ".join(conditions) + " "

    return query + "GROUP BY " + day
|
||||
def queryBuilder(date, string = ""):
    """Build the SQL query counting the tweets of a single day.

    date:   the day to count, compared against SUBSTR(timestamp,0,11).
    string: optional search string; only tweets whose lowercased text
            contains it are counted. The default "" matches every tweet.

    Returns the query as a string.
    """
    def quote(value):
        # Double single quotes so a value cannot terminate the SQL literal.
        return str(value).replace("'", "''")

    # The text column is lowercased in the query, so the pattern is
    # lowercased too instead of relying on LIKE being case-insensitive.
    return "SELECT COUNT(*) FROM tweets WHERE SUBSTR(timestamp,0,11) = '%s' AND LOWER(text) LIKE '%%%s%%'" % (quote(date), quote(string).lower())
|
||||
|
||||
|
||||
def getTweetsByDate(strings = [], path = tools.config.dbpath, fr = "", to = ""):
|
||||
def getFLDate(db, val = 0):
    """Return the first or last tweet date found in the database.

    db:  object whose executeQuery(sql) result is indexed as
         [row][column] here.
    val: 0 (default) selects the smallest date (MIN); any other value
         selects the largest (MAX).

    The value is parsed with getDate().
    Raises ValueError if the query yields no date.
    """
    mode = "MIN" if val == 0 else "MAX"
    result = db.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode)[0][0]

    # NOTE(review): presumably an empty tweets table comes back as None
    # here; without this check str(None) would reach getDate() and fail
    # with a misleading date-format message.
    if result is None:
        raise ValueError("No tweets in database.")

    return getDate(str(result))
|
||||
|
||||
|
||||
def dateList(fr, to):
    """Return one single-element list per day from *fr* to *to* inclusive.

    fr, to: date/datetime values supporting timedelta arithmetic.

    Each entry is ``[day]`` with the day formatted as 'YYYY-MM-DD'.
    The result is empty when *to* lies before *fr*.
    """
    # +1 day so the end date itself is part of the range.
    days = (to + datetime.timedelta(days=1) - fr).days
    return [[(fr + datetime.timedelta(days=i)).strftime('%Y-%m-%d')] for i in range(days)]
|
||||
|
||||
|
||||
def fillList(db, string, cur, av):
    """Append the tweet count for *string* to every day row in *cur*.

    db:     object providing executeQuery(sql).
    string: search string passed on to queryBuilder().
    cur:    list of rows; each row is a list whose first element is the
            date. The rows are extended in place.
    av:     accepted but not used by this function.

    Returns *cur*.
    """
    for day in cur:
        # The first result row is appended to the day's row.
        day.extend(db.executeQuery(queryBuilder(day[0], string))[0])

    return cur
|
||||
|
||||
|
||||
def getHeaders(strings, av):
    """Return the header row, wrapped in a list, for the output table.

    strings: search strings; each becomes a column title after the fixed
             "Date" and "Tweets" columns.
    av:      accepted but not used by this function.
    """
    return [["Date", "Tweets"] + list(strings)]
|
||||
|
||||
|
||||
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.config.dbpath, headers = False):
    """Return per-day tweet counts, including days without tweets.

    strings: search strings; each adds one count column.
    fr, to:  first and last day of the range. When None, the value is
             taken from the database via getFLDate().
    av:      passed through to fillList() and getHeaders().
    path:    database path handed to tools.dbHelper.
    headers: when true, a header row is prepended.

    Returns a list of rows: the date, then the total count, then one
    count per search string.
    """
    db = tools.dbHelper(path)

    if fr is None:
        fr = getFLDate(db)
    if to is None:
        to = getFLDate(db, 1)

    # One row per calendar day, so days with 0 tweets are included.
    cur = dateList(fr, to)

    # The empty string comes first and fills the total "Tweets" column.
    for string in [""] + list(strings):
        cur = fillList(db, string, cur, av)

    if headers:
        cur = getHeaders(strings, av) + cur

    return cur
|
||||
|
||||
if __name__ == "__main__":
    # Parse the command line first so argument errors are printed as a
    # short message rather than a traceback.
    try:
        args = dateArgs()
    except ValueError as err:
        sys.exit(str(err))

    tools.printCSV(getTweetsByDate(*args, headers = True))
|
||||
|
|
Loading…
Reference in a new issue