Allow date filtering in datecsv using -f and -t flags (from and to)
This commit is contained in:
parent
591e9bf562
commit
f83c69f902
1 changed file with 64 additions and 11 deletions
75
datecsv.py
75
datecsv.py
|
@@ -3,27 +3,80 @@
|
||||||
|
|
||||||
import tools
|
import tools
|
||||||
|
|
||||||
import csv, sys
|
import csv, sys, datetime
|
||||||
|
|
||||||
def queryBuilder(strings = []):
|
def checkDate(date):
    """Return True if *date* is a real calendar date written as YYYY-MM-DD.

    The value must be exactly ten characters: four digits, a dash, two
    digits, a dash, two digits. Anything else, including non-string input
    and impossible dates such as 2020-02-30, yields False.
    """
    try:
        # strptime on its own also accepts unpadded input like '2020-1-5'.
        # Callers compare the date as text against a 10-character timestamp
        # prefix, so only the zero-padded form is safe to accept.
        if len(date) != 10:
            return False
        digits = date[:4] + date[5:7] + date[8:]
        if any(ch not in "0123456789" for ch in digits):
            return False
        # Checks the dashes and that the date exists on the calendar.
        datetime.datetime.strptime(date, '%Y-%m-%d')
    except (TypeError, ValueError):
        return False
    return True
|
||||||
|
|
||||||
|
def dateArgs(argv = None):
    """Split command-line arguments into search strings and a date range.

    argv -- full argument list including the program name at index 0;
            defaults to the current sys.argv, read when the function is
            called rather than when it is defined.

    "-f DATE" sets the start date and "-t DATE" sets the end date. Every
    other argument is collected, in order, as a search string.

    Returns a tuple (strings, fr, to); fr and to are "" when not given.

    Raises ValueError if the value following -f/-t is rejected by
    checkDate, or if -f/-t is the last argument and has no value.
    """
    if argv is None:
        argv = sys.argv

    fr = ""
    to = ""
    strings = []
    pending = None  # the flag ("-f" or "-t") still waiting for its date

    for arg in argv[1:]:
        if pending is None:
            if arg in ("-f", "-t"):
                pending = arg
            else:
                strings.append(arg)
        elif checkDate(arg):
            if pending == "-f":
                fr = arg
            else:
                to = arg
            pending = None
        else:
            raise ValueError("Dates must be in YYYY-MM-DD format.")

    if pending is not None:
        raise ValueError("Date missing.")

    return strings, fr, to
|
||||||
|
|
||||||
|
|
||||||
|
def queryBuilder(strings = None, fr = "", to = ""):
    """Build the SQL query that counts tweets per day.

    strings -- search terms; each adds one column counting that day's
               tweets whose lower-cased text contains the lower-cased
               term. The column is named after the term as given.
    fr      -- inclusive lower bound on the day (YYYY-MM-DD), "" for none.
    to      -- inclusive upper bound on the day (YYYY-MM-DD), "" for none.

    Returns the query as a string. The first column is the day (the first
    ten characters of tweets.timestamp), the second is the total number of
    tweets on that day, followed by one column per search term.

    Values are embedded as SQL string literals with single quotes doubled,
    so a term such as "it's" can no longer end the literal early.
    '%' and '_' inside a term are still treated as LIKE wildcards.
    """
    def sqlQuote(value):
        # Double embedded single quotes so the value stays inside its literal.
        return value.replace("'", "''")

    day = "SUBSTR(t.timestamp,0,11)"
    sameDay = "SUBSTR(e.timestamp,0,11) = " + day

    columns = [
        day + " AS 'Date'",
        "(SELECT COUNT(*) FROM tweets e WHERE " + sameDay + ") AS 'Tweets'",
    ]
    for string in strings or []:
        columns.append(
            "(SELECT COUNT(*) FROM tweets e WHERE " + sameDay
            + " AND LOWER(e.text) LIKE '%" + sqlQuote(string.lower()) + "%')"
            + " AS '" + sqlQuote(string) + "'"
        )

    conditions = []
    if fr:
        conditions.append(day + " >= '" + sqlQuote(fr) + "'")
    if to:
        conditions.append(day + " <= '" + sqlQuote(to) + "'")

    # Explicit spaces between clauses; 'Tweets' used to be glued to FROM.
    query = "SELECT " + ", ".join(columns) + " FROM tweets t"
    if conditions:
        query += " WHERE " + " AND ".join(conditions)
    return query + " GROUP BY " + day
|
||||||
|
|
||||||
|
|
||||||
def getTweetsByDate(strings = [], path = tools.config.dbpath):
|
def getTweetsByDate(strings = [], path = tools.config.dbpath, fr = "", to = ""):
    """Run the per-day tweet query and write each result row as CSV to stdout.

    strings -- search terms forwarded to queryBuilder.
    path    -- passed to tools.dbHelper (presumably the database file
               location; confirm against tools).
    fr, to  -- date bounds forwarded to queryBuilder.
    """
    helper = tools.dbHelper(path)
    rows = helper.executeQuery(queryBuilder(strings, fr, to))
    out = csv.writer(sys.stdout)
    # One CSV line per row returned by the query.
    out.writerows(rows)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
    # Usage: datecsv.py [-f YYYY-MM-DD] [-t YYYY-MM-DD] [search strings...]
    try:
        strings, fr, to = dateArgs()
    except ValueError as err:
        # A bad or missing -f/-t value is a usage error: print the message
        # to stderr and exit non-zero instead of dumping a traceback.
        sys.exit("datecsv: " + str(err))
    getTweetsByDate(strings = strings, fr = fr, to = to)
|
||||||
if path == None:
|
|
||||||
path = tools.config.dbpath
|
|
||||||
|
|
||||||
getTweetsByDate(strings, path)
|
|
||||||
|
|
Loading…
Reference in a new issue