Check in CSV generator and Handle Extractor (basically the same thing in two different versions)

This commit is contained in:
Klaus-Uwe Mitterer 2015-04-13 00:08:33 +02:00
commit cf2e246db1
2 changed files with 44 additions and 0 deletions

16
datecsv.sh Executable file
View file

@ -0,0 +1,16 @@
#!/bin/bash
# Write a per-day CSV summary of the "tweets" table to $OUTFILE.
#
# Usage: datecsv.sh [KEYWORD...]
#
# Output columns:
#   Date    - the first 10 characters of tweets.timestamp
#   Tweets  - number of tweets sharing that date prefix
#   KEYWORD - one column per argument: number of that day's tweets whose
#             lower-cased text contains the lower-cased keyword
#
# Requires bash 4+ (for the ${var,,} lower-casing expansion).
# NOTE: '%' and '_' inside a keyword are still interpreted as LIKE wildcards.
DATAFILE=Database.db
OUTFILE=output.csv
# An array keeps the options as separate words without relying on unquoted
# word splitting of a command string.
SQLITE=(sqlite3 -csv -header)
# SQL string literals escape a single quote by doubling it.
SQUOTE="'"
# SUBSTR(x,0,11) starts one position before the first character, so it
# yields the first 10 characters of the timestamp.
QUERY="SELECT SUBSTR(t.timestamp,0,11) AS 'Date', (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11)) AS 'Tweets'"
# "$@" (quoted) keeps keywords containing spaces or glob characters intact.
for i in "$@"; do
    keyword=${i,,}
    # Escape quotes so a keyword such as "don't" cannot terminate the SQL
    # literal early and break (or alter) the query.
    pattern=${keyword//$SQUOTE/$SQUOTE$SQUOTE}
    label=${i//$SQUOTE/$SQUOTE$SQUOTE}
    QUERY="$QUERY, (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11) AND LOWER(e.text) LIKE '%${pattern}%') AS '${label}'"
done
QUERY="$QUERY FROM tweets t GROUP BY SUBSTR(t.timestamp,0,11);"
"${SQLITE[@]}" "$DATAFILE" "$QUERY" > "$OUTFILE"

28
gethandles.py Executable file
View file

@ -0,0 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import operator, re, sqlite3
def getTweets(database_filename = "Database.db"):
    """Count how often each @handle is mentioned in the stored tweets.

    Reads the ``text`` column of every row in the ``tweets`` table. Each
    whitespace-separated word that starts with "@" contributes the handle
    formed by "@" plus the leading run of word characters that follows,
    lower-cased (so "@Alice:" and "@alice" are counted together). A bare
    "@", or "@" followed directly by a non-word character, is ignored.
    Rows whose text is NULL are skipped.

    Args:
        database_filename: Path of the SQLite database to read.

    Returns:
        A dict mapping each handle (including the leading "@") to the
        number of times it was mentioned.
    """
    handles = dict()
    sql_conn = sqlite3.connect(database_filename)
    try:
        cur = sql_conn.cursor()
        for (text,) in cur.execute("SELECT text FROM tweets"):
            if text is None:
                # A NULL text has no words; calling .split() on it would raise.
                continue
            for word in text.split():
                # split() never yields empty strings, so word[0] is safe.
                if word[0] != "@":
                    continue
                # Only the piece before the first non-word character matters,
                # so stop splitting after it.
                name = re.split(r'\W', word[1:], maxsplit=1)[0].lower()
                if name:
                    handle = "@" + name
                    handles[handle] = handles.get(handle, 0) + 1
    finally:
        # Release the database handle even if the query or parsing fails.
        sql_conn.close()
    return handles
if __name__ == "__main__":
    # Print one "handle,count" CSV line per handle, most-mentioned first.
    data = sorted(getTweets().items(), key=operator.itemgetter(1), reverse=True)
    for handle, count in data:
        # A single-argument print(...) call produces the same output on
        # Python 2 and Python 3; the bare print statement is a syntax error
        # on Python 3.
        print(handle + "," + str(count))