From cf2e246db1b78abf214be3053701379bdd9329a1 Mon Sep 17 00:00:00 2001
From: Klaus-Uwe Mitterer
Date: Mon, 13 Apr 2015 00:08:33 +0200
Subject: [PATCH] Check in CSV generator and Handle Extractor (basically the same thing in two different versions)

---
 datecsv.sh    | 29 +++++++++++++++++++++++++++++
 gethandles.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100755 datecsv.sh
 create mode 100755 gethandles.py

diff --git a/datecsv.sh b/datecsv.sh
new file mode 100755
index 0000000..5f2aeb3
--- /dev/null
+++ b/datecsv.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Write a CSV of per-day tweet counts from the tweets table of $DATAFILE.
+#
+# Usage: datecsv.sh [KEYWORD...]
+#
+# Rows are grouped by SUBSTR(timestamp,0,11) (presumably the YYYY-MM-DD
+# part of the timestamp). Each row holds that value, the number of tweets
+# sharing it and, per KEYWORD, how many of those tweets contain the keyword.
+
+DATAFILE=Database.db
+OUTFILE=output.csv
+
+SQLITE=(sqlite3 -csv -header)
+
+QUERY="SELECT SUBSTR(t.timestamp,0,11) AS 'Date', (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11)) AS 'Tweets'"
+
+# Keywords are pasted into SQL string literals, so any single quote in
+# them has to be doubled to keep the statement well-formed.
+SQ="'"
+
+for i in "$@"; do
+    KEYWORD=${i//$SQ/$SQ$SQ}
+    QUERY="$QUERY, (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11) AND LOWER(e.text) LIKE '%${KEYWORD,,}%') AS '$KEYWORD'"
+done
+
+QUERY="$QUERY FROM tweets t GROUP BY SUBSTR(t.timestamp,0,11);"
+
+"${SQLITE[@]}" "$DATAFILE" "$QUERY" > "$OUTFILE"
diff --git a/gethandles.py b/gethandles.py
new file mode 100755
index 0000000..75533f4
--- /dev/null
+++ b/gethandles.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Count how often each @handle is mentioned in the tweets table."""
+
+import operator, re, sqlite3
+
+
+def getTweets(database_filename = "Database.db"):
+    """Return a dict mapping each lower-cased @handle to its mention count.
+
+    Reads the text column of every row of the tweets table in the SQLite
+    database at database_filename. A handle is an "@" at the start of a
+    whitespace-separated word plus the word characters that follow it; a
+    bare "@" is ignored.
+    """
+    handles = dict()
+    sql_conn = sqlite3.connect(database_filename)
+
+    try:
+        cur = sql_conn.cursor()
+        for (text,) in cur.execute("SELECT text FROM tweets"):
+            # A NULL text column comes back as None and has no words.
+            for word in (text or "").split():
+                if not word.startswith("@"):
+                    continue
+                # Cut the handle at the first non-word character, so that
+                # "@Name:" and "@name's" are both counted as "@name".
+                handle = "@" + re.split(r'\W', word[1:])[0].lower()
+                if handle != "@":
+                    handles[handle] = handles.get(handle, 0) + 1
+    finally:
+        # The connection is only needed for this one query.
+        sql_conn.close()
+
+    return handles
+
+
+if __name__ == "__main__":
+    # Most-mentioned handles first, one "handle,count" line each.
+    data = sorted(getTweets().items(), key=operator.itemgetter(1), reverse=True)
+    for handle, tweets in data:
+        # Parenthesised so the line runs under Python 2 and Python 3 alike.
+        print(handle + "," + str(tweets))