From cf2e246db1b78abf214be3053701379bdd9329a1 Mon Sep 17 00:00:00 2001
From: Klaus-Uwe Mitterer
Date: Mon, 13 Apr 2015 00:08:33 +0200
Subject: [PATCH 1/2] Check in CSV generator and Handle Extractor (basically the same thing in two different versions)

---
 datecsv.sh    | 16 ++++++++++++++++
 gethandles.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100755 datecsv.sh
 create mode 100755 gethandles.py

diff --git a/datecsv.sh b/datecsv.sh
new file mode 100755
index 0000000..5f2aeb3
--- /dev/null
+++ b/datecsv.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+DATAFILE=Database.db
+OUTFILE=output.csv
+
+SQLITE="sqlite3 -csv -header"
+
+QUERY="SELECT SUBSTR(t.timestamp,0,11) AS 'Date', (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11)) AS 'Tweets'"
+
+for i in $@;
+  do QUERY="$QUERY, (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11) AND LOWER(e.text) LIKE '%${i,,}%') AS '$i'"
+done
+
+QUERY="$QUERY FROM tweets t GROUP BY SUBSTR(t.timestamp,0,11);"
+
+$SQLITE $DATAFILE "$QUERY" > $OUTFILE
diff --git a/gethandles.py b/gethandles.py
new file mode 100755
index 0000000..75533f4
--- /dev/null
+++ b/gethandles.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import operator, re, sqlite3
+
+def getTweets(database_filename = "Database.db"):
+    sql_conn = sqlite3.connect(database_filename)
+    cur = sql_conn.cursor()
+
+    handles = dict()
+    tweets = cur.execute("SELECT text FROM tweets")
+
+    for tweet in tweets:
+        for word in tweet[0].split():
+            if word[0] == "@":
+                handle = "@" + re.split('[\\W]',word[1:])[0].lower()
+                if handle != "@":
+                    try:
+                        handles[handle] += 1
+                    except KeyError:
+                        handles[handle] = 1
+
+    return handles
+
+if __name__ == "__main__":
+    data = sorted(getTweets().items(), key=operator.itemgetter(1), reverse=True)
+    for handle, tweets in data:
+        print handle + "," + str(tweets)

From 0c4eb44444c51c1f2d7fd3a821ecf3acbe406771 Mon Sep 17 00:00:00 2001
From: Klaus-Uwe Mitterer
Date: Mon, 13 Apr 2015 22:11:42 +0200
Subject: [PATCH 2/2] Convert gethandles.py to Python 3 Directly output datecsv.sh results

---
 datecsv.sh    | 3 +--
 gethandles.py | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/datecsv.sh b/datecsv.sh
index 5f2aeb3..17af6e3 100755
--- a/datecsv.sh
+++ b/datecsv.sh
@@ -1,7 +1,6 @@
 #!/bin/bash
 
 DATAFILE=Database.db
-OUTFILE=output.csv
 
 SQLITE="sqlite3 -csv -header"
 
@@ -13,4 +12,4 @@ done
 
 QUERY="$QUERY FROM tweets t GROUP BY SUBSTR(t.timestamp,0,11);"
 
-$SQLITE $DATAFILE "$QUERY" > $OUTFILE
+$SQLITE $DATAFILE "$QUERY"
diff --git a/gethandles.py b/gethandles.py
index 75533f4..f246c10 100755
--- a/gethandles.py
+++ b/gethandles.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import operator, re, sqlite3
@@ -23,6 +23,6 @@ def getTweets(database_filename = "Database.db"):
     return handles
 
 if __name__ == "__main__":
-    data = sorted(getTweets().items(), key=operator.itemgetter(1), reverse=True)
+    data = sorted(list(getTweets().items()), key=operator.itemgetter(1), reverse=True)
     for handle, tweets in data:
-        print handle + "," + str(tweets)
+        print(handle + "," + str(tweets))