From aad1531392f9cd1b00cbec7a89755606d13567bd Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 30 Jun 2016 12:33:09 +0200 Subject: [PATCH] Further preparation for MySQL, which is not working yet though... Make filler handle direct messages Add table and functions for direct messages --- csvdb.py | 23 ++++++++--------------- dbtools/__init__.py | 20 +++++++++++++++++++- filler.py | 40 ++++++++++++++++++++++++++++++++++------ getdates.py | 2 +- getmentions.py | 4 ++-- gettweets.py | 2 +- makedb.py | 2 +- setuptools/__init__.py | 2 +- 8 files changed, 67 insertions(+), 28 deletions(-) diff --git a/csvdb.py b/csvdb.py index 41781b8..84b69d5 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,29 +4,22 @@ import dbtools import sqlite3, csv, sys -def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): try: infile = open(infile) except IOError: raise IOError("Unable to read %s." % infile) - input = list(csv.reader(infile)) + infile = list(csv.reader(infile)) - try: - dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - except: - raise IOError("%s already exists. Please delete it before trying to create a new one." 
% path) + for row in infile[1:]: + try: + dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") + except: + pass - for row in input[1:]: - dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") - dbo.commit() if __name__ == "__main__": - if len(sys.argv) > 2: - raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") - try: - makeDB(sys.argv[1]) - except IndexError: - makeDB() + makeDB() diff --git a/dbtools/__init__.py b/dbtools/__init__.py index ffca253..f895dfb 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -65,7 +65,18 @@ class dbObject: mode = "MIN" else: mode = "MAX" - return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + if self.dbtype == SQLITE: + return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + else: + self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode) + return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) + + def getLatestMessage(db): + db.executeQuery("SELECT max(id) FROM messages") + try: + return int(db.getNext()[0]) + except: + return 0 def getLatestTweet(db): db.executeQuery("SELECT max(tweet_id) FROM tweets") @@ -74,3 +85,10 @@ class 
dbObject: except: return 0 +def dbHelper(): + if setuptools.dbtype() == SQLITE: + return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) + elif setuptools.dbtype() == MYSQL: + return dbObject(dbtype=MYSQL, host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname()) + else: + raise setuptools.SetupException() diff --git a/filler.py b/filler.py index 649b06c..25fad8a 100755 --- a/filler.py +++ b/filler.py @@ -2,9 +2,9 @@ import dbtools, setuptools, twitools -def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()): +def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user - savepoint = db.getLatestTweet() + savepoint = db.getLatestTweet() + 1 last = savepoint timeline = two.search(query, savepoint) @@ -15,7 +15,10 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools. timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" text = setuptools.unescapeText(status.text) - db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + try: + db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + except: + pass db.commit() last = status.id @@ -25,7 +28,32 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools. return tw_counter, last, savepoint -if __name__ == "__main__": - count, last, first = fill() - print("Stored %i tweets after %i until %i." 
% (count, first, last)) +def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): + mcount = 0 + savepoint = db.getLatestMessage() + 1 + new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True) + new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True) + for m in new_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + for m in new_out_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + db.commit() + + return mcount, savepoint or 0, db.getLatestMessage() + +if __name__ == "__main__": + count, last, first = getTweets() + print("Stored %i tweets after %i until %i." % (count, first, last)) + count, last, first = getMessages() + print("Stored %i messages after %i until %i." 
% (count, first, last)) diff --git a/getdates.py b/getdates.py index 40b8fa6..18cd6a0 100755 --- a/getdates.py +++ b/getdates.py @@ -91,7 +91,7 @@ def getHeaders(strings, av): return [headers] -def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False): +def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False): if fr == None: fr = db.getFLDate() if to == None: diff --git a/getmentions.py b/getmentions.py index 306363b..7806205 100755 --- a/getmentions.py +++ b/getmentions.py @@ -3,7 +3,7 @@ import dbtools import argparse, operator, re, sys -def getTweets(mode = "@", db = dbtools.dbObject()): +def getTweets(mode = "@", db = dbtools.dbHelper()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets") @@ -36,5 +36,5 @@ if __name__ == "__main__": else: mode = "@" - for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) diff --git a/gettweets.py b/gettweets.py index 36299cb..9463807 100755 --- a/gettweets.py +++ b/gettweets.py @@ -39,7 +39,7 @@ def queryBuilder(fr, to): return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) -def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): +def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True): if fr == None: fr = db.getFLDate() if to == None: diff --git a/makedb.py b/makedb.py index 0aa13bb..30a6520 100755 --- a/makedb.py +++ b/makedb.py @@ -4,7 +4,7 @@ import dbtools import sys -def makeDB(db=dbtools.dbObject()): +def makeDB(db=dbtools.dbHelper()): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, 
`retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.commit() diff --git a/setuptools/__init__.py b/setuptools/__init__.py index 98274c3..c432018 100644 --- a/setuptools/__init__.py +++ b/setuptools/__init__.py @@ -12,7 +12,7 @@ def getSetting(section, setting, path = "config.cfg"): def dbtype(): try: - return getSetting("Database", "type") + return int(getSetting("Database", "type")) except: return 0 # for SQLite3