From f3da44fdd31003ac8ad640a3c707e842f7ad1298 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 17:40:58 +0200 Subject: [PATCH 01/30] Add gitignore file --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d494f94 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +config.cfg +Database.db From 059633a32a21180918011bab5e34463c8259f9b7 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 21:04:00 +0200 Subject: [PATCH 02/30] Move tools to subdir --- tools.py => tools/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools.py => tools/__init__.py (100%) diff --git a/tools.py b/tools/__init__.py similarity index 100% rename from tools.py rename to tools/__init__.py From 77e66918db1328ce41e7fa96d8d0e34327a08cc7 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 21:04:13 +0200 Subject: [PATCH 03/30] Allow file name to be passed to csvdb function --- csvdb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csvdb.py b/csvdb.py index 153f413..c3585f8 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,11 +4,11 @@ import tools import sqlite3, csv, sys -def makeDB(path=tools.dbpath()): +def makeDB(path=tools.dbpath(), infile='tweets.csv'): try: - infile = open('tweets.csv') + infile = open(infile) except IOError: - raise IOError("Please make sure that the tweets.csv from the Twitter download is located in this directory.") + raise IOError("Unable to read %s." % infile) input = list(csv.reader(infile)) From e0abdf0a56279b388a599ce3513843b1c2a42645 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 23:57:22 +0200 Subject: [PATCH 04/30] Check in first version of dbtools --- dbtools/__init__.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 dbtools/__init__.py diff --git a/dbtools/__init__.py b/dbtools/__init__.py new file mode 100644 index 0000000..64c2c27 --- /dev/null +++ b/dbtools/__init__.py @@ -0,0 +1,64 @@ +import sqlite3, pymysql, pymysql.cursors + +SQLITE = 0 +MYSQL = 1 +MARIADB = MYSQL + +class dbObject: + +# --------------------------------------------- Initialization ------------------------------------------------- + + def initMySQL(self, host, port, user, pwd, db): + self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor) + self.cur = conn.cursor() + self.dbtype = MYSQL + self.host = host + self.port = port + self.user = user + self.pwd = pwd + self.db = db + + def initSQLite(self, path): + self.conn = sqlite3.connect(path) + self.cur = self.conn.cursor() + self.dbtype = SQLITE + self.path = path + + def __init__(self, dbtype = SQLITE, path = 'Database.db', host = None, port = None, user = None, pwd = None, db = None): + + if dbtype == SQLITE: + self.initSQLite(path) + + elif dbtype == MYSQL: + self.initMySQL(host, port or 3306, user, pwd, db) + + else: + raise ValueError("Unknown database type %s." % str(dbtype)) + +# ---------------------------------------------- No more initialization ---------------------------------------- + + def closeConnection(self): + return self.conn.close() + + def commit(self): + return self.conn.commit() + + def executeQuery(self, query): + return self.cur.execute(query) + + def getNext(self): + return self.cur.fetchone() + + def isInitialized(self): + try: + self.executeQuery("SELECT * FROM tweets") + return True + except: + return False + + def getFLDate(self, val = 0): + if val == 0: + mode = "MIN" + else: + mode = "MAX" + return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) From 367b4e786483c7b9bdab8a0a3dc07319587c04fd Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 8 Apr 2016 00:15:21 +0200 Subject: [PATCH 05/30] Fixed filler dependencies --- dbtools/__init__.py | 15 +++++++++++++-- filler.py | 24 ++++-------------------- tools/__init__.py | 5 ++++- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 64c2c27..b8683f1 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -4,6 +4,9 @@ SQLITE = 0 MYSQL = 1 MARIADB = MYSQL +MIN = 0 +MAX = 1 + class dbObject: # --------------------------------------------- Initialization ------------------------------------------------- @@ -56,9 +59,17 @@ class dbObject: except: return False - def getFLDate(self, val = 0): - if val == 0: + def getFLDate(self, val = MIN): + if val == MIN: mode = "MIN" else: mode = "MAX" return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + + def getLatestTweet(db): + db.executeQuery("SELECT max(tweet_id) FROM tweets") + try: + return int(db.getNext()[0]) + except: + return 0 + diff --git a/filler.py b/filler.py index 8ef8710..5e5e822 100755 --- a/filler.py +++ b/filler.py @@ -1,26 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools, tools -import html.parser, os - -def getSavepoint(db): - db.executeQuery("SELECT max(tweet_id) FROM tweets") - try: - return int(db.getNext()[0]) - except: - print("No tweets stored yet.") - return 0 - -def unescapeText(text): - return html.parser.HTMLParser().unescape(text).replace("'","''") - -def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()): +def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): query = "from:" + user - - db = tools.dbHelper(dbpath) - - savepoint = getSavepoint(db) + savepoint = db.getLatestTweet() last = savepoint timeline = two.search(query, savepoint) @@ -29,7 +13,7 @@ def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()): for status in timeline: timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = unescapeText(status.text) + text = tools.unescapeText(status.text) db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") db.commit() diff --git a/tools/__init__.py b/tools/__init__.py index d39a1eb..9358a6e 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -1,4 +1,4 @@ -import configparser, csv, datetime, itertools, os, sqlite3, sys, tweepy +import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tweepy class SetupException(Exception): def __str__(self): @@ -158,3 +158,6 @@ def printCSV(inlist): writer = csv.writer(sys.stdout) writer.writerows(inlist) +def unescapeText(text): + return html.parser.HTMLParser().unescape(text).replace("'","''") + From b239e36b25756907b50ff388bd2b9849ab21b47d Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 8 Apr 2016 00:23:04 +0200 Subject: [PATCH 06/30] Fix csvdb dependencies --- csvdb.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/csvdb.py b/csvdb.py index c3585f8..41781b8 100755 --- a/csvdb.py +++ b/csvdb.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import sqlite3, csv, sys -def makeDB(path=tools.dbpath(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): try: infile = open(infile) except IOError: @@ -12,18 +12,15 @@ def makeDB(path=tools.dbpath(), infile='tweets.csv'): input = list(csv.reader(infile)) - conn = sqlite3.connect(path) - cur = conn.cursor() - try: - cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - except sqlite3.OperationalError: + dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") + except: raise IOError("%s already exists. Please delete it before trying to create a new one." % path) for row in input[1:]: - cur.execute("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") + dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") - conn.commit() + dbo.commit() if __name__ == "__main__": if len(sys.argv) > 2: From e751e85276d53bc9bf679f1133c3138465644426 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 8 Apr 2016 00:23:48 +0200 Subject: [PATCH 07/30] Update gitignore file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d494f94..e04e077 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ config.cfg Database.db +__pycache__ From 7e9e27e00b5b5b770fb976f7818fd7e2830438e0 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 20 May 2016 21:39:21 +0200 Subject: [PATCH 08/30] Change default value handling in dbtools --- dbtools/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index b8683f1..cff5933 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -27,13 +27,13 @@ class dbObject: self.dbtype = SQLITE self.path = path - def __init__(self, dbtype = SQLITE, path = 'Database.db', host = None, port = None, user = None, pwd = None, db = None): + def __init__(self, dbtype = SQLITE, path = None, host = None, port = None, user = None, pwd = None, db = None): if dbtype == SQLITE: - self.initSQLite(path) + self.initSQLite(path or 'Database.db') elif dbtype == MYSQL: - self.initMySQL(host, port or 3306, user, pwd, db) + self.initMySQL(host or 'localhost', port or 3306, user, pwd, db) else: raise ValueError("Unknown database type %s." % str(dbtype)) From 1c0b3da582919e48e4d1f273a60aad8220e7ceb8 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:40:21 +0200 Subject: [PATCH 09/30] Split up tools to setuptools and twitools. Testing pending. --- filler.py | 7 ++-- followertxt.py | 10 +++--- getdates.py | 13 +++---- getmentions.py | 12 +++---- gettweets.py | 10 +++--- makedb.py | 8 ++--- {tools => setuptools}/__init__.py | 58 ------------------------------- tweleter.py | 7 ++-- twitools/__init__.py | 20 +++++++++++ 9 files changed, 49 insertions(+), 96 deletions(-) rename {tools => setuptools}/__init__.py (65%) create mode 100644 twitools/__init__.py diff --git a/filler.py b/filler.py index 5e5e822..649b06c 100755 --- a/filler.py +++ b/filler.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -import dbtools, tools +import dbtools, setuptools, twitools -def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): +def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user savepoint = db.getLatestTweet() last = savepoint @@ -13,7 +13,7 @@ def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): for status in timeline: timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = tools.unescapeText(status.text) + text = setuptools.unescapeText(status.text) db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") db.commit() @@ -28,3 +28,4 @@ def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): if __name__ == "__main__": count, last, first = fill() print("Stored %i tweets after %i until %i." % (count, first, last)) + diff --git a/followertxt.py b/followertxt.py index 6b15653..7059b67 100755 --- a/followertxt.py +++ b/followertxt.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 -import tools +import twitools, setuptools import os, time, tweepy -def getFollowerIDs(two=tools.twObject()): +def getFollowerIDs(two=twitools.twObject()): ''' Returns 5,000 follower IDs at most ''' - return two.api.followers_ids(screen_name=tools.user()) + return two.api.followers_ids(screen_name=twitools.twObject().whoami()) -def getNamesByIDs(fids=getFollowerIDs(), two=tools.twObject()): - for page in tools.paginate(fids, 100): +def getNamesByIDs(fids=getFollowerIDs(), two=twitools.twObject()): + for page in setuptools.paginate(fids, 100): followers = two.api.lookup_users(user_ids=page) for follower in followers: yield follower.screen_name diff --git a/getdates.py b/getdates.py index 2d58eac..40b8fa6 100755 --- a/getdates.py +++ b/getdates.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -import tools - +import dbtools, setuptools import sys, datetime def dateArgs(argv = sys.argv[1:]): @@ -30,10 +29,10 @@ def dateArgs(argv = sys.argv[1:]): raise ValueError("Number of days for running average must be an integer.") mode = 0 elif mode == 1: - fr = tools.getDate(arg) + fr = setuptools.getDate(arg) mode = 0 else: - to = tools.getDate(arg) + to = setuptools.getDate(arg) mode = 0 if mode in (1, 2): @@ -92,9 +91,7 @@ def getHeaders(strings, av): return [headers] -def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbpath(), headers = False): - db = tools.dbHelper(path) - +def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False): if fr == None: fr = db.getFLDate() if to == None: @@ -111,4 +108,4 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbp return cur if __name__ == "__main__": - tools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) + setuptools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) diff --git a/getmentions.py b/getmentions.py index 833f6ca..cade951 100755 --- a/getmentions.py +++ b/getmentions.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import operator, re, sys -def getTweets(mode = "@", path = tools.dbpath()): - db = tools.dbHelper(path) - +def getTweets(mode = "@", db = dbtools.dbObject()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets") @@ -27,12 +25,12 @@ def getTweets(mode = "@", path = tools.dbpath()): if __name__ == "__main__": mode = "@" - path = tools.dbpath() - if len(sys.argv) > 1: if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): raise ValueError("Invalid arguments passed.") + path = None + for arg in sys.argv[1:]: if arg == "-h": mode = "#" @@ -41,5 +39,5 @@ if __name__ == "__main__": else: path = arg - for handle, tweets in sorted(list(getTweets(mode,path).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject(path=path)).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) diff --git a/gettweets.py b/gettweets.py index 448794b..1cd64a7 100755 --- a/gettweets.py +++ b/gettweets.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import tools +import dbtools, setuptools import sys, datetime @@ -20,10 +20,10 @@ def dateArgs(argv = sys.argv[1:]): strings += [arg] mode = 0 elif mode == 1: - fr = tools.getDate(arg) + fr = setuptools.getDate(arg) mode = 0 else: - to = tools.getDate(arg) + to = setuptools.getDate(arg) mode = 0 if mode in (1, 2): @@ -39,9 +39,7 @@ def queryBuilder(fr, to): return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) -def getDataByDate(fr, to, path = tools.dbpath(), headers = True): - db = tools.dbHelper(path) - +def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): if fr == None: fr = db.getFLDate() if to == None: diff --git a/makedb.py b/makedb.py index a1b8fb6..0aa13bb 100755 --- a/makedb.py +++ b/makedb.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import sys -def makeDB(path=tools.dbpath()): - db = tools.dbHelper(path, create = True) - +def makeDB(db=dbtools.dbObject()): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.commit() @@ -16,6 +14,6 @@ if __name__ == "__main__": if len(sys.argv) > 2: raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") try: - makeDB(sys.argv[1]) + makeDB(dbtools.dbObject(path=sys.argv[1])) except IndexError: makeDB() diff --git a/tools/__init__.py b/setuptools/__init__.py similarity index 65% rename from tools/__init__.py rename to setuptools/__init__.py index 9358a6e..25f5bf4 100644 --- a/tools/__init__.py +++ b/setuptools/__init__.py @@ -40,64 +40,6 @@ def ase(): except: raise SetupException() -def user(): - return twObject().whoami() - - -class dbObject: - - def __init__(self, path=dbpath()): - self.conn = sqlite3.connect(path) - self.cur = self.conn.cursor() - self.path = path - - def closeConnection(self): - return self.conn.close() - - def commit(self): - return self.conn.commit() - - def executeQuery(self, query): - return self.cur.execute(query) - - def getNext(self): - return self.cur.fetchone() - - def isInitialized(self): - try: - self.executeQuery("SELECT * FROM tweets") - return True - except: - return False - - def getFLDate(self, val = 0): - if val == 0: - mode = "MIN" - else: - mode = "MAX" - - return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) - - -class twObject: - - def __init__(self, cke = cke(), cse = cse(), ato = ato(), ase = ase()): - self.auth = tweepy.OAuthHandler(cke, cse) - self.auth.set_access_token(ato, ase) - self.api = tweepy.API(self.auth) - - def delete(self, id): - self.api.destroy_status(id) - - def search(self, query, savepoint = 0): - tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) - tweets.reverse() - return tweets - - def whoami(self): - return self.auth.get_username() - - def dbCheck(db, create = False): if (not create and dbInitialized(db)) or (create and not dbInitialized(db)): return True diff --git a/tweleter.py b/tweleter.py index 274cfdc..38342bf 100755 --- a/tweleter.py +++ b/tweleter.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 -import tools - +import twitools import tkinter, tkinter.messagebox, html.parser, os -two = tools.twObject() +two = twitools.twObject() top = tkinter.Tk() top.title("Tweet Deleter") scrollbar = tkinter.Scrollbar(top) @@ -41,7 +40,7 @@ def addStatus(id, text): list.insert(0, element.encode("UTF-8")) def getTweets(): - query = "from:" + tools.user() + query = "from:" + twitools.twObject().whoami() try: timeline = two.search(query, 0) diff --git a/twitools/__init__.py b/twitools/__init__.py new file mode 100644 index 0000000..8de5355 --- /dev/null +++ b/twitools/__init__.py @@ -0,0 +1,20 @@ +import tweepy, setuptools + +class twObject: + + def __init__(self, cke = setuptools.cke(), cse = setuptools.cse(), ato = setuptools.ato(), ase = setuptools.ase()): + self.auth = tweepy.OAuthHandler(cke, cse) + self.auth.set_access_token(ato, ase) + self.api = tweepy.API(self.auth) + + def delete(self, id): + self.api.destroy_status(id) + + def search(self, query, savepoint = 0): + tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) + tweets.reverse() + return tweets + + def whoami(self): + return self.auth.get_username() + From 593213bb2e6baa033167a77e1b5d1f376d543380 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:44:51 +0200 Subject: [PATCH 10/30] Fix call to getDate --- dbtools/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index cff5933..5ff001d 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -1,3 +1,4 @@ +import setuptools import sqlite3, pymysql, pymysql.cursors SQLITE = 0 @@ -64,7 +65,7 @@ class dbObject: mode = "MIN" else: mode = "MAX" - return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) def getLatestTweet(db): db.executeQuery("SELECT max(tweet_id) FROM tweets") From c3575e6d7649ebe8063d65e197fc35caaeea9109 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:58:55 +0200 Subject: [PATCH 11/30] Make getmentions use argparse. Finally. --- getmentions.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/getmentions.py b/getmentions.py index cade951..306363b 100755 --- a/getmentions.py +++ b/getmentions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 import dbtools - -import operator, re, sys +import argparse, operator, re, sys def getTweets(mode = "@", db = dbtools.dbObject()): handles = dict() @@ -24,20 +23,18 @@ def getTweets(mode = "@", db = dbtools.dbObject()): return handles if __name__ == "__main__": - mode = "@" - if len(sys.argv) > 1: - if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): - raise ValueError("Invalid arguments passed.") + parser = argparse.ArgumentParser() + g = parser.add_mutually_exclusive_group() + g.add_argument("-t", "--hashtags", help="count only #hashtags", action="store_true") + g.add_argument("-w", "--words", help="count all words", action="store_true") + g.add_argument("-m", "--mentions", help="count only @mentions (default)", action="store_true") + args = parser.parse_args() + if args.hashtags: + mode = "#" + elif args.words: + mode = "" + else: + mode = "@" - path = None - - for arg in sys.argv[1:]: - if arg == "-h": - mode = "#" - if arg == "-w": - mode = "" - else: - path = arg - - for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject(path=path)).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) From 6a87b5c36a79a522d3bb27e67b9768ea97563768 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:59:41 +0200 Subject: [PATCH 12/30] Fix function call --- gettweets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gettweets.py b/gettweets.py index 1cd64a7..36299cb 100755 --- a/gettweets.py +++ b/gettweets.py @@ -54,4 +54,4 @@ def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): if __name__ == "__main__": - tools.printCSV(getDataByDate(*dateArgs(), headers = True)) + setuptools.printCSV(getDataByDate(*dateArgs(), headers = True)) From 70d9cd45922f3056b32a0cae443bb1c2ca7bcbda Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 21:58:53 +0200 Subject: [PATCH 13/30] Prepare setuptools for MySQL support --- setuptools/__init__.py | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/setuptools/__init__.py b/setuptools/__init__.py index 25f5bf4..98274c3 100644 --- a/setuptools/__init__.py +++ b/setuptools/__init__.py @@ -2,19 +2,53 @@ import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tw class SetupException(Exception): def __str__(self): - return "Seems like config.cfg has not been created yet. Run setup.py to do so." + return "Seems like config.cfg has not been created yet or contains serious errors. Run setup.py to create it." -def getSetting(section, setting): +def getSetting(section, setting, path = "config.cfg"): config = configparser.RawConfigParser() - config.read('config.cfg') + config.read(path) return config.get(section, setting) +def dbtype(): + try: + return getSetting("Database", "type") + except: + return 0 # for SQLite3 + +### Must only be called AFTER dbtype()! ### + +def dbhost(): + try: + return getSetting("Database", "host") + except: + raise SetupException() + +def dbuser(): + try: + return getSetting("Database", "user") + except: + raise SetupException() + +def dbpass(): + try: + return getSetting("Database", "pass") + except: + raise SetupException() + +def dbname(): + try: + return getSetting("Database", "name") + except: + raise SetupException() + def dbpath(): try: return getSetting("Database", "path") except: - return "Database.db" + return SetupException() + +### def cke(): try: From 8f2d551dce7e04515b7cffbd1ded9da7cd6791ff Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 23:52:33 +0200 Subject: [PATCH 14/30] Fix variable --- dbtools/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 5ff001d..ffca253 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -14,7 +14,7 @@ class dbObject: def initMySQL(self, host, port, user, pwd, db): self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor) - self.cur = conn.cursor() + self.cur = self.conn.cursor() self.dbtype = MYSQL self.host = host self.port = port From aaa683aa736e8974b64f88e804434d8b6994744d Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 31 May 2016 00:05:35 +0200 Subject: [PATCH 15/30] Change tweet deleter name to deleter. --- tweleter.py => deleter.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tweleter.py => deleter.py (100%) diff --git a/tweleter.py b/deleter.py similarity index 100% rename from tweleter.py rename to deleter.py From 63787dbf28209caf94ad604afc56331e2b41d5bb Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 6 Jun 2016 19:19:23 +0200 Subject: [PATCH 16/30] Make csvdb use dbHelper() to get a dbo --- csvdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csvdb.py b/csvdb.py index 41781b8..d551d8d 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,7 +4,7 @@ import dbtools import sqlite3, csv, sys -def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): try: infile = open(infile) except IOError: From aad1531392f9cd1b00cbec7a89755606d13567bd Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 30 Jun 2016 12:33:09 +0200 Subject: [PATCH 17/30] Futher preparation for MySQL, which is not working yet though... Make filler handle direct messages Add table and functions for direct messages --- csvdb.py | 23 ++++++++--------------- dbtools/__init__.py | 20 +++++++++++++++++++- filler.py | 40 ++++++++++++++++++++++++++++++++++------ getdates.py | 2 +- getmentions.py | 4 ++-- gettweets.py | 2 +- makedb.py | 2 +- setuptools/__init__.py | 2 +- 8 files changed, 67 insertions(+), 28 deletions(-) diff --git a/csvdb.py b/csvdb.py index 41781b8..84b69d5 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,29 +4,22 @@ import dbtools import sqlite3, csv, sys -def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): try: infile = open(infile) except IOError: raise IOError("Unable to read %s." % infile) - input = list(csv.reader(infile)) + infile = list(csv.reader(infile)) - try: - dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - except: - raise IOError("%s already exists. Please delete it before trying to create a new one." % path) + for row in infile[1:]: + try: + dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") + except: + pass - for row in input[1:]: - dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") - dbo.commit() if __name__ == "__main__": - if len(sys.argv) > 2: - raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") - try: - makeDB(sys.argv[1]) - except IndexError: - makeDB() + makeDB() diff --git a/dbtools/__init__.py b/dbtools/__init__.py index ffca253..f895dfb 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -65,7 +65,18 @@ class dbObject: mode = "MIN" else: mode = "MAX" - return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + if self.dbtype == SQLITE: + return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + else: + self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode) + return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) + + def getLatestMessage(db): + db.executeQuery("SELECT max(id) FROM messages") + try: + return int(db.getNext()[0]) + except: + return 0 def getLatestTweet(db): db.executeQuery("SELECT max(tweet_id) FROM tweets") @@ -74,3 +85,10 @@ class dbObject: except: return 0 +def dbHelper(): + if setuptools.dbtype() == SQLITE: + return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) + elif setuptools.dbtype() == MYSQL: + return dbObject(dbtype=MYSQL, host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname()) + else: + raise setuptools.SetupException() diff --git a/filler.py b/filler.py index 649b06c..25fad8a 100755 --- a/filler.py +++ b/filler.py @@ -2,9 +2,9 @@ import dbtools, setuptools, twitools -def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()): +def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user - savepoint = db.getLatestTweet() + savepoint = db.getLatestTweet() + 1 last = savepoint timeline = two.search(query, savepoint) @@ -15,7 +15,10 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools. timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" text = setuptools.unescapeText(status.text) - db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + try: + db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + except: + pass db.commit() last = status.id @@ -25,7 +28,32 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools. return tw_counter, last, savepoint -if __name__ == "__main__": - count, last, first = fill() - print("Stored %i tweets after %i until %i." % (count, first, last)) +def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): + mcount = 0 + savepoint = db.getLatestMessage() + 1 + new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True) + new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True) + for m in new_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + for m in new_out_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + db.commit() + + return mcount, savepoint or 0, db.getLatestMessage() + +if __name__ == "__main__": + count, last, first = getTweets() + print("Stored %i tweets after %i until %i." % (count, first, last)) + count, last, first = getMessages() + print("Stored %i messages after %i until %i." % (count, first, last)) diff --git a/getdates.py b/getdates.py index 40b8fa6..18cd6a0 100755 --- a/getdates.py +++ b/getdates.py @@ -91,7 +91,7 @@ def getHeaders(strings, av): return [headers] -def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False): +def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False): if fr == None: fr = db.getFLDate() if to == None: diff --git a/getmentions.py b/getmentions.py index 306363b..7806205 100755 --- a/getmentions.py +++ b/getmentions.py @@ -3,7 +3,7 @@ import dbtools import argparse, operator, re, sys -def getTweets(mode = "@", db = dbtools.dbObject()): +def getTweets(mode = "@", db = dbtools.dbHelper()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets") @@ -36,5 +36,5 @@ if __name__ == "__main__": else: mode = "@" - for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) diff --git a/gettweets.py b/gettweets.py index 36299cb..9463807 100755 --- a/gettweets.py +++ b/gettweets.py @@ -39,7 +39,7 @@ def queryBuilder(fr, to): return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) -def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): +def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True): if fr == None: fr = db.getFLDate() if to == None: diff --git a/makedb.py b/makedb.py index 0aa13bb..30a6520 100755 --- a/makedb.py +++ b/makedb.py @@ -4,7 +4,7 @@ import dbtools import sys -def makeDB(db=dbtools.dbObject()): +def makeDB(db=dbtools.dbHelper()): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.commit() diff --git a/setuptools/__init__.py b/setuptools/__init__.py index 98274c3..c432018 100644 --- a/setuptools/__init__.py +++ b/setuptools/__init__.py @@ -12,7 +12,7 @@ def getSetting(section, setting, path = "config.cfg"): def dbtype(): try: - return getSetting("Database", "type") + return int(getSetting("Database", "type")) except: return 0 # for SQLite3 From b39b708270e14aff738c55d1a847a4395f21b0be Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 16:39:55 +0200 Subject: [PATCH 18/30] Move follower functions to twitools Create tables for storing followers and following in setup.py --- followertxt.py | 12 +----------- twitools/__init__.py | 9 +++++++++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/followertxt.py b/followertxt.py index 7059b67..4e8deaa 100755 --- a/followertxt.py +++ b/followertxt.py @@ -3,16 +3,6 @@ import twitools, setuptools import os, time, tweepy -def getFollowerIDs(two=twitools.twObject()): - ''' Returns 5,000 follower IDs at most ''' - return two.api.followers_ids(screen_name=twitools.twObject().whoami()) - -def getNamesByIDs(fids=getFollowerIDs(), two=twitools.twObject()): - for page in setuptools.paginate(fids, 100): - followers = two.api.lookup_users(user_ids=page) - for follower in followers: - yield follower.screen_name - def getOutDir(dirname="followers"): if not os.path.isdir(dirname): os.mkdir(dirname) @@ -23,7 +13,7 @@ def getOutFile(dirname="followers"): def writeOutFile(outfile=getOutFile()): with open(getOutFile(), 'a') as f: - for follower in getNamesByIDs(getFollowerIDs()): + for follower in twitools.getNamesByIDs(twitools.getFollowerIDs()): f.write(follower + "\n") if __name__ == "__main__": diff --git a/twitools/__init__.py b/twitools/__init__.py index 8de5355..730d7de 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -18,3 +18,12 @@ class twObject: def whoami(self): return self.auth.get_username() +def getFollowerIDs(two=twObject()): + ''' Returns 5,000 follower IDs at most ''' + return two.api.followers_ids(screen_name=twObject().whoami()) + +def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): + for page in setuptools.paginate(fids, 100): + followers = two.api.lookup_users(user_ids=page) + for follower in followers: + yield follower.screen_name From 114fc04b4377ba18a76721cf40b0848079360455 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 18:02:11 +0200 Subject: [PATCH 19/30] Implement following/follower monitoring --- dbtools/__init__.py | 14 ++++++++++++++ filler.py | 34 +++++++++++++++++++++++++++++++++- twitools/__init__.py | 3 +++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index f895dfb..a0df836 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -50,6 +50,9 @@ class dbObject: def executeQuery(self, query): return self.cur.execute(query) + def getAll(self): + return self.cur.fetchall() + def getNext(self): return self.cur.fetchone() @@ -71,6 +74,17 @@ class dbObject: self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode) return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) + def getFollowers(db): + db.executeQuery("SELECT id FROM followers;") + for i in db.getAll(): + yield i[0] + + def getFollowing(db): + db.executeQuery("SELECT id FROM following;") + for i in db.getAll(): + yield i[0] + + def getLatestMessage(db): db.executeQuery("SELECT max(id) FROM messages") try: diff --git a/filler.py b/filler.py index 25fad8a..00ca02b 100755 --- a/filler.py +++ b/filler.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import dbtools, setuptools, twitools +import dbtools, setuptools, time, twitools def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user @@ -52,8 +52,40 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() +def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): + current = db.getFollowers() + new = twitools.getNamesByIDs(twitools.getFollowerIDs()) + + for follower in new: + if follower not in current: + db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) + + for follower in current: + if follower not in new: + db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), follower)) + + db.commit() + +def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): + current = db.getFollowing() + new = twitools.getNamesByIDs(twitools.getFollowingIDs()) + + for following in new: + if following not in current: + db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) + + for following in current: + if following not in new: + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = %s AND `until` IS NULL" % (int(time.time()), following)) + + db.commit() + if __name__ == "__main__": count, last, first = getTweets() print("Stored %i tweets after %i until %i." % (count, first, last)) count, last, first = getMessages() print("Stored %i messages after %i until %i." % (count, first, last)) + getFollowers() + print("Processed followers.") + getFollowing() + print("Processed following.") diff --git a/twitools/__init__.py b/twitools/__init__.py index 730d7de..ff18d7a 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -22,6 +22,9 @@ def getFollowerIDs(two=twObject()): ''' Returns 5,000 follower IDs at most ''' return two.api.followers_ids(screen_name=twObject().whoami()) +def getFollowingIDs(two=twObject()): + return two.api.friends_ids(screen_name=twObject().whoami()) + def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): for page in setuptools.paginate(fids, 100): followers = two.api.lookup_users(user_ids=page) From 0a4de77aae6bca42d95a0923f3a9c85a3e29227c Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 18:03:07 +0200 Subject: [PATCH 20/30] Kick out followertxt.py which isn't going to be used anymore --- followertxt.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100755 followertxt.py diff --git a/followertxt.py b/followertxt.py deleted file mode 100755 index 4e8deaa..0000000 --- a/followertxt.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python3 - -import twitools, setuptools -import os, time, tweepy - -def getOutDir(dirname="followers"): - if not os.path.isdir(dirname): - os.mkdir(dirname) - -def getOutFile(dirname="followers"): - getOutDir(dirname) - return os.path.join(dirname, str(int(time.time())) + ".txt") - -def writeOutFile(outfile=getOutFile()): - with open(getOutFile(), 'a') as f: - for follower in twitools.getNamesByIDs(twitools.getFollowerIDs()): - f.write(follower + "\n") - -if __name__ == "__main__": - writeOutFile() From f87ad68b8bebdb9bc56ed2693f36049d422061f8 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 21:46:57 +0200 Subject: [PATCH 21/30] Optimize follower/following handling, print changes when running filler.py --- filler.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/filler.py b/filler.py index 00ca02b..146b5ec 100755 --- a/filler.py +++ b/filler.py @@ -54,38 +54,58 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): current = db.getFollowers() - new = twitools.getNamesByIDs(twitools.getFollowerIDs()) + new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) + gained = 0 + lost = 0 + + if len(new) == 0: + print("Something went wrong here. -.-") + return 0,0 for follower in new: if follower not in current: db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) + print("New follower: %s" % follower) + gained += 1 for follower in current: if follower not in new: db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), follower)) + print("Lost follower: %s" % follower) + lost += 1 db.commit() + return gained, lost + def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): current = db.getFollowing() - new = twitools.getNamesByIDs(twitools.getFollowingIDs()) + new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) + gained = 0 + lost = 0 for following in new: if following not in current: db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) + print("You started following: %s" % following) + gained += 1 for following in current: if following not in new: db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = %s AND `until` IS NULL" % (int(time.time()), following)) + print("You no longer follow: %s" % following) + lost += 1 db.commit() + + return gained, lost if __name__ == "__main__": count, last, first = getTweets() print("Stored %i tweets after %i until %i." % (count, first, last)) count, last, first = getMessages() print("Stored %i messages after %i until %i." % (count, first, last)) - getFollowers() - print("Processed followers.") - getFollowing() - print("Processed following.") + gained, lost = getFollowers() + print("Gained %i followers, lost %i." % (gained, lost)) + gained, lost = getFollowing() + print("Started following %i, stopped following %i." % (gained, lost)) From f19f04c415d72ebffe31eeb0ed83dd51c46089c5 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 22:07:00 +0200 Subject: [PATCH 22/30] Nobody cares about the tweet IDs. --- filler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filler.py b/filler.py index 146b5ec..8b9f980 100755 --- a/filler.py +++ b/filler.py @@ -102,9 +102,9 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): if __name__ == "__main__": count, last, first = getTweets() - print("Stored %i tweets after %i until %i." % (count, first, last)) + print("Stored %i tweets." % (count, first, last)) count, last, first = getMessages() - print("Stored %i messages after %i until %i." % (count, first, last)) + print("Stored %i messages." % (count, first, last)) gained, lost = getFollowers() print("Gained %i followers, lost %i." % (gained, lost)) gained, lost = getFollowing() From 5c22b7834761bf608d3414bd95868afe206de528 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 2 Aug 2016 01:17:08 +0200 Subject: [PATCH 23/30] Oh my, I'm an idiot... --- filler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filler.py b/filler.py index 8b9f980..da3342f 100755 --- a/filler.py +++ b/filler.py @@ -102,9 +102,9 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): if __name__ == "__main__": count, last, first = getTweets() - print("Stored %i tweets." % (count, first, last)) + print("Stored %i tweets." % count) count, last, first = getMessages() - print("Stored %i messages." % (count, first, last)) + print("Stored %i messages." % count) gained, lost = getFollowers() print("Gained %i followers, lost %i." % (gained, lost)) gained, lost = getFollowing() From 3ec478038506090624ba37b2b06c8a71707bf7c8 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 3 Aug 2016 13:58:19 +0200 Subject: [PATCH 24/30] getFollowers()/getFollowing() should not return records where 'until' is not null, i.e. records that are no longer valid --- dbtools/__init__.py | 4 ++-- filler.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index a0df836..5087cb5 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -75,12 +75,12 @@ class dbObject: return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) def getFollowers(db): - db.executeQuery("SELECT id FROM followers;") + db.executeQuery("SELECT id FROM followers WHERE `until` IS NOT NULL;") for i in db.getAll(): yield i[0] def getFollowing(db): - db.executeQuery("SELECT id FROM following;") + db.executeQuery("SELECT id FROM following WHERE `until` IS NOT NULL;") for i in db.getAll(): yield i[0] diff --git a/filler.py b/filler.py index da3342f..657301f 100755 --- a/filler.py +++ b/filler.py @@ -58,10 +58,6 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): gained = 0 lost = 0 - if len(new) == 0: - print("Something went wrong here. -.-") - return 0,0 - for follower in new: if follower not in current: db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) From 5fc9078ea8e1afcbabe250cf752cd380dcbda334 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 3 Aug 2016 15:26:31 +0200 Subject: [PATCH 25/30] IS NULL. We need those where 'until' IS NULL. -.- --- dbtools/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 5087cb5..46d440a 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -75,12 +75,12 @@ class dbObject: return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) def getFollowers(db): - db.executeQuery("SELECT id FROM followers WHERE `until` IS NOT NULL;") + db.executeQuery("SELECT id FROM followers WHERE `until` IS NULL;") for i in db.getAll(): yield i[0] def getFollowing(db): - db.executeQuery("SELECT id FROM following WHERE `until` IS NOT NULL;") + db.executeQuery("SELECT id FROM following WHERE `until` IS NULL;") for i in db.getAll(): yield i[0] From 8381c4469a3c369bd196842051198ff8b56b6167 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 4 Aug 2016 18:17:59 +0200 Subject: [PATCH 26/30] Fix apparent problems with database objects in filler --- filler.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/filler.py b/filler.py index 657301f..8f56c21 100755 --- a/filler.py +++ b/filler.py @@ -24,8 +24,6 @@ def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twit last = status.id tw_counter = tw_counter + 1 - db.closeConnection() - return tw_counter, last, savepoint def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): @@ -97,11 +95,12 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): return gained, lost if __name__ == "__main__": - count, last, first = getTweets() + db = dbtools.dbHelper() + count, last, first = getTweets(db) print("Stored %i tweets." % count) - count, last, first = getMessages() + count, last, first = getMessages(db) print("Stored %i messages." % count) - gained, lost = getFollowers() + gained, lost = getFollowers(db) print("Gained %i followers, lost %i." % (gained, lost)) - gained, lost = getFollowing() + gained, lost = getFollowing(db) print("Started following %i, stopped following %i." % (gained, lost)) From 98afc3bffd2bb1f522c00f0f6c8c2ee9d2b5df73 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 5 Aug 2016 23:13:31 +0200 Subject: [PATCH 27/30] Whatever it is that keeps going wrong, I hate it. And I have to try mitigating it. --- filler.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/filler.py b/filler.py index 8f56c21..1fc7514 100755 --- a/filler.py +++ b/filler.py @@ -50,12 +50,16 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() -def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): +def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): current = db.getFollowers() new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) gained = 0 lost = 0 + if (len(current) == 0 or len(new) == 0) and not firstrun: + print("Something went wrong.") + return 0,0 + for follower in new: if follower not in current: db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) @@ -72,12 +76,16 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): return gained, lost -def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): +def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): current = db.getFollowing() new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) gained = 0 lost = 0 + if (len(current) == 0 or len(new) == 0) and not firstrun: + print("Something went wrong.") + return 0,0 + for following in new: if following not in current: db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) From 21c90eb39483cfe32a9515cbbede2143a59d83eb Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 5 Aug 2016 23:20:18 +0200 Subject: [PATCH 28/30] Convert generator to list for getFollowers()/getFollowing() Fix SQL query in getFollowing() --- filler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/filler.py b/filler.py index 1fc7514..5782bf9 100755 --- a/filler.py +++ b/filler.py @@ -51,7 +51,7 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): - current = db.getFollowers() + current = list(db.getFollowers()) new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) gained = 0 lost = 0 @@ -77,7 +77,7 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) return gained, lost def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): - current = db.getFollowing() + current = list(db.getFollowing()) new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) gained = 0 lost = 0 @@ -94,7 +94,7 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for following in current: if following not in new: - db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = %s AND `until` IS NULL" % (int(time.time()), following)) + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), following)) print("You no longer follow: %s" % following) lost += 1 From 6b443f71c93afd7a2de71d9f3462c56ea1269a97 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Sun, 7 Aug 2016 17:11:22 +0200 Subject: [PATCH 29/30] Make sure that filler won't fail on first run because of empty data sets --- filler.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/filler.py b/filler.py index 5782bf9..499c352 100755 --- a/filler.py +++ b/filler.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import dbtools, setuptools, time, twitools +import argparse, dbtools, setuptools, time, twitools def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user @@ -103,12 +103,15 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) return gained, lost if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true") + args = parser.parse_args() db = dbtools.dbHelper() count, last, first = getTweets(db) print("Stored %i tweets." % count) count, last, first = getMessages(db) print("Stored %i messages." % count) - gained, lost = getFollowers(db) + gained, lost = getFollowers(db, firstrun=args.first) print("Gained %i followers, lost %i." % (gained, lost)) - gained, lost = getFollowing(db) + gained, lost = getFollowing(db, firstrun=args.first) print("Started following %i, stopped following %i." % (gained, lost)) From 44ee524ecef4a75f116656184c77bf9c9826b038 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 9 Aug 2016 00:19:29 +0200 Subject: [PATCH 30/30] Forgot that NULL values are actually always seen as distinct values in SQL. Why does SQLite even allow NULL in PRIMARY KEY? --- dbtools/__init__.py | 4 ++-- filler.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 46d440a..e616238 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -75,12 +75,12 @@ class dbObject: return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) def getFollowers(db): - db.executeQuery("SELECT id FROM followers WHERE `until` IS NULL;") + db.executeQuery("SELECT id FROM followers WHERE `until` = 0;") for i in db.getAll(): yield i[0] def getFollowing(db): - db.executeQuery("SELECT id FROM following WHERE `until` IS NULL;") + db.executeQuery("SELECT id FROM following WHERE `until` = 0;") for i in db.getAll(): yield i[0] diff --git a/filler.py b/filler.py index 499c352..ccfe413 100755 --- a/filler.py +++ b/filler.py @@ -62,13 +62,13 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for follower in new: if follower not in current: - db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) + db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower, int(time.time()))) print("New follower: %s" % follower) gained += 1 for follower in current: if follower not in new: - db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), follower)) + db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), follower)) print("Lost follower: %s" % follower) lost += 1 @@ -88,13 +88,13 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for following in new: if following not in current: - db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) + db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following, int(time.time()))) print("You started following: %s" % following) gained += 1 for following in current: if following not in new: - db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), following)) + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), following)) print("You no longer follow: %s" % following) lost += 1