From ba55c7984e85641b984a93d7ef068fce60eb9da9 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 8 Feb 2016 01:55:12 +0100 Subject: [PATCH 01/57] Check in whatever I changed in the meantime, obviously including a setup script --- csvdb.py | 2 +- getmentions.py | 2 +- setup.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ tools.py | 13 ++++----- 4 files changed, 82 insertions(+), 10 deletions(-) create mode 100755 setup.py diff --git a/csvdb.py b/csvdb.py index 15b6a0a..153f413 100755 --- a/csvdb.py +++ b/csvdb.py @@ -2,7 +2,7 @@ import tools -import sqlite3, csv +import sqlite3, csv, sys def makeDB(path=tools.dbpath()): try: diff --git a/getmentions.py b/getmentions.py index e0c9c1b..833f6ca 100755 --- a/getmentions.py +++ b/getmentions.py @@ -11,7 +11,7 @@ def getTweets(mode = "@", path = tools.dbpath()): tweets = db.executeQuery("SELECT text FROM tweets") for tweet in tweets: - for word in tweet[0].split(): + for word in tweet[0].lower().split(): if word[0] == mode or mode == "": if mode == "": handle = word diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..1f2c445 --- /dev/null +++ b/setup.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +import configparser, os.path, sqlite3, tweepy + +if os.path.isfile("config.cfg"): + print("config.cfg already exists. Please remove it before running this script.") + exit(1) + +config = configparser.RawConfigParser() + +config.add_section('Database') + +print('''Twitools will use a database for certain tasks. If this file does not exist yet, +it will be created in this process. The file name defaults to 'Database.db'. +''') + +dbpath = input("Name of the database file [Database.db]: ") +print() + +if dbpath == "": + dbpath = "Database.db" + +config.set('Database', 'path', dbpath) + +if os.path.isfile(dbpath): + pass +else: + conn = sqlite3.connect(dbpath) + cur = conn.cursor() + cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") + conn.commit() + conn.close() + +config.add_section("Twitter") + +cke = "V6ekVFYtavi6IvRFLS0dHifSh" +cse = "U2duSfBtW0Z8UQFoJyARf3jU80gdQ44EEqWqC82ebuGbIPN3t7" + +config.set("Twitter", "cke", cke) +config.set("Twitter", "cse", cse) + +auth = tweepy.OAuthHandler(cke, cse) + +try: + authurl = auth.get_authorization_url() +except tweepy.TweepError: + print("Error getting request token. Please try again later...") + exit(1) + +print('''In the next step, we'll get you connected to Twitter. Please follow this link, +sign on to Twitter and copy the PIN you will get there. Insert it below, then +press Enter to continue. +''') + +print(authurl) +print() + +pin = input("PIN: ") +print() + +try: + auth.get_access_token(pin) +except tweepy.TweepError: + print("Error getting access token. Please try again later...") + exit(1) + +config.set("Twitter", "ato", auth.access_token) +config.set("Twitter", "ase", auth.access_token_secret) + +print("Seems like everything worked out fine. Let's write that config file...") + +with open('config.cfg', 'wt') as cfg: + config.write(cfg) + +print("We're all done. You can now use Twitools. Have fun!") diff --git a/tools.py b/tools.py index 76dd77a..d39a1eb 100644 --- a/tools.py +++ b/tools.py @@ -20,13 +20,13 @@ def cke(): try: return getSetting("Twitter", "cke") except: - return "V6ekVFYtavi6IvRFLS0dHifSh" + raise SetupException() def cse(): try: return getSetting("Twitter", "cse") except: - return "U2duSfBtW0Z8UQFoJyARf3jU80gdQ44EEqWqC82ebuGbIPN3t7" + raise SetupException() def ato(): try: @@ -36,15 +36,12 @@ def ato(): def ase(): try: - return getSetting("Twitter", "ato") + return getSetting("Twitter", "ase") except: raise SetupException() def user(): - try: - return twObject().whoami() - except: - raise SetupException() + return twObject().whoami() class dbObject: @@ -98,7 +95,7 @@ class twObject: return tweets def whoami(self): - return self.api.me().screen_name + return self.auth.get_username() def dbCheck(db, create = False): From 85591761b7a695052d577915f8725fad8eb2480f Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 17:40:58 +0200 Subject: [PATCH 02/57] Add gitignore file --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d494f94 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +config.cfg +Database.db From d70faaa07437d5bed3006e88b64870d02f05a8a9 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 21:04:00 +0200 Subject: [PATCH 03/57] Move tools to subdir --- tools.py => tools/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools.py => tools/__init__.py (100%) diff --git a/tools.py b/tools/__init__.py similarity index 100% rename from tools.py rename to tools/__init__.py From de543283d5a77ef202e7cf4026a0467e12396ca5 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 21:04:13 +0200 Subject: [PATCH 04/57] Allow file name to be passed to csvdb function --- csvdb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csvdb.py b/csvdb.py index 153f413..c3585f8 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,11 +4,11 @@ import tools import sqlite3, csv, sys -def makeDB(path=tools.dbpath()): +def makeDB(path=tools.dbpath(), infile='tweets.csv'): try: - infile = open('tweets.csv') + infile = open(infile) except IOError: - raise IOError("Please make sure that the tweets.csv from the Twitter download is located in this directory.") + raise IOError("Unable to read %s." % infile) input = list(csv.reader(infile)) From c688162300d644b259f94dc86c3c0f1ae82384f4 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 7 Apr 2016 23:57:22 +0200 Subject: [PATCH 05/57] Check in first version of dbtools --- dbtools/__init__.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 dbtools/__init__.py diff --git a/dbtools/__init__.py b/dbtools/__init__.py new file mode 100644 index 0000000..64c2c27 --- /dev/null +++ b/dbtools/__init__.py @@ -0,0 +1,64 @@ +import sqlite3, pymysql, pymysql.cursors + +SQLITE = 0 +MYSQL = 1 +MARIADB = MYSQL + +class dbObject: + +# --------------------------------------------- Initialization ------------------------------------------------- + + def initMySQL(self, host, port, user, pwd, db): + self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor) + self.cur = conn.cursor() + self.dbtype = MYSQL + self.host = host + self.port = port + self.user = user + self.pwd = pwd + self.db = db + + def initSQLite(self, path): + self.conn = sqlite3.connect(path) + self.cur = self.conn.cursor() + self.dbtype = SQLITE + self.path = path + + def __init__(self, dbtype = SQLITE, path = 'Database.db', host = None, port = None, user = None, pwd = None, db = None): + + if dbtype == SQLITE: + self.initSQLite(path) + + elif dbtype == MYSQL: + self.initMySQL(host, port or 3306, user, pwd, db) + + else: + raise ValueError("Unknown database type %s." % str(dbtype)) + +# ---------------------------------------------- No more initialization ---------------------------------------- + + def closeConnection(self): + return self.conn.close() + + def commit(self): + return self.conn.commit() + + def executeQuery(self, query): + return self.cur.execute(query) + + def getNext(self): + return self.cur.fetchone() + + def isInitialized(self): + try: + self.executeQuery("SELECT * FROM tweets") + return True + except: + return False + + def getFLDate(self, val = 0): + if val == 0: + mode = "MIN" + else: + mode = "MAX" + return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) From c6c994926d8e617e3df7485e89abbbb41c6f069f Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 8 Apr 2016 00:15:21 +0200 Subject: [PATCH 06/57] Fixed filler dependencies --- dbtools/__init__.py | 15 +++++++++++++-- filler.py | 24 ++++-------------------- tools/__init__.py | 5 ++++- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 64c2c27..b8683f1 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -4,6 +4,9 @@ SQLITE = 0 MYSQL = 1 MARIADB = MYSQL +MIN = 0 +MAX = 1 + class dbObject: # --------------------------------------------- Initialization ------------------------------------------------- @@ -56,9 +59,17 @@ class dbObject: except: return False - def getFLDate(self, val = 0): - if val == 0: + def getFLDate(self, val = MIN): + if val == MIN: mode = "MIN" else: mode = "MAX" return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + + def getLatestTweet(db): + db.executeQuery("SELECT max(tweet_id) FROM tweets") + try: + return int(db.getNext()[0]) + except: + return 0 + diff --git a/filler.py b/filler.py index 8ef8710..5e5e822 100755 --- a/filler.py +++ b/filler.py @@ -1,26 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools, tools -import html.parser, os - -def getSavepoint(db): - db.executeQuery("SELECT max(tweet_id) FROM tweets") - try: - return int(db.getNext()[0]) - except: - print("No tweets stored yet.") - return 0 - -def unescapeText(text): - return html.parser.HTMLParser().unescape(text).replace("'","''") - -def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()): +def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): query = "from:" + user - - db = tools.dbHelper(dbpath) - - savepoint = getSavepoint(db) + savepoint = db.getLatestTweet() last = savepoint timeline = two.search(query, savepoint) @@ -29,7 +13,7 @@ def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()): for status in timeline: timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = unescapeText(status.text) + text = tools.unescapeText(status.text) db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") db.commit() diff --git a/tools/__init__.py b/tools/__init__.py index d39a1eb..9358a6e 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -1,4 +1,4 @@ -import configparser, csv, datetime, itertools, os, sqlite3, sys, tweepy +import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tweepy class SetupException(Exception): def __str__(self): @@ -158,3 +158,6 @@ def printCSV(inlist): writer = csv.writer(sys.stdout) writer.writerows(inlist) +def unescapeText(text): + return html.parser.HTMLParser().unescape(text).replace("'","''") + From 12df347fc52495e068ff56e8c50f8ebaaf9969ef Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 8 Apr 2016 00:23:04 +0200 Subject: [PATCH 07/57] Fix csvdb dependencies --- csvdb.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/csvdb.py b/csvdb.py index c3585f8..41781b8 100755 --- a/csvdb.py +++ b/csvdb.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import sqlite3, csv, sys -def makeDB(path=tools.dbpath(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): try: infile = open(infile) except IOError: @@ -12,18 +12,15 @@ def makeDB(path=tools.dbpath(), infile='tweets.csv'): input = list(csv.reader(infile)) - conn = sqlite3.connect(path) - cur = conn.cursor() - try: - cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - except sqlite3.OperationalError: + dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") + except: raise IOError("%s already exists. Please delete it before trying to create a new one." % path) for row in input[1:]: - cur.execute("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") + dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") - conn.commit() + dbo.commit() if __name__ == "__main__": if len(sys.argv) > 2: From 1a31e345585e9f15193aa4f5960ed41d9c0697ba Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 8 Apr 2016 00:23:48 +0200 Subject: [PATCH 08/57] Update gitignore file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d494f94..e04e077 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ config.cfg Database.db +__pycache__ From e9c6290b6902ea5346636a921a0b3b4d938c6dbf Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 20 May 2016 21:39:21 +0200 Subject: [PATCH 09/57] Change default value handling in dbtools --- dbtools/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index b8683f1..cff5933 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -27,13 +27,13 @@ class dbObject: self.dbtype = SQLITE self.path = path - def __init__(self, dbtype = SQLITE, path = 'Database.db', host = None, port = None, user = None, pwd = None, db = None): + def __init__(self, dbtype = SQLITE, path = None, host = None, port = None, user = None, pwd = None, db = None): if dbtype == SQLITE: - self.initSQLite(path) + self.initSQLite(path or 'Database.db') elif dbtype == MYSQL: - self.initMySQL(host, port or 3306, user, pwd, db) + self.initMySQL(host or 'localhost', port or 3306, user, pwd, db) else: raise ValueError("Unknown database type %s." % str(dbtype)) From b73103d92d6a4b6cef06ced44592a7a10a56e9d1 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:40:21 +0200 Subject: [PATCH 10/57] Split up tools to setuptools and twitools. Testing pending. --- filler.py | 7 ++-- followertxt.py | 10 +++--- getdates.py | 13 +++---- getmentions.py | 12 +++---- gettweets.py | 10 +++--- makedb.py | 8 ++--- {tools => setuptools}/__init__.py | 58 ------------------------------- tweleter.py | 7 ++-- twitools/__init__.py | 20 +++++++++++ 9 files changed, 49 insertions(+), 96 deletions(-) rename {tools => setuptools}/__init__.py (65%) create mode 100644 twitools/__init__.py diff --git a/filler.py b/filler.py index 5e5e822..649b06c 100755 --- a/filler.py +++ b/filler.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -import dbtools, tools +import dbtools, setuptools, twitools -def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): +def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user savepoint = db.getLatestTweet() last = savepoint @@ -13,7 +13,7 @@ def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): for status in timeline: timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = tools.unescapeText(status.text) + text = setuptools.unescapeText(status.text) db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") db.commit() @@ -28,3 +28,4 @@ def fill(db=dbtools.dbObject(), user=tools.user(), two=tools.twObject()): if __name__ == "__main__": count, last, first = fill() print("Stored %i tweets after %i until %i." % (count, first, last)) + diff --git a/followertxt.py b/followertxt.py index 6b15653..7059b67 100755 --- a/followertxt.py +++ b/followertxt.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 -import tools +import twitools, setuptools import os, time, tweepy -def getFollowerIDs(two=tools.twObject()): +def getFollowerIDs(two=twitools.twObject()): ''' Returns 5,000 follower IDs at most ''' - return two.api.followers_ids(screen_name=tools.user()) + return two.api.followers_ids(screen_name=twitools.twObject().whoami()) -def getNamesByIDs(fids=getFollowerIDs(), two=tools.twObject()): - for page in tools.paginate(fids, 100): +def getNamesByIDs(fids=getFollowerIDs(), two=twitools.twObject()): + for page in setuptools.paginate(fids, 100): followers = two.api.lookup_users(user_ids=page) for follower in followers: yield follower.screen_name diff --git a/getdates.py b/getdates.py index 2d58eac..40b8fa6 100755 --- a/getdates.py +++ b/getdates.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -import tools - +import dbtools, setuptools import sys, datetime def dateArgs(argv = sys.argv[1:]): @@ -30,10 +29,10 @@ def dateArgs(argv = sys.argv[1:]): raise ValueError("Number of days for running average must be an integer.") mode = 0 elif mode == 1: - fr = tools.getDate(arg) + fr = setuptools.getDate(arg) mode = 0 else: - to = tools.getDate(arg) + to = setuptools.getDate(arg) mode = 0 if mode in (1, 2): @@ -92,9 +91,7 @@ def getHeaders(strings, av): return [headers] -def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbpath(), headers = False): - db = tools.dbHelper(path) - +def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False): if fr == None: fr = db.getFLDate() if to == None: @@ -111,4 +108,4 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbp return cur if __name__ == "__main__": - tools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) + setuptools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) diff --git a/getmentions.py b/getmentions.py index 833f6ca..cade951 100755 --- a/getmentions.py +++ b/getmentions.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import operator, re, sys -def getTweets(mode = "@", path = tools.dbpath()): - db = tools.dbHelper(path) - +def getTweets(mode = "@", db = dbtools.dbObject()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets") @@ -27,12 +25,12 @@ def getTweets(mode = "@", path = tools.dbpath()): if __name__ == "__main__": mode = "@" - path = tools.dbpath() - if len(sys.argv) > 1: if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): raise ValueError("Invalid arguments passed.") + path = None + for arg in sys.argv[1:]: if arg == "-h": mode = "#" @@ -41,5 +39,5 @@ if __name__ == "__main__": else: path = arg - for handle, tweets in sorted(list(getTweets(mode,path).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject(path=path)).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) diff --git a/gettweets.py b/gettweets.py index 448794b..1cd64a7 100755 --- a/gettweets.py +++ b/gettweets.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import tools +import dbtools, setuptools import sys, datetime @@ -20,10 +20,10 @@ def dateArgs(argv = sys.argv[1:]): strings += [arg] mode = 0 elif mode == 1: - fr = tools.getDate(arg) + fr = setuptools.getDate(arg) mode = 0 else: - to = tools.getDate(arg) + to = setuptools.getDate(arg) mode = 0 if mode in (1, 2): @@ -39,9 +39,7 @@ def queryBuilder(fr, to): return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) -def getDataByDate(fr, to, path = tools.dbpath(), headers = True): - db = tools.dbHelper(path) - +def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): if fr == None: fr = db.getFLDate() if to == None: diff --git a/makedb.py b/makedb.py index a1b8fb6..0aa13bb 100755 --- a/makedb.py +++ b/makedb.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import sys -def makeDB(path=tools.dbpath()): - db = tools.dbHelper(path, create = True) - +def makeDB(db=dbtools.dbObject()): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.commit() @@ -16,6 +14,6 @@ if __name__ == "__main__": if len(sys.argv) > 2: raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") try: - makeDB(sys.argv[1]) + makeDB(dbtools.dbObject(path=sys.argv[1])) except IndexError: makeDB() diff --git a/tools/__init__.py b/setuptools/__init__.py similarity index 65% rename from tools/__init__.py rename to setuptools/__init__.py index 9358a6e..25f5bf4 100644 --- a/tools/__init__.py +++ b/setuptools/__init__.py @@ -40,64 +40,6 @@ def ase(): except: raise SetupException() -def user(): - return twObject().whoami() - - -class dbObject: - - def __init__(self, path=dbpath()): - self.conn = sqlite3.connect(path) - self.cur = self.conn.cursor() - self.path = path - - def closeConnection(self): - return self.conn.close() - - def commit(self): - return self.conn.commit() - - def executeQuery(self, query): - return self.cur.execute(query) - - def getNext(self): - return self.cur.fetchone() - - def isInitialized(self): - try: - self.executeQuery("SELECT * FROM tweets") - return True - except: - return False - - def getFLDate(self, val = 0): - if val == 0: - mode = "MIN" - else: - mode = "MAX" - - return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) - - -class twObject: - - def __init__(self, cke = cke(), cse = cse(), ato = ato(), ase = ase()): - self.auth = tweepy.OAuthHandler(cke, cse) - self.auth.set_access_token(ato, ase) - self.api = tweepy.API(self.auth) - - def delete(self, id): - self.api.destroy_status(id) - - def search(self, query, savepoint = 0): - tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) - tweets.reverse() - return tweets - - def whoami(self): - return self.auth.get_username() - - def dbCheck(db, create = False): if (not create and dbInitialized(db)) or (create and not dbInitialized(db)): return True diff --git a/tweleter.py b/tweleter.py index 274cfdc..38342bf 100755 --- a/tweleter.py +++ b/tweleter.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 -import tools - +import twitools import tkinter, tkinter.messagebox, html.parser, os -two = tools.twObject() +two = twitools.twObject() top = tkinter.Tk() top.title("Tweet Deleter") scrollbar = tkinter.Scrollbar(top) @@ -41,7 +40,7 @@ def addStatus(id, text): list.insert(0, element.encode("UTF-8")) def getTweets(): - query = "from:" + tools.user() + query = "from:" + twitools.twObject().whoami() try: timeline = two.search(query, 0) diff --git a/twitools/__init__.py b/twitools/__init__.py new file mode 100644 index 0000000..8de5355 --- /dev/null +++ b/twitools/__init__.py @@ -0,0 +1,20 @@ +import tweepy, setuptools + +class twObject: + + def __init__(self, cke = setuptools.cke(), cse = setuptools.cse(), ato = setuptools.ato(), ase = setuptools.ase()): + self.auth = tweepy.OAuthHandler(cke, cse) + self.auth.set_access_token(ato, ase) + self.api = tweepy.API(self.auth) + + def delete(self, id): + self.api.destroy_status(id) + + def search(self, query, savepoint = 0): + tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) + tweets.reverse() + return tweets + + def whoami(self): + return self.auth.get_username() + From 3ee3c68705f2506319b7a29b291354400cc9a5ab Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:44:51 +0200 Subject: [PATCH 11/57] Fix call to getDate --- dbtools/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index cff5933..5ff001d 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -1,3 +1,4 @@ +import setuptools import sqlite3, pymysql, pymysql.cursors SQLITE = 0 @@ -64,7 +65,7 @@ class dbObject: mode = "MIN" else: mode = "MAX" - return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) def getLatestTweet(db): db.executeQuery("SELECT max(tweet_id) FROM tweets") From 3b8811f6aabc9adc3203e3b0ad2fe7d9798c59e8 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:58:55 +0200 Subject: [PATCH 12/57] Make getmentions use argparse. Finally. --- getmentions.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/getmentions.py b/getmentions.py index cade951..306363b 100755 --- a/getmentions.py +++ b/getmentions.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 import dbtools - -import operator, re, sys +import argparse, operator, re, sys def getTweets(mode = "@", db = dbtools.dbObject()): handles = dict() @@ -24,20 +23,18 @@ def getTweets(mode = "@", db = dbtools.dbObject()): return handles if __name__ == "__main__": - mode = "@" - if len(sys.argv) > 1: - if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): - raise ValueError("Invalid arguments passed.") + parser = argparse.ArgumentParser() + g = parser.add_mutually_exclusive_group() + g.add_argument("-t", "--hashtags", help="count only #hashtags", action="store_true") + g.add_argument("-w", "--words", help="count all words", action="store_true") + g.add_argument("-m", "--mentions", help="count only @mentions (default)", action="store_true") + args = parser.parse_args() + if args.hashtags: + mode = "#" + elif args.words: + mode = "" + else: + mode = "@" - path = None - - for arg in sys.argv[1:]: - if arg == "-h": - mode = "#" - if arg == "-w": - mode = "" - else: - path = arg - - for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject(path=path)).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) From 23218aff75a03359e88c77c13d12fcc94417c8e6 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 20:59:41 +0200 Subject: [PATCH 13/57] Fix function call --- gettweets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gettweets.py b/gettweets.py index 1cd64a7..36299cb 100755 --- a/gettweets.py +++ b/gettweets.py @@ -54,4 +54,4 @@ def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): if __name__ == "__main__": - tools.printCSV(getDataByDate(*dateArgs(), headers = True)) + setuptools.printCSV(getDataByDate(*dateArgs(), headers = True)) From 6297f37bd726177c16b5b024161b419c516889ba Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 21:58:53 +0200 Subject: [PATCH 14/57] Prepare setuptools for MySQL support --- setuptools/__init__.py | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/setuptools/__init__.py b/setuptools/__init__.py index 25f5bf4..98274c3 100644 --- a/setuptools/__init__.py +++ b/setuptools/__init__.py @@ -2,19 +2,53 @@ import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tw class SetupException(Exception): def __str__(self): - return "Seems like config.cfg has not been created yet. Run setup.py to do so." + return "Seems like config.cfg has not been created yet or contains serious errors. Run setup.py to create it." -def getSetting(section, setting): +def getSetting(section, setting, path = "config.cfg"): config = configparser.RawConfigParser() - config.read('config.cfg') + config.read(path) return config.get(section, setting) +def dbtype(): + try: + return getSetting("Database", "type") + except: + return 0 # for SQLite3 + +### Must only be called AFTER dbtype()! ### + +def dbhost(): + try: + return getSetting("Database", "host") + except: + raise SetupException() + +def dbuser(): + try: + return getSetting("Database", "user") + except: + raise SetupException() + +def dbpass(): + try: + return getSetting("Database", "pass") + except: + raise SetupException() + +def dbname(): + try: + return getSetting("Database", "name") + except: + raise SetupException() + def dbpath(): try: return getSetting("Database", "path") except: - return "Database.db" + return SetupException() + +### def cke(): try: From 2b1b640c5dbb9d6ed162e75d8abcf50f68e050ca Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 23:52:33 +0200 Subject: [PATCH 15/57] Fix variable --- dbtools/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 5ff001d..ffca253 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -14,7 +14,7 @@ class dbObject: def initMySQL(self, host, port, user, pwd, db): self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor) - self.cur = conn.cursor() + self.cur = self.conn.cursor() self.dbtype = MYSQL self.host = host self.port = port From c1650218e152fd9fb299cce4fb91e27b449f0d35 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 May 2016 23:55:59 +0200 Subject: [PATCH 16/57] Make setup work with MySQL. Tested. ^^ --- setup.py | 55 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/setup.py b/setup.py index 1f2c445..984b8db 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import configparser, os.path, sqlite3, tweepy +import configparser, os.path, tweepy, dbtools, getpass if os.path.isfile("config.cfg"): print("config.cfg already exists. Please remove it before running this script.") @@ -10,26 +10,49 @@ config = configparser.RawConfigParser() config.add_section('Database') -print('''Twitools will use a database for certain tasks. If this file does not exist yet, -it will be created in this process. The file name defaults to 'Database.db'. +print('''Twitools will use a database for certain tasks. You can use a file or a MySQL database for this purpose. + +If you wish to use a MySQL database, you will need the credentials for it. If you don't know what any of that means, stick with the default value and just press Enter. ''') -dbpath = input("Name of the database file [Database.db]: ") +dbtype = input("Database type: %i (file), %i (MySQL) [%i]: " % (dbtools.SQLITE, dbtools.MYSQL, dbtools.SQLITE)) print() -if dbpath == "": - dbpath = "Database.db" - -config.set('Database', 'path', dbpath) - -if os.path.isfile(dbpath): +try: + dbtype = int(dbtype) +except: pass -else: - conn = sqlite3.connect(dbpath) - cur = conn.cursor() - cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - conn.commit() - conn.close() + +if dbtype == dbtools.MYSQL: + dbhost = input("MySQL host [localhost]: ") or "localhost" + dbuser = input("MySQL username [twitools]: ") or "twitools" + dbpass = getpass.getpass("MySQL password (not echoed!): ") + dbname = input("MySQL database name [twitools]: ") or "twitools" + print() + + config.set('Database', 'type', dbtype) + config.set('Database', 'host', dbhost) + config.set('Database', 'user', dbuser) + config.set('Database', 'pass', dbpass) + config.set('Database', 'name', dbname) + +else: + dbtype = dbtools.SQLITE + dbpath = input("Name of the database file [Database.db]: ") or "Database.db" + print() + + config.set('Database', 'type', dbtype) + config.set('Database', 'path', dbpath) + +if dbtype == dbtools.MYSQL: + db = dbtools.dbObject(dbtype=dbtype, host=dbhost, user=dbuser, pwd=dbpass, db=dbname) +else: + db = dbtools.dbObject(dbtype=dbtype, path=dbpath) +if not db.isInitialized(): + db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") + db.commit() + +db.closeConnection() config.add_section("Twitter") From de79a03f2f7736f8c5aee97cd91e5eee2bd44db4 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 31 May 2016 00:05:35 +0200 Subject: [PATCH 17/57] Change tweet deleter name to deleter. --- tweleter.py => deleter.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tweleter.py => deleter.py (100%) diff --git a/tweleter.py b/deleter.py similarity index 100% rename from tweleter.py rename to deleter.py From 6d7c6c127e5ef93715bde1165274e67745abbe07 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 6 Jun 2016 19:19:23 +0200 Subject: [PATCH 18/57] Make csvdb use dbHelper() to get a dbo --- csvdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csvdb.py b/csvdb.py index 41781b8..d551d8d 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,7 +4,7 @@ import dbtools import sqlite3, csv, sys -def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): try: infile = open(infile) except IOError: From 7e17aeb1699677ed1577b33e6fb18be0f56b15c5 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 30 Jun 2016 12:33:09 +0200 Subject: [PATCH 19/57] Futher preparation for MySQL, which is not working yet though... Make filler handle direct messages Add table and functions for direct messages --- csvdb.py | 23 ++++++++--------------- dbtools/__init__.py | 20 +++++++++++++++++++- filler.py | 40 ++++++++++++++++++++++++++++++++++------ getdates.py | 2 +- getmentions.py | 4 ++-- gettweets.py | 2 +- makedb.py | 2 +- setup.py | 1 + setuptools/__init__.py | 2 +- 9 files changed, 68 insertions(+), 28 deletions(-) diff --git a/csvdb.py b/csvdb.py index 41781b8..84b69d5 100755 --- a/csvdb.py +++ b/csvdb.py @@ -4,29 +4,22 @@ import dbtools import sqlite3, csv, sys -def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'): +def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): try: infile = open(infile) except IOError: raise IOError("Unable to read %s." % infile) - input = list(csv.reader(infile)) + infile = list(csv.reader(infile)) - try: - dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - except: - raise IOError("%s already exists. Please delete it before trying to create a new one." % path) + for row in infile[1:]: + try: + dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") + except: + pass - for row in input[1:]: - dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") - dbo.commit() if __name__ == "__main__": - if len(sys.argv) > 2: - raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") - try: - makeDB(sys.argv[1]) - except IndexError: - makeDB() + makeDB() diff --git a/dbtools/__init__.py b/dbtools/__init__.py index ffca253..f895dfb 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -65,7 +65,18 @@ class dbObject: mode = "MIN" else: mode = "MAX" - return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + if self.dbtype == SQLITE: + return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + else: + self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode) + return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) + + def getLatestMessage(db): + db.executeQuery("SELECT max(id) FROM messages") + try: + return int(db.getNext()[0]) + except: + return 0 def getLatestTweet(db): db.executeQuery("SELECT max(tweet_id) FROM tweets") @@ -74,3 +85,10 @@ class dbObject: except: return 0 +def dbHelper(): + if setuptools.dbtype() == SQLITE: + return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) + elif setuptools.dbtype() == MYSQL: + return dbObject(dbtype=MYSQL, host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname()) + else: + raise setuptools.SetupException() diff --git a/filler.py b/filler.py index 649b06c..25fad8a 100755 --- a/filler.py +++ b/filler.py @@ -2,9 +2,9 @@ import dbtools, setuptools, twitools -def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()): +def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user - savepoint = db.getLatestTweet() + savepoint = db.getLatestTweet() + 1 last = savepoint timeline = two.search(query, savepoint) @@ -15,7 +15,10 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools. timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" text = setuptools.unescapeText(status.text) - db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + try: + db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + except: + pass db.commit() last = status.id @@ -25,7 +28,32 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools. return tw_counter, last, savepoint -if __name__ == "__main__": - count, last, first = fill() - print("Stored %i tweets after %i until %i." % (count, first, last)) +def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): + mcount = 0 + savepoint = db.getLatestMessage() + 1 + new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True) + new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True) + for m in new_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + for m in new_out_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + db.commit() + + return mcount, savepoint or 0, db.getLatestMessage() + +if __name__ == "__main__": + count, last, first = getTweets() + print("Stored %i tweets after %i until %i." % (count, first, last)) + count, last, first = getMessages() + print("Stored %i messages after %i until %i." % (count, first, last)) diff --git a/getdates.py b/getdates.py index 40b8fa6..18cd6a0 100755 --- a/getdates.py +++ b/getdates.py @@ -91,7 +91,7 @@ def getHeaders(strings, av): return [headers] -def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False): +def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False): if fr == None: fr = db.getFLDate() if to == None: diff --git a/getmentions.py b/getmentions.py index 306363b..7806205 100755 --- a/getmentions.py +++ b/getmentions.py @@ -3,7 +3,7 @@ import dbtools import argparse, operator, re, sys -def getTweets(mode = "@", db = dbtools.dbObject()): +def getTweets(mode = "@", db = dbtools.dbHelper()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets") @@ -36,5 +36,5 @@ if __name__ == "__main__": else: mode = "@" - for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) diff --git a/gettweets.py b/gettweets.py index 36299cb..9463807 100755 --- a/gettweets.py +++ b/gettweets.py @@ -39,7 +39,7 @@ def queryBuilder(fr, to): return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) -def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True): +def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True): if fr == None: fr = db.getFLDate() if to == None: diff --git a/makedb.py b/makedb.py index 0aa13bb..30a6520 100755 --- a/makedb.py +++ b/makedb.py @@ -4,7 +4,7 @@ import dbtools import sys -def makeDB(db=dbtools.dbObject()): +def makeDB(db=dbtools.dbHelper()): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.commit() diff --git a/setup.py b/setup.py index 984b8db..bf7cd21 100755 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ else: db = dbtools.dbObject(dbtype=dbtype, path=dbpath) if not db.isInitialized(): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") + db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") db.commit() db.closeConnection() diff --git a/setuptools/__init__.py b/setuptools/__init__.py index 98274c3..c432018 100644 --- a/setuptools/__init__.py +++ b/setuptools/__init__.py @@ -12,7 +12,7 @@ def getSetting(section, setting, path = "config.cfg"): def dbtype(): try: - return getSetting("Database", "type") + return int(getSetting("Database", "type")) except: return 0 # for SQLite3 From 41f5fa288aa579429fc01c7560253a9ce9fba816 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 16:39:55 +0200 Subject: [PATCH 20/57] Move follower functions to twitools Create tables for storing followers and following in setup.py --- followertxt.py | 12 +----------- setup.py | 2 ++ twitools/__init__.py | 9 +++++++++ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/followertxt.py b/followertxt.py index 7059b67..4e8deaa 100755 --- a/followertxt.py +++ b/followertxt.py @@ -3,16 +3,6 @@ import twitools, setuptools import os, time, tweepy -def getFollowerIDs(two=twitools.twObject()): - ''' Returns 5,000 follower IDs at most ''' - return two.api.followers_ids(screen_name=twitools.twObject().whoami()) - -def getNamesByIDs(fids=getFollowerIDs(), two=twitools.twObject()): - for page in setuptools.paginate(fids, 100): - followers = two.api.lookup_users(user_ids=page) - for follower in followers: - yield follower.screen_name - def getOutDir(dirname="followers"): if not os.path.isdir(dirname): os.mkdir(dirname) @@ -23,7 +13,7 @@ def getOutFile(dirname="followers"): def writeOutFile(outfile=getOutFile()): with open(getOutFile(), 'a') as f: - for follower in getNamesByIDs(getFollowerIDs()): + for follower in twitools.getNamesByIDs(twitools.getFollowerIDs()): f.write(follower + "\n") if __name__ == "__main__": diff --git a/setup.py b/setup.py index bf7cd21..f02b276 100755 --- a/setup.py +++ b/setup.py @@ -51,6 +51,8 @@ else: if not db.isInitialized(): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") + db.executeQuery("CREATE TABLE followers(`id` INTEGER NOT NULL, `since` TEXT, `until` TEXT);") + db.executeQuery("CREATE TABLE following(`id` INTEGER NOT NULL, `since` TEXT, `until` TEXT);") db.commit() db.closeConnection() diff --git a/twitools/__init__.py b/twitools/__init__.py index 8de5355..730d7de 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -18,3 +18,12 @@ class twObject: def whoami(self): return self.auth.get_username() +def getFollowerIDs(two=twObject()): + ''' Returns 5,000 follower IDs at most ''' + return two.api.followers_ids(screen_name=twObject().whoami()) + +def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): + for page in setuptools.paginate(fids, 100): + followers = two.api.lookup_users(user_ids=page) + for follower in followers: + yield follower.screen_name From 2ebe1e10aedb5791fc2173f360d8a1076bbf1361 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 16:47:10 +0200 Subject: [PATCH 21/57] Use INTEGER as data type for follower/following dates --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f02b276..c91ae3b 100755 --- a/setup.py +++ b/setup.py @@ -51,8 +51,8 @@ else: if not db.isInitialized(): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") - db.executeQuery("CREATE TABLE followers(`id` INTEGER NOT NULL, `since` TEXT, `until` TEXT);") - db.executeQuery("CREATE TABLE following(`id` INTEGER NOT NULL, `since` TEXT, `until` TEXT);") + db.executeQuery("CREATE TABLE followers(`id` INTEGER NOT NULL, `since` INTEGER, `until` INTEGER);") + db.executeQuery("CREATE TABLE following(`id` INTEGER NOT NULL, `since` INTEGER, `until` INTEGER);") db.commit() db.closeConnection() From 4255596ecf4a950878cb201deffcd28f192be0cd Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 18:02:11 +0200 Subject: [PATCH 22/57] Implement following/follower monitoring --- dbtools/__init__.py | 14 ++++++++++++++ filler.py | 34 +++++++++++++++++++++++++++++++++- setup.py | 4 ++-- twitools/__init__.py | 3 +++ 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index f895dfb..a0df836 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -50,6 +50,9 @@ class dbObject: def executeQuery(self, query): return self.cur.execute(query) + def getAll(self): + return self.cur.fetchall() + def getNext(self): return self.cur.fetchone() @@ -71,6 +74,17 @@ class dbObject: self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode) return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) + def getFollowers(db): + db.executeQuery("SELECT id FROM followers;") + for i in db.getAll(): + yield i[0] + + def getFollowing(db): + db.executeQuery("SELECT id FROM following;") + for i in db.getAll(): + yield i[0] + + def getLatestMessage(db): db.executeQuery("SELECT max(id) FROM messages") try: diff --git a/filler.py b/filler.py index 25fad8a..00ca02b 100755 --- a/filler.py +++ b/filler.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import dbtools, setuptools, twitools +import dbtools, setuptools, time, twitools def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user @@ -52,8 +52,40 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() +def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): + current = db.getFollowers() + new = twitools.getNamesByIDs(twitools.getFollowerIDs()) + + for follower in new: + if follower not in current: + db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) + + for follower in current: + if follower not in new: + db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), follower)) + + db.commit() + +def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): + current = db.getFollowing() + new = twitools.getNamesByIDs(twitools.getFollowingIDs()) + + for following in new: + if following not in current: + db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) + + for following in current: + if following not in new: + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = %s AND `until` IS NULL" % (int(time.time()), following)) + + db.commit() + if __name__ == "__main__": count, last, first = getTweets() print("Stored %i tweets after %i until %i." % (count, first, last)) count, last, first = getMessages() print("Stored %i messages after %i until %i." % (count, first, last)) + getFollowers() + print("Processed followers.") + getFollowing() + print("Processed following.") diff --git a/setup.py b/setup.py index c91ae3b..a21b3e2 100755 --- a/setup.py +++ b/setup.py @@ -51,8 +51,8 @@ else: if not db.isInitialized(): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") - db.executeQuery("CREATE TABLE followers(`id` INTEGER NOT NULL, `since` INTEGER, `until` INTEGER);") - db.executeQuery("CREATE TABLE following(`id` INTEGER NOT NULL, `since` INTEGER, `until` INTEGER);") + db.executeQuery("CREATE TABLE followers(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER);") + db.executeQuery("CREATE TABLE following(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER);") db.commit() db.closeConnection() diff --git a/twitools/__init__.py b/twitools/__init__.py index 730d7de..ff18d7a 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -22,6 +22,9 @@ def getFollowerIDs(two=twObject()): ''' Returns 5,000 follower IDs at most ''' return two.api.followers_ids(screen_name=twObject().whoami()) +def getFollowingIDs(two=twObject()): + return two.api.friends_ids(screen_name=twObject().whoami()) + def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): for page in setuptools.paginate(fids, 100): followers = two.api.lookup_users(user_ids=page) From c2d9547dcddf41bf74d11a09c9f6c61afd3135ab Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 18:03:07 +0200 Subject: [PATCH 23/57] Kick out followertxt.py which isn't going to be used anymore --- followertxt.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100755 followertxt.py diff --git a/followertxt.py b/followertxt.py deleted file mode 100755 index 4e8deaa..0000000 --- a/followertxt.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python3 - -import twitools, setuptools -import os, time, tweepy - -def getOutDir(dirname="followers"): - if not os.path.isdir(dirname): - os.mkdir(dirname) - -def getOutFile(dirname="followers"): - getOutDir(dirname) - return os.path.join(dirname, str(int(time.time())) + ".txt") - -def writeOutFile(outfile=getOutFile()): - with open(getOutFile(), 'a') as f: - for follower in twitools.getNamesByIDs(twitools.getFollowerIDs()): - f.write(follower + "\n") - -if __name__ == "__main__": - writeOutFile() From f72a51fbee71cd8db3936233fbb0b282e7747a67 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 21:46:57 +0200 Subject: [PATCH 24/57] Optimize follower/following handling, print changes when running filler.py --- filler.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/filler.py b/filler.py index 00ca02b..146b5ec 100755 --- a/filler.py +++ b/filler.py @@ -54,38 +54,58 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): current = db.getFollowers() - new = twitools.getNamesByIDs(twitools.getFollowerIDs()) + new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) + gained = 0 + lost = 0 + + if len(new) == 0: + print("Something went wrong here. -.-") + return 0,0 for follower in new: if follower not in current: db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) + print("New follower: %s" % follower) + gained += 1 for follower in current: if follower not in new: db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), follower)) + print("Lost follower: %s" % follower) + lost += 1 db.commit() + return gained, lost + def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): current = db.getFollowing() - new = twitools.getNamesByIDs(twitools.getFollowingIDs()) + new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) + gained = 0 + lost = 0 for following in new: if following not in current: db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) + print("You started following: %s" % following) + gained += 1 for following in current: if following not in new: db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = %s AND `until` IS NULL" % (int(time.time()), following)) + print("You no longer follow: %s" % following) + lost += 1 db.commit() + + return gained, lost if __name__ == "__main__": count, last, first = getTweets() print("Stored %i tweets after %i until %i." % (count, first, last)) count, last, first = getMessages() print("Stored %i messages after %i until %i." % (count, first, last)) - getFollowers() - print("Processed followers.") - getFollowing() - print("Processed following.") + gained, lost = getFollowers() + print("Gained %i followers, lost %i." % (gained, lost)) + gained, lost = getFollowing() + print("Started following %i, stopped following %i." % (gained, lost)) From 33094915250864f27636ad6de178bea961312894 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 1 Aug 2016 22:07:00 +0200 Subject: [PATCH 25/57] Nobody cares about the tweet IDs. --- filler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filler.py b/filler.py index 146b5ec..8b9f980 100755 --- a/filler.py +++ b/filler.py @@ -102,9 +102,9 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): if __name__ == "__main__": count, last, first = getTweets() - print("Stored %i tweets after %i until %i." % (count, first, last)) + print("Stored %i tweets." % (count, first, last)) count, last, first = getMessages() - print("Stored %i messages after %i until %i." % (count, first, last)) + print("Stored %i messages." % (count, first, last)) gained, lost = getFollowers() print("Gained %i followers, lost %i." % (gained, lost)) gained, lost = getFollowing() From 5a99261b46dbd449af822801594bc766f1ca4c06 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 2 Aug 2016 01:17:08 +0200 Subject: [PATCH 26/57] Oh my, I'm an idiot... --- filler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filler.py b/filler.py index 8b9f980..da3342f 100755 --- a/filler.py +++ b/filler.py @@ -102,9 +102,9 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): if __name__ == "__main__": count, last, first = getTweets() - print("Stored %i tweets." % (count, first, last)) + print("Stored %i tweets." % count) count, last, first = getMessages() - print("Stored %i messages." % (count, first, last)) + print("Stored %i messages." % count) gained, lost = getFollowers() print("Gained %i followers, lost %i." % (gained, lost)) gained, lost = getFollowing() From 403b5cadef53ae467328d28a057e7e252c86fddf Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 3 Aug 2016 13:58:19 +0200 Subject: [PATCH 27/57] getFollowers()/getFollowing() should not return records where 'until' is not null, i.e. records that are no longer valid --- dbtools/__init__.py | 4 ++-- filler.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index a0df836..5087cb5 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -75,12 +75,12 @@ class dbObject: return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) def getFollowers(db): - db.executeQuery("SELECT id FROM followers;") + db.executeQuery("SELECT id FROM followers WHERE `until` IS NOT NULL;") for i in db.getAll(): yield i[0] def getFollowing(db): - db.executeQuery("SELECT id FROM following;") + db.executeQuery("SELECT id FROM following WHERE `until` IS NOT NULL;") for i in db.getAll(): yield i[0] diff --git a/filler.py b/filler.py index da3342f..657301f 100755 --- a/filler.py +++ b/filler.py @@ -58,10 +58,6 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): gained = 0 lost = 0 - if len(new) == 0: - print("Something went wrong here. -.-") - return 0,0 - for follower in new: if follower not in current: db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) From 7264708832a6a9830e09605cd501d948cb000d83 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 3 Aug 2016 15:26:31 +0200 Subject: [PATCH 28/57] IS NULL. We need those where 'until' IS NULL. -.- --- dbtools/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 5087cb5..46d440a 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -75,12 +75,12 @@ class dbObject: return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) def getFollowers(db): - db.executeQuery("SELECT id FROM followers WHERE `until` IS NOT NULL;") + db.executeQuery("SELECT id FROM followers WHERE `until` IS NULL;") for i in db.getAll(): yield i[0] def getFollowing(db): - db.executeQuery("SELECT id FROM following WHERE `until` IS NOT NULL;") + db.executeQuery("SELECT id FROM following WHERE `until` IS NULL;") for i in db.getAll(): yield i[0] From 12b34fb7e1648fd55cc367a73ab31fde967e5b5c Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 4 Aug 2016 18:17:42 +0200 Subject: [PATCH 29/57] Set primary keys for followers/following tables --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a21b3e2..7937aef 100755 --- a/setup.py +++ b/setup.py @@ -51,8 +51,8 @@ else: if not db.isInitialized(): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") - db.executeQuery("CREATE TABLE followers(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER);") - db.executeQuery("CREATE TABLE following(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER);") + db.executeQuery("CREATE TABLE followers(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") + db.executeQuery("CREATE TABLE following(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") db.commit() db.closeConnection() From 188be95eb9ee375868810ba8279a3e4634e49650 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 4 Aug 2016 18:17:59 +0200 Subject: [PATCH 30/57] Fix apparent problems with database objects in filler --- filler.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/filler.py b/filler.py index 657301f..8f56c21 100755 --- a/filler.py +++ b/filler.py @@ -24,8 +24,6 @@ def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twit last = status.id tw_counter = tw_counter + 1 - db.closeConnection() - return tw_counter, last, savepoint def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): @@ -97,11 +95,12 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): return gained, lost if __name__ == "__main__": - count, last, first = getTweets() + db = dbtools.dbHelper() + count, last, first = getTweets(db) print("Stored %i tweets." % count) - count, last, first = getMessages() + count, last, first = getMessages(db) print("Stored %i messages." % count) - gained, lost = getFollowers() + gained, lost = getFollowers(db) print("Gained %i followers, lost %i." % (gained, lost)) - gained, lost = getFollowing() + gained, lost = getFollowing(db) print("Started following %i, stopped following %i." % (gained, lost)) From 1730a46ec3ee3f61bd472f94969276ad1474275b Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 5 Aug 2016 23:13:31 +0200 Subject: [PATCH 31/57] Whatever it is that keeps going wrong, I hate it. And I have to try mitigating it. --- filler.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/filler.py b/filler.py index 8f56c21..1fc7514 100755 --- a/filler.py +++ b/filler.py @@ -50,12 +50,16 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() -def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): +def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): current = db.getFollowers() new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) gained = 0 lost = 0 + if (len(current) == 0 or len(new) == 0) and not firstrun: + print("Something went wrong.") + return 0,0 + for follower in new: if follower not in current: db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) @@ -72,12 +76,16 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject()): return gained, lost -def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject()): +def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): current = db.getFollowing() new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) gained = 0 lost = 0 + if (len(current) == 0 or len(new) == 0) and not firstrun: + print("Something went wrong.") + return 0,0 + for following in new: if following not in current: db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) From c78f40c179f22846324b4ca6ab75d555b948c8bb Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 5 Aug 2016 23:20:18 +0200 Subject: [PATCH 32/57] Convert generator to list for getFollowers()/getFollowing() Fix SQL query in getFollowing() --- filler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/filler.py b/filler.py index 1fc7514..5782bf9 100755 --- a/filler.py +++ b/filler.py @@ -51,7 +51,7 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): - current = db.getFollowers() + current = list(db.getFollowers()) new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) gained = 0 lost = 0 @@ -77,7 +77,7 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) return gained, lost def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): - current = db.getFollowing() + current = list(db.getFollowing()) new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) gained = 0 lost = 0 @@ -94,7 +94,7 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for following in current: if following not in new: - db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = %s AND `until` IS NULL" % (int(time.time()), following)) + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), following)) print("You no longer follow: %s" % following) lost += 1 From 07740b880bb55823955133d981cec6241b4ce9fe Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Sun, 7 Aug 2016 17:11:22 +0200 Subject: [PATCH 33/57] Make sure that filler won't fail on first run because of empty data sets --- filler.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/filler.py b/filler.py index 5782bf9..499c352 100755 --- a/filler.py +++ b/filler.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import dbtools, setuptools, time, twitools +import argparse, dbtools, setuptools, time, twitools def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user @@ -103,12 +103,15 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) return gained, lost if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true") + args = parser.parse_args() db = dbtools.dbHelper() count, last, first = getTweets(db) print("Stored %i tweets." % count) count, last, first = getMessages(db) print("Stored %i messages." % count) - gained, lost = getFollowers(db) + gained, lost = getFollowers(db, firstrun=args.first) print("Gained %i followers, lost %i." % (gained, lost)) - gained, lost = getFollowing(db) + gained, lost = getFollowing(db, firstrun=args.first) print("Started following %i, stopped following %i." % (gained, lost)) From 25f82e40f8fe8ca9292861f2caa6c8ddffdbe948 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 9 Aug 2016 00:19:29 +0200 Subject: [PATCH 34/57] Forgot that NULL values are actually always seen as distinct values in SQL. Why does SQLite even allow NULL in PRIMARY KEY? --- dbtools/__init__.py | 4 ++-- filler.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index 46d440a..e616238 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -75,12 +75,12 @@ class dbObject: return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) def getFollowers(db): - db.executeQuery("SELECT id FROM followers WHERE `until` IS NULL;") + db.executeQuery("SELECT id FROM followers WHERE `until` = 0;") for i in db.getAll(): yield i[0] def getFollowing(db): - db.executeQuery("SELECT id FROM following WHERE `until` IS NULL;") + db.executeQuery("SELECT id FROM following WHERE `until` = 0;") for i in db.getAll(): yield i[0] diff --git a/filler.py b/filler.py index 499c352..ccfe413 100755 --- a/filler.py +++ b/filler.py @@ -62,13 +62,13 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for follower in new: if follower not in current: - db.executeQuery("INSERT INTO followers VALUES('%s', %i, NULL)" % (follower, int(time.time()))) + db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower, int(time.time()))) print("New follower: %s" % follower) gained += 1 for follower in current: if follower not in new: - db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), follower)) + db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), follower)) print("Lost follower: %s" % follower) lost += 1 @@ -88,13 +88,13 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for following in new: if following not in current: - db.executeQuery("INSERT INTO following VALUES('%s', %i, NULL)" % (following, int(time.time()))) + db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following, int(time.time()))) print("You started following: %s" % following) gained += 1 for following in current: if following not in new: - db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` IS NULL" % (int(time.time()), following)) + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), following)) print("You no longer follow: %s" % following) lost += 1 From 09ca89991839ecef5c43b3bd16a7d7ea414eba9e Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 6 Dec 2016 21:49:13 +0100 Subject: [PATCH 35/57] Add table for retweets --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1f2c445..37a10a7 100755 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ else: conn = sqlite3.connect(dbpath) cur = conn.cursor() cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") + cur.execute("CREATE TABLE IF NOT EXISTS retweets(id INT PRIMARY KEY, author VARCHAR(30), created_at VARCHAR(30), text TEXT);") conn.commit() conn.close() From 35b2771e38e93188e068802b12bcf939d272c8a6 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 6 Dec 2016 21:50:41 +0100 Subject: [PATCH 36/57] Finally add gitignore... --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..56dc96b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.swp +Database.db +__pycache__ +config.cfg From e774e54a01bb60234a78b10fa3deb5c177c76752 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 6 Dec 2016 22:25:24 +0100 Subject: [PATCH 37/57] Add #mentionchallenge tool --- extras/challenge.py | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 extras/challenge.py diff --git a/extras/challenge.py b/extras/challenge.py new file mode 100755 index 0000000..2fa4677 --- /dev/null +++ b/extras/challenge.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +import tools + +import operator, re, sys + +def getTweets(path, mon, mode = "@"): + db = tools.dbHelper(path) + + handles = dict() + tweets = db.executeQuery("SELECT text FROM tweets WHERE SUBSTR(timestamp,0,11)>='%s-01' AND SUBSTR(timestamp,0,11)<='%s-31'" % (mon, mon)) + + for tweet in tweets: + for word in tweet[0].lower().split(): + if word[0] == mode or mode == "": + if mode == "": + handle = word + else: + handle = mode + re.split('[\\W]',word[1:])[0].lower() + if handle != mode: + try: + handles[handle] += 1 + except KeyError: + handles[handle] = 1 + + return handles + +if __name__ == "__main__": + mode = "@" + path = tools.dbpath() + mon = "2016-03" + + if len(sys.argv) > 1: + if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): + raise ValueError("Invalid arguments passed.") + + for arg in sys.argv[1:]: + if arg == "-h": + mode = "#" + if arg == "-w": + mode = "" + else: + mon = arg + + for handle, tweets in sorted(list(getTweets(path,mon,mode).items()), key=operator.itemgetter(1), reverse=True): + print(handle + "," + str(tweets)) From 2fba8e12b5e7d004b2ccbe341af25c73e0e9a2c7 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 6 Dec 2016 22:31:45 +0100 Subject: [PATCH 38/57] Add retweet method --- twitools/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/twitools/__init__.py b/twitools/__init__.py index ff18d7a..e3ed7a3 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -7,6 +7,9 @@ class twObject: self.auth.set_access_token(ato, ase) self.api = tweepy.API(self.auth) + def retweet(self, id): + self.api.retweet(id) + def delete(self, id): self.api.destroy_status(id) From 81c7f7309bd0dfb6b4d0a757f774197eaa3aa92b Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Tue, 6 Dec 2016 22:42:06 +0100 Subject: [PATCH 39/57] Add retweeter, aka erdlofbot --- retweeter.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 retweeter.py diff --git a/retweeter.py b/retweeter.py new file mode 100644 index 0000000..1257c6c --- /dev/null +++ b/retweeter.py @@ -0,0 +1,41 @@ +import argparse, tools + +def getSavepoint(db, user): + db.executeQuery("SELECT max(tweet_id) FROM retweets WHERE author = '%s'" % user) + try: + return int(db.getNext()[0]) + except: + print("No tweets from %s stored yet." % user) + return 0 + +def retweet(user, two=tools.twObject(), db=tools.dbHelper(tools.dbpath())): + query = "from:" + user + + savepoint = getSavepoint(db, user) + last = savepoint + + timeline = two.search(query, savepoint) + + tw_counter = 0 + + for status in timeline: + if status.text[0] != "@" or two.whoami() in status.text: + timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" + text = unescapeText(status.text) + + db.executeQuery("INSERT INTO retweets('id','created_at','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + db.commit() + + two.retweet(status.id) + + last = status.id + tw_counter = tw_counter + 1 + + db.closeConnection() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("user", help="handle (user name) of the source user") + user = parser.parse_args().user + retweet(user if user[0] != "@" else user[1:]) From 4272db0ac14b964fdfb3c1e3e337d685920adbd1 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 12:12:17 +0100 Subject: [PATCH 40/57] Author handling in retweeter --- retweeter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/retweeter.py b/retweeter.py index 1257c6c..d223fbd 100644 --- a/retweeter.py +++ b/retweeter.py @@ -1,7 +1,7 @@ import argparse, tools def getSavepoint(db, user): - db.executeQuery("SELECT max(tweet_id) FROM retweets WHERE author = '%s'" % user) + db.executeQuery("SELECT MAX(tweet_id) FROM retweets WHERE LOWER(author) = '%s'" % user.lower()) try: return int(db.getNext()[0]) except: @@ -19,11 +19,11 @@ def retweet(user, two=tools.twObject(), db=tools.dbHelper(tools.dbpath())): tw_counter = 0 for status in timeline: - if status.text[0] != "@" or two.whoami() in status.text: + if (status.text[0] != "@" or two.whoami() in status.text) and status.text[0:2] != "RT": timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" text = unescapeText(status.text) - db.executeQuery("INSERT INTO retweets('id','created_at','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + db.executeQuery("INSERT INTO retweets('id','author','created_at','text') VALUES(" + str(status.id) + ",'" + user.lower() + "','" + timestamp + "','" + text + "')") db.commit() two.retweet(status.id) From b72d94e2cc812672997ad0f18de38450632ffb97 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 12:12:35 +0100 Subject: [PATCH 41/57] Make retweeter executable --- retweeter.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 retweeter.py diff --git a/retweeter.py b/retweeter.py old mode 100644 new mode 100755 From 09319bd40abf12530af2b155d3595350b0c3370f Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 12:13:47 +0100 Subject: [PATCH 42/57] Oh well. --- retweeter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/retweeter.py b/retweeter.py index d223fbd..8b0d7eb 100755 --- a/retweeter.py +++ b/retweeter.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import argparse, tools def getSavepoint(db, user): From f39d6aefa6a7ca94f76f13a8388d00b4e4de31f3 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 12:16:19 +0100 Subject: [PATCH 43/57] Cleaning up some stuff... --- retweeter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retweeter.py b/retweeter.py index 8b0d7eb..0ca2b0a 100755 --- a/retweeter.py +++ b/retweeter.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import argparse, tools +import argparse, dbtools, setuptools, twitools def getSavepoint(db, user): db.executeQuery("SELECT MAX(tweet_id) FROM retweets WHERE LOWER(author) = '%s'" % user.lower()) @@ -10,7 +10,7 @@ def getSavepoint(db, user): print("No tweets from %s stored yet." % user) return 0 -def retweet(user, two=tools.twObject(), db=tools.dbHelper(tools.dbpath())): +def retweet(user, two=twitools.twObject(), db=dbtools.dbHelper()): query = "from:" + user savepoint = getSavepoint(db, user) From 057dc64a491309febad4dfce7978696c96541712 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 12:16:57 +0100 Subject: [PATCH 44/57] Fix column name --- retweeter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retweeter.py b/retweeter.py index 0ca2b0a..c842bce 100755 --- a/retweeter.py +++ b/retweeter.py @@ -3,7 +3,7 @@ import argparse, dbtools, setuptools, twitools def getSavepoint(db, user): - db.executeQuery("SELECT MAX(tweet_id) FROM retweets WHERE LOWER(author) = '%s'" % user.lower()) + db.executeQuery("SELECT MAX(id) FROM retweets WHERE LOWER(author) = '%s'" % user.lower()) try: return int(db.getNext()[0]) except: From a2bff5c6d7d3f15a3daf8270de9f30ab0ca62925 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 12:20:00 +0100 Subject: [PATCH 45/57] Fix function call --- retweeter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retweeter.py b/retweeter.py index c842bce..82697a1 100755 --- a/retweeter.py +++ b/retweeter.py @@ -23,7 +23,7 @@ def retweet(user, two=twitools.twObject(), db=dbtools.dbHelper()): for status in timeline: if (status.text[0] != "@" or two.whoami() in status.text) and status.text[0:2] != "RT": timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = unescapeText(status.text) + text = setuptools.unescapeText(status.text) db.executeQuery("INSERT INTO retweets('id','author','created_at','text') VALUES(" + str(status.id) + ",'" + user.lower() + "','" + timestamp + "','" + text + "')") db.commit() From 2a28589c6e7eab6c9154f516d88b3cda79b179a6 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 13:56:48 +0100 Subject: [PATCH 46/57] Give retweet function a return value --- retweeter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/retweeter.py b/retweeter.py index 82697a1..e5d5801 100755 --- a/retweeter.py +++ b/retweeter.py @@ -34,7 +34,7 @@ def retweet(user, two=twitools.twObject(), db=dbtools.dbHelper()): tw_counter = tw_counter + 1 db.closeConnection() - + return tw_counter if __name__ == "__main__": parser = argparse.ArgumentParser() From e1a8067593e5e004e1b8690e90e83a1ad9e270ca Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 14:12:26 +0100 Subject: [PATCH 47/57] Add latest challenge --- extras/challenge.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/extras/challenge.py b/extras/challenge.py index 2fa4677..f9a6d6a 100755 --- a/extras/challenge.py +++ b/extras/challenge.py @@ -1,15 +1,13 @@ #!/usr/bin/env python3 -import tools +import dbtools, setuptools import operator, re, sys -def getTweets(path, mon, mode = "@"): - db = tools.dbHelper(path) - +def getTweets(mon, mode = "@", db = dbtools.dbHelper()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets WHERE SUBSTR(timestamp,0,11)>='%s-01' AND SUBSTR(timestamp,0,11)<='%s-31'" % (mon, mon)) - + for tweet in tweets: for word in tweet[0].lower().split(): if word[0] == mode or mode == "": @@ -27,10 +25,9 @@ def getTweets(path, mon, mode = "@"): if __name__ == "__main__": mode = "@" - path = tools.dbpath() - mon = "2016-03" + mon = "2016-07" - if len(sys.argv) > 1: + if len(sys.argv) > 1: if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): raise ValueError("Invalid arguments passed.") @@ -42,5 +39,6 @@ if __name__ == "__main__": else: mon = arg - for handle, tweets in sorted(list(getTweets(path,mon,mode).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mon,mode).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) + From 47a0b7f7d47140ea97f7c50b328bf69b416f9726 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 7 Dec 2016 14:16:42 +0100 Subject: [PATCH 48/57] Move deleter to extras --- deleter.py => extras/deleter.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename deleter.py => extras/deleter.py (100%) diff --git a/deleter.py b/extras/deleter.py similarity index 100% rename from deleter.py rename to extras/deleter.py From 15ddbcc0bd75376d15f28b3918da777fce0933c3 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 23 Dec 2016 17:41:24 +0100 Subject: [PATCH 49/57] Add function for sending a tweet --- twitools/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/twitools/__init__.py b/twitools/__init__.py index e3ed7a3..a5686f4 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -21,6 +21,9 @@ class twObject: def whoami(self): return self.auth.get_username() + def tweet(self, text, reply = 0): + return self.api.update_status(text, reply) + def getFollowerIDs(two=twObject()): ''' Returns 5,000 follower IDs at most ''' return two.api.followers_ids(screen_name=twObject().whoami()) From b73becd23fdcdff6e870082eabb9f87470d60e56 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 27 Jan 2017 21:54:59 +0100 Subject: [PATCH 50/57] Move stuff out of that extras folder. Need to think that over... --- extras/challenge.py => challenge.py | 0 extras/deleter.py => deleter.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename extras/challenge.py => challenge.py (100%) rename extras/deleter.py => deleter.py (100%) diff --git a/extras/challenge.py b/challenge.py similarity index 100% rename from extras/challenge.py rename to challenge.py diff --git a/extras/deleter.py b/deleter.py similarity index 100% rename from extras/deleter.py rename to deleter.py From f769f9ac2684c0cc3f1bfe35a4392af3bdb64e73 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 27 Jan 2017 22:02:53 +0100 Subject: [PATCH 51/57] Added some comments JFF --- csvdb.py | 7 +++++++ filler.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/csvdb.py b/csvdb.py index 84b69d5..36d08e0 100755 --- a/csvdb.py +++ b/csvdb.py @@ -5,6 +5,13 @@ import dbtools import sqlite3, csv, sys def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): + """ + Initializes the database. + + :param dbo: Database object for the database to be initialized. + :param infile: Path of the CSV file to initalize the database with. + :return: Returns nothing. + """ try: infile = open(infile) except IOError: diff --git a/filler.py b/filler.py index ccfe413..298e61e 100755 --- a/filler.py +++ b/filler.py @@ -51,6 +51,8 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): return mcount, savepoint or 0, db.getLatestMessage() def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): + """ Get handles of users we are following. :param db: Database object to be used. :param two: Twitter object to be used. :param firstrun: Must be set to True if the function is executed for the first time. Defaults to False. :return: Returns the number of gained and lost followings in a list (gained, lost). """ + current = list(db.getFollowers()) new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) gained = 0 @@ -77,6 +79,14 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) return gained, lost def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): + """ + Get handles of users we are following. + + :param db: Database object to be used. + :param two: Twitter object to be used. + :param firstrun: Must be set to True if the function is executed for the first time. Defaults to False. + :return: Returns the number of gained and lost followings in a list (gained, lost). + """ current = list(db.getFollowing()) new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) gained = 0 From 6f255e550a52fd758911b7bf8664d0101f2adf8c Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 27 Jan 2017 22:20:34 +0100 Subject: [PATCH 52/57] Add lyrics table to database --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2e9594c..a4a1fb2 100755 --- a/setup.py +++ b/setup.py @@ -53,7 +53,8 @@ if not db.isInitialized(): db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") db.executeQuery("CREATE TABLE followers(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") db.executeQuery("CREATE TABLE following(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") - db.executeQuery("CREATE TABLE IF NOT EXISTS retweets(id INT PRIMARY KEY, author VARCHAR(30), created_at VARCHAR(30), text TEXT);") + db.executeQuery("CREATE TABLE retweets(id INT PRIMARY KEY, author VARCHAR(30), created_at VARCHAR(30), text TEXT);") + db.executeQuery("CREATE TABLE lyrics(id INTEGER PRIMARY KEY AUTOINCREMENT, text VARCHAR(140) NOT NULL, ref INT NOT NULL default '0', tweet_id INT, active BOOLEAN default '0');") db.commit() From 54322b716562dc96741a5aa7b570f5d2f7b6ce25 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 27 Jan 2017 23:25:00 +0100 Subject: [PATCH 53/57] Apparently actually got a working lyricsbot... --- lyricsbot.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100755 lyricsbot.py diff --git a/lyricsbot.py b/lyricsbot.py new file mode 100755 index 0000000..6da83b1 --- /dev/null +++ b/lyricsbot.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 + +import dbtools, twitools +import argparse, operator, random, re, sys + +def getLyrics(db = dbtools.dbHelper()): + lyrics = db.executeQuery("SELECT id, text, ref FROM lyrics WHERE active = 1") + lyric = random.sample(list(lyrics), 1)[0] + ref = list(db.executeQuery("SELECT tweet_id FROM lyrics WHERE id = %s" % lyric[2]))[0][0] if not int(lyric[2]) == 0 else 0 + + return lyric[0], lyric[1], ref + +def postprocess(lid, tid, db = dbtools.dbHelper()): + db.executeQuery("UPDATE lyrics SET tweet_id = %i WHERE id = %i;" % (int(tid), int(lid))) + db.executeQuery("UPDATE lyrics SET active = 1 WHERE ref = %i;" % int(lid)) + db.executeQuery("UPDATE lyrics SET active = 0 WHERE id = %i;" % int(lid)) + db.commit() + +def tweet(text, ref = 0, two = twitools.twObject()): + return two.tweet(text, ref).id + +if __name__ == "__main__": + lid, text, ref = getLyrics() + postprocess(lid, tweet(text, ref)) From ed6f74281cf459c7fc9bd3cae8d4dd5e86bacb9d Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Fri, 27 Jan 2017 23:44:52 +0100 Subject: [PATCH 54/57] Reactivate lyrics when reaching dead end --- lyricsbot.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lyricsbot.py b/lyricsbot.py index 6da83b1..5a0e762 100755 --- a/lyricsbot.py +++ b/lyricsbot.py @@ -10,10 +10,18 @@ def getLyrics(db = dbtools.dbHelper()): return lyric[0], lyric[1], ref +def findParent(lid, db = dbtools.dbHelper()): + ref = int(list(db.executeQuery("SELECT ref FROM lyrics WHERE id = %i;" % int(lid)))[0][0]) + return lid if ref == 0 else findParent(ref) + def postprocess(lid, tid, db = dbtools.dbHelper()): db.executeQuery("UPDATE lyrics SET tweet_id = %i WHERE id = %i;" % (int(tid), int(lid))) - db.executeQuery("UPDATE lyrics SET active = 1 WHERE ref = %i;" % int(lid)) db.executeQuery("UPDATE lyrics SET active = 0 WHERE id = %i;" % int(lid)) + + if list(db.executeQuery("SELECT COUNT(*) FROM lyrics WHERE ref = %i;" % int(lid)))[0][0] == 0: + db.executeQuery("UPDATE lyrics SET active = 1 WHERE id = %i;" % findParent(lid)) + else: + db.executeQuery("UPDATE lyrics SET active = 1 WHERE ref = %i;" % int(lid)) db.commit() def tweet(text, ref = 0, two = twitools.twObject()): From 9c3fdb29db1d11c0f455c07e7c0b37048f3c6801 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 Jan 2017 03:04:51 +0100 Subject: [PATCH 55/57] Store followers/followings as user IDs rather than handles. Store handles in separate table. --- dbtools/__init__.py | 13 ++++++++++-- filler.py | 47 ++++++++++++++++++++++---------------------- twitools/__init__.py | 12 ++++++++--- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/dbtools/__init__.py b/dbtools/__init__.py index e616238..95d508e 100644 --- a/dbtools/__init__.py +++ b/dbtools/__init__.py @@ -77,12 +77,12 @@ class dbObject: def getFollowers(db): db.executeQuery("SELECT id FROM followers WHERE `until` = 0;") for i in db.getAll(): - yield i[0] + yield int(i[0]) def getFollowing(db): db.executeQuery("SELECT id FROM following WHERE `until` = 0;") for i in db.getAll(): - yield i[0] + yield int(i[0]) def getLatestMessage(db): @@ -99,6 +99,15 @@ class dbObject: except: return 0 + def matchNameID(db, name, id): + db.executeQuery("SELECT COUNT(*) FROM names WHERE id = '%s' AND name = '%s' AND until = 0;" % (id, name)) + try: + if int(db.getNext()[0]) != 0: + return True + except: + pass + return False + def dbHelper(): if setuptools.dbtype() == SQLITE: return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) diff --git a/filler.py b/filler.py index 298e61e..bab2d54 100755 --- a/filler.py +++ b/filler.py @@ -48,13 +48,11 @@ def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): db.commit() - return mcount, savepoint or 0, db.getLatestMessage() + return mcount, savepoint or 0, db.getLatestMessage def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): - """ Get handles of users we are following. :param db: Database object to be used. :param two: Twitter object to be used. :param firstrun: Must be set to True if the function is executed for the first time. Defaults to False. :return: Returns the number of gained and lost followings in a list (gained, lost). """ - current = list(db.getFollowers()) - new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) + new = list(twitools.getFollowerIDs()) gained = 0 lost = 0 @@ -64,31 +62,23 @@ def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for follower in new: if follower not in current: - db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower, int(time.time()))) - print("New follower: %s" % follower) + db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (str(follower), int(time.time()))) + db.commit() + print("New follower: %s" % (twitools.getNameByID(follower) if not firstrun else follower)) gained += 1 for follower in current: if follower not in new: - db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), follower)) - print("Lost follower: %s" % follower) + db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), str(follower))) + db.commit() + print("Lost follower: %s" % twitools.getNameByID(follower)) lost += 1 - db.commit() - return gained, lost def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): - """ - Get handles of users we are following. - - :param db: Database object to be used. - :param two: Twitter object to be used. - :param firstrun: Must be set to True if the function is executed for the first time. Defaults to False. - :return: Returns the number of gained and lost followings in a list (gained, lost). - """ current = list(db.getFollowing()) - new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) + new = list(twitools.getFollowingIDs()) gained = 0 lost = 0 @@ -98,20 +88,29 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) for following in new: if following not in current: - db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following, int(time.time()))) - print("You started following: %s" % following) + db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (str(following), int(time.time()))) + db.commit() + print("You started following: %s" % (str(following) if not firstrun else following)) gained += 1 for following in current: if following not in new: - db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), following)) - print("You no longer follow: %s" % following) + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), str(following))) + db.commit() + print("You no longer follow: %s" % twitools.getNameByID(following)) lost += 1 db.commit() return gained, lost +def getNames(db = dbtools.dbHelper(), two = twitools.twObject()): + for user in twitools.getNamesByIDs(list(set(list(db.getFollowing()) + list(db.getFollowers())))): + if not db.matchNameID(user["name"], user["id"]): + db.executeQuery("UPDATE names SET `until` = %i WHERE `id` = '%s' AND `name` = '%s';" % (int(time.time()), str(user["id"]), str(user["name"]))) + db.executeQuery("INSERT INTO names VALUES('%s', '%s', %i, 0)" % (str(user["id"]), str(user["name"]), int(time.time()))) + db.commit() + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true") @@ -125,3 +124,5 @@ if __name__ == "__main__": print("Gained %i followers, lost %i." % (gained, lost)) gained, lost = getFollowing(db, firstrun=args.first) print("Started following %i, stopped following %i." % (gained, lost)) + getNames(db) + print("Stored handles of following/followers.") diff --git a/twitools/__init__.py b/twitools/__init__.py index a5686f4..f69ef2b 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -26,13 +26,19 @@ class twObject: def getFollowerIDs(two=twObject()): ''' Returns 5,000 follower IDs at most ''' - return two.api.followers_ids(screen_name=twObject().whoami()) + for id in list(two.api.followers_ids(screen_name=twObject().whoami())): + yield int(id) def getFollowingIDs(two=twObject()): - return two.api.friends_ids(screen_name=twObject().whoami()) + for id in list(two.api.friends_ids(screen_name=twObject().whoami())): + yield int(id) + +def getNameByID(uid, two=twObject()): + return two.api.get_user(uid).screen_name def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): for page in setuptools.paginate(fids, 100): followers = two.api.lookup_users(user_ids=page) for follower in followers: - yield follower.screen_name + yield {"id": follower.id, "name": follower.screen_name} + From be5f819f8d513cb03523460cb57778410c93b105 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 30 Jan 2017 03:06:24 +0100 Subject: [PATCH 56/57] Add table for user name/user ID combinations --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a4a1fb2..77e20c3 100755 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ if not db.isInitialized(): db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));") db.executeQuery("CREATE TABLE followers(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") db.executeQuery("CREATE TABLE following(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") + db.executeQuery("CREATE TABLE names(`id` TEXT NOT NULL, `name` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));") db.executeQuery("CREATE TABLE retweets(id INT PRIMARY KEY, author VARCHAR(30), created_at VARCHAR(30), text TEXT);") db.executeQuery("CREATE TABLE lyrics(id INTEGER PRIMARY KEY AUTOINCREMENT, text VARCHAR(140) NOT NULL, ref INT NOT NULL default '0', tweet_id INT, active BOOLEAN default '0');") From ec4f06321a9d909263fdae31f86c9094363dd14a Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Mon, 6 Feb 2017 14:49:41 +0100 Subject: [PATCH 57/57] Fixed SQL query to properly handle changed handles --- filler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filler.py b/filler.py index bab2d54..10d94f5 100755 --- a/filler.py +++ b/filler.py @@ -107,7 +107,7 @@ def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False) def getNames(db = dbtools.dbHelper(), two = twitools.twObject()): for user in twitools.getNamesByIDs(list(set(list(db.getFollowing()) + list(db.getFollowers())))): if not db.matchNameID(user["name"], user["id"]): - db.executeQuery("UPDATE names SET `until` = %i WHERE `id` = '%s' AND `name` = '%s';" % (int(time.time()), str(user["id"]), str(user["name"]))) + db.executeQuery("UPDATE names SET `until` = %i WHERE `id` = '%s';" % (int(time.time()), str(user["id"]))) db.executeQuery("INSERT INTO names VALUES('%s', '%s', %i, 0)" % (str(user["id"]), str(user["name"]), int(time.time()))) db.commit()