diff --git a/csvdb.py b/csvdb.py index 153f413..84b69d5 100755 --- a/csvdb.py +++ b/csvdb.py @@ -1,35 +1,25 @@ #!/usr/bin/env python3 -import tools +import dbtools import sqlite3, csv, sys -def makeDB(path=tools.dbpath()): +def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'): try: - infile = open('tweets.csv') + infile = open(infile) except IOError: - raise IOError("Please make sure that the tweets.csv from the Twitter download is located in this directory.") + raise IOError("Unable to read %s." % infile) - input = list(csv.reader(infile)) + infile = list(csv.reader(infile)) - conn = sqlite3.connect(path) - cur = conn.cursor() - - try: - cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") - except sqlite3.OperationalError: - raise IOError("%s already exists. Please delete it before trying to create a new one." % path) + for row in infile[1:]: + try: + dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") + except: + pass - for row in input[1:]: - cur.execute("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');") - - conn.commit() + dbo.commit() if __name__ == "__main__": - if len(sys.argv) > 2: - raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") - try: - makeDB(sys.argv[1]) - except IndexError: - makeDB() + makeDB() diff --git a/dbtools/__init__.py b/dbtools/__init__.py new file mode 100644 index 0000000..e616238 --- /dev/null +++ b/dbtools/__init__.py @@ -0,0 +1,108 @@ +import setuptools +import sqlite3, pymysql, pymysql.cursors + +SQLITE = 0 +MYSQL = 1 +MARIADB = MYSQL + +MIN = 0 +MAX = 1 + +class dbObject: + +# --------------------------------------------- Initialization ------------------------------------------------- + + def initMySQL(self, host, port, user, pwd, db): + self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor) + self.cur = self.conn.cursor() + self.dbtype = MYSQL + self.host = host + self.port = port + self.user = user + self.pwd = pwd + self.db = db + + def initSQLite(self, path): + self.conn = sqlite3.connect(path) + self.cur = self.conn.cursor() + self.dbtype = SQLITE + self.path = path + + def __init__(self, dbtype = SQLITE, path = None, host = None, port = None, user = None, pwd = None, db = None): + + if dbtype == SQLITE: + self.initSQLite(path or 'Database.db') + + elif dbtype == MYSQL: + self.initMySQL(host or 'localhost', port or 3306, user, pwd, db) + + else: + raise ValueError("Unknown database type %s." % str(dbtype)) + +# ---------------------------------------------- No more initialization ---------------------------------------- + + def closeConnection(self): + return self.conn.close() + + def commit(self): + return self.conn.commit() + + def executeQuery(self, query): + return self.cur.execute(query) + + def getAll(self): + return self.cur.fetchall() + + def getNext(self): + return self.cur.fetchone() + + def isInitialized(self): + try: + self.executeQuery("SELECT * FROM tweets") + return True + except: + return False + + def getFLDate(self, val = MIN): + if val == MIN: + mode = "MIN" + else: + mode = "MAX" + if self.dbtype == SQLITE: + return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) + else: + self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode) + return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode])) + + def getFollowers(db): + db.executeQuery("SELECT id FROM followers WHERE `until` = 0;") + for i in db.getAll(): + yield i[0] + + def getFollowing(db): + db.executeQuery("SELECT id FROM following WHERE `until` = 0;") + for i in db.getAll(): + yield i[0] + + + def getLatestMessage(db): + db.executeQuery("SELECT max(id) FROM messages") + try: + return int(db.getNext()[0]) + except: + return 0 + + def getLatestTweet(db): + db.executeQuery("SELECT max(tweet_id) FROM tweets") + try: + return int(db.getNext()[0]) + except: + return 0 + +def dbHelper(): + if setuptools.dbtype() == SQLITE: + return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) + elif setuptools.dbtype() == MYSQL: + return dbObject(dbtype=MYSQL, host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname()) + else: + raise setuptools.SetupException() diff --git a/tweleter.py b/deleter.py similarity index 93% rename from tweleter.py rename to deleter.py index 274cfdc..38342bf 100755 --- a/tweleter.py +++ b/deleter.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 -import tools - +import twitools import tkinter, tkinter.messagebox, html.parser, os -two = tools.twObject() +two = twitools.twObject() top = tkinter.Tk() top.title("Tweet Deleter") scrollbar = tkinter.Scrollbar(top) @@ -41,7 +40,7 @@ def addStatus(id, text): list.insert(0, element.encode("UTF-8")) def getTweets(): - query = "from:" + tools.user() + query = "from:" + twitools.twObject().whoami() try: timeline = two.search(query, 0) diff --git a/filler.py b/filler.py index 8ef8710..ccfe413 100755 --- a/filler.py +++ b/filler.py @@ -1,26 +1,10 @@ #!/usr/bin/env python3 -import tools +import argparse, dbtools, setuptools, time, twitools -import html.parser, os - -def getSavepoint(db): - db.executeQuery("SELECT max(tweet_id) FROM tweets") - try: - return int(db.getNext()[0]) - except: - print("No tweets stored yet.") - return 0 - -def unescapeText(text): - return html.parser.HTMLParser().unescape(text).replace("'","''") - -def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()): +def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()): query = "from:" + user - - db = tools.dbHelper(dbpath) - - savepoint = getSavepoint(db) + savepoint = db.getLatestTweet() + 1 last = savepoint timeline = two.search(query, savepoint) @@ -29,18 +13,105 @@ def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()): for status in timeline: timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = unescapeText(status.text) + text = setuptools.unescapeText(status.text) - db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + try: + db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + except: + pass db.commit() last = status.id tw_counter = tw_counter + 1 - db.closeConnection() - return tw_counter, last, savepoint +def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()): + mcount = 0 + savepoint = db.getLatestMessage() + 1 + new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True) + new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True) + + for m in new_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + for m in new_out_messages: + try: + db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at)) + mcount += 1 + except: + pass + + db.commit() + + return mcount, savepoint or 0, db.getLatestMessage() + +def getFollowers(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): + current = list(db.getFollowers()) + new = list(twitools.getNamesByIDs(twitools.getFollowerIDs())) + gained = 0 + lost = 0 + + if (len(current) == 0 or len(new) == 0) and not firstrun: + print("Something went wrong.") + return 0,0 + + for follower in new: + if follower not in current: + db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower, int(time.time()))) + print("New follower: %s" % follower) + gained += 1 + + for follower in current: + if follower not in new: + db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), follower)) + print("Lost follower: %s" % follower) + lost += 1 + + db.commit() + + return gained, lost + +def getFollowing(db=dbtools.dbHelper(), two=twitools.twObject(), firstrun=False): + current = list(db.getFollowing()) + new = list(twitools.getNamesByIDs(twitools.getFollowingIDs())) + gained = 0 + lost = 0 + + if (len(current) == 0 or len(new) == 0) and not firstrun: + print("Something went wrong.") + return 0,0 + + for following in new: + if following not in current: + db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following, int(time.time()))) + print("You started following: %s" % following) + gained += 1 + + for following in current: + if following not in new: + db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), following)) + print("You no longer follow: %s" % following) + lost += 1 + + db.commit() + + return gained, lost + if __name__ == "__main__": - count, last, first = fill() - print("Stored %i tweets after %i until %i." % (count, first, last)) + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true") + args = parser.parse_args() + db = dbtools.dbHelper() + count, last, first = getTweets(db) + print("Stored %i tweets." % count) + count, last, first = getMessages(db) + print("Stored %i messages." % count) + gained, lost = getFollowers(db, firstrun=args.first) + print("Gained %i followers, lost %i." % (gained, lost)) + gained, lost = getFollowing(db, firstrun=args.first) + print("Started following %i, stopped following %i." % (gained, lost)) diff --git a/followertxt.py b/followertxt.py deleted file mode 100755 index 6b15653..0000000 --- a/followertxt.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 - -import tools -import os, time, tweepy - -def getFollowerIDs(two=tools.twObject()): - ''' Returns 5,000 follower IDs at most ''' - return two.api.followers_ids(screen_name=tools.user()) - -def getNamesByIDs(fids=getFollowerIDs(), two=tools.twObject()): - for page in tools.paginate(fids, 100): - followers = two.api.lookup_users(user_ids=page) - for follower in followers: - yield follower.screen_name - -def getOutDir(dirname="followers"): - if not os.path.isdir(dirname): - os.mkdir(dirname) - -def getOutFile(dirname="followers"): - getOutDir(dirname) - return os.path.join(dirname, str(int(time.time())) + ".txt") - -def writeOutFile(outfile=getOutFile()): - with open(getOutFile(), 'a') as f: - for follower in getNamesByIDs(getFollowerIDs()): - f.write(follower + "\n") - -if __name__ == "__main__": - writeOutFile() diff --git a/getdates.py b/getdates.py index 2d58eac..18cd6a0 100755 --- a/getdates.py +++ b/getdates.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -import tools - +import dbtools, setuptools import sys, datetime def dateArgs(argv = sys.argv[1:]): @@ -30,10 +29,10 @@ def dateArgs(argv = sys.argv[1:]): raise ValueError("Number of days for running average must be an integer.") mode = 0 elif mode == 1: - fr = tools.getDate(arg) + fr = setuptools.getDate(arg) mode = 0 else: - to = tools.getDate(arg) + to = setuptools.getDate(arg) mode = 0 if mode in (1, 2): @@ -92,9 +91,7 @@ def getHeaders(strings, av): return [headers] -def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbpath(), headers = False): - db = tools.dbHelper(path) - +def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False): if fr == None: fr = db.getFLDate() if to == None: @@ -111,4 +108,4 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbp return cur if __name__ == "__main__": - tools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) + setuptools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) diff --git a/getmentions.py b/getmentions.py index 833f6ca..7806205 100755 --- a/getmentions.py +++ b/getmentions.py @@ -1,12 +1,9 @@ #!/usr/bin/env python3 -import tools +import dbtools +import argparse, operator, re, sys -import operator, re, sys - -def getTweets(mode = "@", path = tools.dbpath()): - db = tools.dbHelper(path) - +def getTweets(mode = "@", db = dbtools.dbHelper()): handles = dict() tweets = db.executeQuery("SELECT text FROM tweets") @@ -26,20 +23,18 @@ def getTweets(mode = "@", path = tools.dbpath()): return handles if __name__ == "__main__": - mode = "@" - path = tools.dbpath() + parser = argparse.ArgumentParser() + g = parser.add_mutually_exclusive_group() + g.add_argument("-t", "--hashtags", help="count only #hashtags", action="store_true") + g.add_argument("-w", "--words", help="count all words", action="store_true") + g.add_argument("-m", "--mentions", help="count only @mentions (default)", action="store_true") + args = parser.parse_args() + if args.hashtags: + mode = "#" + elif args.words: + mode = "" + else: + mode = "@" - if len(sys.argv) > 1: - if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv): - raise ValueError("Invalid arguments passed.") - - for arg in sys.argv[1:]: - if arg == "-h": - mode = "#" - if arg == "-w": - mode = "" - else: - path = arg - - for handle, tweets in sorted(list(getTweets(mode,path).items()), key=operator.itemgetter(1), reverse=True): + for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True): print(handle + "," + str(tweets)) diff --git a/gettweets.py b/gettweets.py index 448794b..9463807 100755 --- a/gettweets.py +++ b/gettweets.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import tools +import dbtools, setuptools import sys, datetime @@ -20,10 +20,10 @@ def dateArgs(argv = sys.argv[1:]): strings += [arg] mode = 0 elif mode == 1: - fr = tools.getDate(arg) + fr = setuptools.getDate(arg) mode = 0 else: - to = tools.getDate(arg) + to = setuptools.getDate(arg) mode = 0 if mode in (1, 2): @@ -39,9 +39,7 @@ def queryBuilder(fr, to): return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) -def getDataByDate(fr, to, path = tools.dbpath(), headers = True): - db = tools.dbHelper(path) - +def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True): if fr == None: fr = db.getFLDate() if to == None: @@ -56,4 +54,4 @@ def getDataByDate(fr, to, path = tools.dbpath(), headers = True): if __name__ == "__main__": - tools.printCSV(getDataByDate(*dateArgs(), headers = True)) + setuptools.printCSV(getDataByDate(*dateArgs(), headers = True)) diff --git a/makedb.py b/makedb.py index a1b8fb6..30a6520 100755 --- a/makedb.py +++ b/makedb.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import tools +import dbtools import sys -def makeDB(path=tools.dbpath()): - db = tools.dbHelper(path, create = True) - +def makeDB(db=dbtools.dbHelper()): db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.commit() @@ -16,6 +14,6 @@ if __name__ == "__main__": if len(sys.argv) > 2: raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") try: - makeDB(sys.argv[1]) + makeDB(dbtools.dbObject(path=sys.argv[1])) except IndexError: makeDB() diff --git a/tools.py b/setuptools/__init__.py similarity index 57% rename from tools.py rename to setuptools/__init__.py index d39a1eb..c432018 100644 --- a/tools.py +++ b/setuptools/__init__.py @@ -1,20 +1,54 @@ -import configparser, csv, datetime, itertools, os, sqlite3, sys, tweepy +import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tweepy class SetupException(Exception): def __str__(self): - return "Seems like config.cfg has not been created yet. Run setup.py to do so." + return "Seems like config.cfg has not been created yet or contains serious errors. Run setup.py to create it." -def getSetting(section, setting): +def getSetting(section, setting, path = "config.cfg"): config = configparser.RawConfigParser() - config.read('config.cfg') + config.read(path) return config.get(section, setting) +def dbtype(): + try: + return int(getSetting("Database", "type")) + except: + return 0 # for SQLite3 + +### Must only be called AFTER dbtype()! ### + +def dbhost(): + try: + return getSetting("Database", "host") + except: + raise SetupException() + +def dbuser(): + try: + return getSetting("Database", "user") + except: + raise SetupException() + +def dbpass(): + try: + return getSetting("Database", "pass") + except: + raise SetupException() + +def dbname(): + try: + return getSetting("Database", "name") + except: + raise SetupException() + def dbpath(): try: return getSetting("Database", "path") except: - return "Database.db" + return SetupException() + +### def cke(): try: @@ -40,64 +74,6 @@ def ase(): except: raise SetupException() -def user(): - return twObject().whoami() - - -class dbObject: - - def __init__(self, path=dbpath()): - self.conn = sqlite3.connect(path) - self.cur = self.conn.cursor() - self.path = path - - def closeConnection(self): - return self.conn.close() - - def commit(self): - return self.conn.commit() - - def executeQuery(self, query): - return self.cur.execute(query) - - def getNext(self): - return self.cur.fetchone() - - def isInitialized(self): - try: - self.executeQuery("SELECT * FROM tweets") - return True - except: - return False - - def getFLDate(self, val = 0): - if val == 0: - mode = "MIN" - else: - mode = "MAX" - - return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) - - -class twObject: - - def __init__(self, cke = cke(), cse = cse(), ato = ato(), ase = ase()): - self.auth = tweepy.OAuthHandler(cke, cse) - self.auth.set_access_token(ato, ase) - self.api = tweepy.API(self.auth) - - def delete(self, id): - self.api.destroy_status(id) - - def search(self, query, savepoint = 0): - tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) - tweets.reverse() - return tweets - - def whoami(self): - return self.auth.get_username() - - def dbCheck(db, create = False): if (not create and dbInitialized(db)) or (create and not dbInitialized(db)): return True @@ -158,3 +134,6 @@ def printCSV(inlist): writer = csv.writer(sys.stdout) writer.writerows(inlist) +def unescapeText(text): + return html.parser.HTMLParser().unescape(text).replace("'","''") + diff --git a/twitools/__init__.py b/twitools/__init__.py new file mode 100644 index 0000000..ff18d7a --- /dev/null +++ b/twitools/__init__.py @@ -0,0 +1,32 @@ +import tweepy, setuptools + +class twObject: + + def __init__(self, cke = setuptools.cke(), cse = setuptools.cse(), ato = setuptools.ato(), ase = setuptools.ase()): + self.auth = tweepy.OAuthHandler(cke, cse) + self.auth.set_access_token(ato, ase) + self.api = tweepy.API(self.auth) + + def delete(self, id): + self.api.destroy_status(id) + + def search(self, query, savepoint = 0): + tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) + tweets.reverse() + return tweets + + def whoami(self): + return self.auth.get_username() + +def getFollowerIDs(two=twObject()): + ''' Returns 5,000 follower IDs at most ''' + return two.api.followers_ids(screen_name=twObject().whoami()) + +def getFollowingIDs(two=twObject()): + return two.api.friends_ids(screen_name=twObject().whoami()) + +def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): + for page in setuptools.paginate(fids, 100): + followers = two.api.lookup_users(user_ids=page) + for follower in followers: + yield follower.screen_name