Merge branch 'master' of git.klaus-uwe.me:kumitterer/twitools

This commit is contained in:
Klaus-Uwe Mitterer 2016-12-06 22:30:18 +01:00
commit c1bb677d40
11 changed files with 322 additions and 185 deletions

View file

@ -1,35 +1,25 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import dbtools
import sqlite3, csv, sys import sqlite3, csv, sys
def makeDB(path=tools.dbpath()): def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'):
try: try:
infile = open('tweets.csv') infile = open(infile)
except IOError: except IOError:
raise IOError("Please make sure that the tweets.csv from the Twitter download is located in this directory.") raise IOError("Unable to read %s." % infile)
input = list(csv.reader(infile)) infile = list(csv.reader(infile))
conn = sqlite3.connect(path) for row in infile[1:]:
cur = conn.cursor() try:
dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
try: except:
cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") pass
except sqlite3.OperationalError:
raise IOError("%s already exists. Please delete it before trying to create a new one." % path)
for row in input[1:]: dbo.commit()
cur.execute("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
conn.commit()
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) > 2: makeDB()
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
try:
makeDB(sys.argv[1])
except IndexError:
makeDB()

108
dbtools/__init__.py Normal file
View file

@ -0,0 +1,108 @@
import setuptools
import sqlite3, pymysql, pymysql.cursors
# Supported database backends.
SQLITE = 0
MYSQL = 1
MARIADB = MYSQL  # alias: MariaDB goes through the same code path as MySQL

# Selectors for dbObject.getFLDate(): earliest (MIN) or latest (MAX) tweet date.
MIN = 0
MAX = 1


class dbObject:
    """Wrapper around one database connection and one cursor.

    Supports SQLite (via sqlite3) and MySQL/MariaDB (via pymysql). The MySQL
    connection is opened with pymysql's DictCursor, so fetched rows are dicts
    there and tuples on SQLite; the query helpers below accept both shapes.
    """

    # --------------------------------------------- Initialization -------------------------------------------------

    def initMySQL(self, host, port, user, pwd, db):
        """Connect to a MySQL/MariaDB server and remember the connection settings."""
        self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor)
        self.cur = self.conn.cursor()
        self.dbtype = MYSQL
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = db

    def initSQLite(self, path):
        """Open (or create) the SQLite database file at *path*."""
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()
        self.dbtype = SQLITE
        self.path = path

    def __init__(self, dbtype = SQLITE, path = None, host = None, port = None, user = None, pwd = None, db = None):
        """Open a connection for the requested backend.

        Args:
            dbtype: SQLITE or MYSQL (MARIADB is an alias for MYSQL).
            path: SQLite file path; falls back to 'Database.db'.
            host: MySQL host; falls back to 'localhost'.
            port: MySQL port; falls back to 3306.
            user, pwd, db: MySQL credentials and schema name.

        Raises:
            ValueError: If *dbtype* is not a known backend.
        """
        if dbtype == SQLITE:
            self.initSQLite(path or 'Database.db')
        elif dbtype == MYSQL:
            self.initMySQL(host or 'localhost', port or 3306, user, pwd, db)
        else:
            raise ValueError("Unknown database type %s." % str(dbtype))

    # ---------------------------------------------- No more initialization ----------------------------------------

    @staticmethod
    def _firstColumn(row):
        """Return the first column of a fetched row, or None when there is no row.

        Rows are tuples with sqlite3 and dicts with pymysql's DictCursor;
        indexing a dict row with [0] would raise KeyError.
        """
        if row is None:
            return None
        if isinstance(row, dict):
            return next(iter(row.values()), None)
        return row[0]

    def closeConnection(self):
        """Close the underlying connection."""
        return self.conn.close()

    def commit(self):
        """Commit the current transaction."""
        return self.conn.commit()

    def executeQuery(self, query, params = None):
        """Execute *query* on the shared cursor.

        Args:
            query: SQL text.
            params: Optional sequence of values to bind. The placeholder
                syntax is the driver's own ('?' for sqlite3, '%s' for
                pymysql). Prefer this over building SQL by hand.

        Returns:
            Whatever the underlying cursor's execute() returns.
        """
        if params is None:
            return self.cur.execute(query)
        return self.cur.execute(query, params)

    def getAll(self):
        """Fetch all remaining rows of the last query."""
        return self.cur.fetchall()

    def getNext(self):
        """Fetch the next row of the last query (None when exhausted)."""
        return self.cur.fetchone()

    def isInitialized(self):
        """Return True if the tweets table can be queried, False otherwise."""
        try:
            # Probe with a single row instead of selecting the whole table.
            self.executeQuery("SELECT 1 FROM tweets LIMIT 1")
            return True
        except Exception:
            return False

    def getFLDate(self, val = MIN):
        """Return the date of the first (MIN) or last (any other value) stored tweet.

        The date is the first ten characters of the timestamp column, parsed
        by setuptools.getDate().
        """
        mode = "MIN" if val == MIN else "MAX"
        # SUBSTR positions are 1-based. A start of 0 happens to work on SQLite
        # but yields an empty string on MySQL, so spell the range as (1, 10).
        self.executeQuery("SELECT %s(SUBSTR(timestamp,1,10)) FROM tweets" % mode)
        return setuptools.getDate(str(self._firstColumn(self.getNext())))

    def getFollowers(self):
        """Yield the id of every follower row that is still active (until = 0)."""
        self.executeQuery("SELECT id FROM followers WHERE `until` = 0;")
        for row in self.getAll():
            yield self._firstColumn(row)

    def getFollowing(self):
        """Yield the id of every following row that is still active (until = 0)."""
        self.executeQuery("SELECT id FROM following WHERE `until` = 0;")
        for row in self.getAll():
            yield self._firstColumn(row)

    def getLatestMessage(self):
        """Return the highest stored message id, or 0 if there is none."""
        self.executeQuery("SELECT max(id) FROM messages")
        try:
            return int(self._firstColumn(self.getNext()))
        except (TypeError, ValueError):
            # max() over an empty table is NULL, which arrives here as None.
            return 0

    def getLatestTweet(self):
        """Return the highest stored tweet id, or 0 if there is none."""
        self.executeQuery("SELECT max(tweet_id) FROM tweets")
        try:
            return int(self._firstColumn(self.getNext()))
        except (TypeError, ValueError):
            # max() over an empty table is NULL, which arrives here as None.
            return 0
def dbHelper():
    """Build a dbObject for the backend selected in the configuration.

    The settings come from the project's own `setuptools` module imported at
    the top of this file.

    Raises:
        setuptools.SetupException: If the configured type is neither SQLITE
            nor MYSQL.
    """
    if setuptools.dbtype() == SQLITE:
        return dbObject(dbtype=SQLITE, path=setuptools.dbpath())
    if setuptools.dbtype() != MYSQL:
        raise setuptools.SetupException()
    credentials = dict(host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname())
    return dbObject(dbtype=MYSQL, **credentials)

View file

@ -1,10 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import twitools
import tkinter, tkinter.messagebox, html.parser, os import tkinter, tkinter.messagebox, html.parser, os
two = tools.twObject() two = twitools.twObject()
top = tkinter.Tk() top = tkinter.Tk()
top.title("Tweet Deleter") top.title("Tweet Deleter")
scrollbar = tkinter.Scrollbar(top) scrollbar = tkinter.Scrollbar(top)
@ -41,7 +40,7 @@ def addStatus(id, text):
list.insert(0, element.encode("UTF-8")) list.insert(0, element.encode("UTF-8"))
def getTweets(): def getTweets():
query = "from:" + tools.user() query = "from:" + twitools.twObject().whoami()
try: try:
timeline = two.search(query, 0) timeline = two.search(query, 0)

121
filler.py
View file

@ -1,26 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import argparse, dbtools, setuptools, time, twitools
import html.parser, os def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()):
def getSavepoint(db):
db.executeQuery("SELECT max(tweet_id) FROM tweets")
try:
return int(db.getNext()[0])
except:
print("No tweets stored yet.")
return 0
def unescapeText(text):
return html.parser.HTMLParser().unescape(text).replace("'","''")
def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()):
query = "from:" + user query = "from:" + user
savepoint = db.getLatestTweet() + 1
db = tools.dbHelper(dbpath)
savepoint = getSavepoint(db)
last = savepoint last = savepoint
timeline = two.search(query, savepoint) timeline = two.search(query, savepoint)
@ -29,18 +13,105 @@ def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()):
for status in timeline: for status in timeline:
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
text = unescapeText(status.text) text = setuptools.unescapeText(status.text)
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") try:
db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
except:
pass
db.commit() db.commit()
last = status.id last = status.id
tw_counter = tw_counter + 1 tw_counter = tw_counter + 1
db.closeConnection()
return tw_counter, last, savepoint return tw_counter, last, savepoint
def getMessages(db=None, two=None):
    """Fetch new received and sent direct messages and store them.

    Args:
        db: Database wrapper providing getLatestMessage(), executeQuery()
            and commit(). Built with dbtools.dbHelper() when omitted.
        two: Twitter client exposing the tweepy API as `.api`. Built with
            twitools.twObject() when omitted.

    Returns:
        Tuple (number of messages stored, savepoint used for the request,
        highest message id in the database afterwards).
    """
    # Build the defaults lazily: as default argument values they would open a
    # database connection and a Twitter client as soon as the module is imported.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    mcount = 0
    savepoint = db.getLatestMessage() + 1

    new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True)
    new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True)

    for batch in (new_messages, new_out_messages):
        for m in batch:
            try:
                db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
            except Exception:
                # Best effort, as before: a message that cannot be inserted
                # (presumably because it is already stored) is skipped. Unlike
                # a bare except, this lets KeyboardInterrupt/SystemExit through.
                continue
            mcount += 1

    db.commit()

    return mcount, savepoint, db.getLatestMessage()
def getFollowers(db=None, two=None, firstrun=False):
    """Synchronise the followers table with the account's current followers.

    Args:
        db: Database wrapper providing getFollowers(), executeQuery() and
            commit(). Built with dbtools.dbHelper() when omitted.
        two: Twitter client passed on to the twitools lookups. Built with
            twitools.twObject() when omitted.
        firstrun: When true, an empty stored or fetched list is accepted
            instead of being treated as a failure.

    Returns:
        Tuple (gained, lost).
    """
    # Build the defaults lazily: as default argument values they would open a
    # database connection and a Twitter client as soon as the module is imported.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    current = list(db.getFollowers())
    # Hand the caller's client through; it used to be accepted but ignored.
    new = list(twitools.getNamesByIDs(twitools.getFollowerIDs(two), two))

    gained = 0
    lost = 0

    if (not current or not new) and not firstrun:
        print("Something went wrong.")
        return 0,0

    # Sets make each membership test O(1); the lists still drive the loops so
    # rows are written and reported in the original order.
    known = set(current)
    present = set(new)

    for follower in new:
        if follower not in known:
            db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower, int(time.time())))
            print("New follower: %s" % follower)
            gained += 1

    for follower in current:
        if follower not in present:
            db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), follower))
            print("Lost follower: %s" % follower)
            lost += 1

    db.commit()

    return gained, lost
def getFollowing(db=None, two=None, firstrun=False):
    """Synchronise the following table with the accounts currently followed.

    Args:
        db: Database wrapper providing getFollowing(), executeQuery() and
            commit(). Built with dbtools.dbHelper() when omitted.
        two: Twitter client passed on to the twitools lookups. Built with
            twitools.twObject() when omitted.
        firstrun: When true, an empty stored or fetched list is accepted
            instead of being treated as a failure.

    Returns:
        Tuple (gained, lost).
    """
    # Build the defaults lazily: as default argument values they would open a
    # database connection and a Twitter client as soon as the module is imported.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    current = list(db.getFollowing())
    # Hand the caller's client through; it used to be accepted but ignored.
    new = list(twitools.getNamesByIDs(twitools.getFollowingIDs(two), two))

    gained = 0
    lost = 0

    if (not current or not new) and not firstrun:
        print("Something went wrong.")
        return 0,0

    # Sets make each membership test O(1); the lists still drive the loops so
    # rows are written and reported in the original order.
    known = set(current)
    present = set(new)

    for following in new:
        if following not in known:
            db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following, int(time.time())))
            print("You started following: %s" % following)
            gained += 1

    for following in current:
        if following not in present:
            db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), following))
            print("You no longer follow: %s" % following)
            lost += 1

    db.commit()

    return gained, lost
if __name__ == "__main__": if __name__ == "__main__":
count, last, first = fill() parser = argparse.ArgumentParser()
print("Stored %i tweets after %i until %i." % (count, first, last)) parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true")
args = parser.parse_args()
db = dbtools.dbHelper()
count, last, first = getTweets(db)
print("Stored %i tweets." % count)
count, last, first = getMessages(db)
print("Stored %i messages." % count)
gained, lost = getFollowers(db, firstrun=args.first)
print("Gained %i followers, lost %i." % (gained, lost))
gained, lost = getFollowing(db, firstrun=args.first)
print("Started following %i, stopped following %i." % (gained, lost))

View file

@ -1,30 +0,0 @@
#!/usr/bin/env python3
import tools
import os, time, tweepy
def getFollowerIDs(two=None):
    ''' Returns 5,000 follower IDs at most

    two is the Twitter client to query; a tools.twObject() is created when it
    is omitted. Creating it here rather than as a default argument value
    avoids building a client as soon as the module is imported.
    '''
    if two is None:
        two = tools.twObject()
    # Ask the supplied client who it is instead of building a second default
    # client just to look up the screen name.
    return two.api.followers_ids(screen_name=two.whoami())
def getNamesByIDs(fids=None, two=None):
    """Yield the screen name for every user id in *fids*.

    Args:
        fids: User ids to resolve; defaults to getFollowerIDs().
        two: Twitter client used for the lookups; defaults to a new
            tools.twObject().

    The defaults are resolved when iteration starts. As default argument
    values they would trigger a Twitter API request at import time.
    """
    if two is None:
        two = tools.twObject()
    if fids is None:
        fids = getFollowerIDs(two)

    # Users are looked up in pages of 100 ids per request.
    for page in tools.paginate(fids, 100):
        followers = two.api.lookup_users(user_ids=page)
        for follower in followers:
            yield follower.screen_name
def getOutDir(dirname="followers"):
    """Make sure the output directory *dirname* exists.

    Creates the directory (and any missing parents) when needed and does
    nothing when it already exists. Returns None.

    Raises:
        OSError: If *dirname* exists but is not a directory, or cannot be
            created.
    """
    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs(dirname, exist_ok=True)
def getOutFile(dirname="followers"):
    """Return a path inside *dirname* named after the current Unix time.

    The directory is created first via getOutDir(); the file name is the
    whole-second timestamp followed by ".txt".
    """
    getOutDir(dirname)
    stamp = int(time.time())
    return os.path.join(dirname, "%d.txt" % stamp)
def writeOutFile(outfile=None):
    """Append the screen names of the current followers to *outfile*.

    Args:
        outfile: Path of the file to append to, one screen name per line.
            When omitted, a new timestamped path from getOutFile() is used.
    """
    # Resolve the default at call time. As a default argument value it would
    # create the output directory and fix the timestamp at import time.
    if outfile is None:
        outfile = getOutFile()

    # Write to the requested file; the argument used to be ignored.
    with open(outfile, 'a') as f:
        for follower in getNamesByIDs(getFollowerIDs()):
            f.write(follower + "\n")
if __name__ == "__main__":
    # Script entry point: dump the current follower list to a new timestamped file.
    writeOutFile()

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import dbtools, setuptools
import sys, datetime import sys, datetime
def dateArgs(argv = sys.argv[1:]): def dateArgs(argv = sys.argv[1:]):
@ -30,10 +29,10 @@ def dateArgs(argv = sys.argv[1:]):
raise ValueError("Number of days for running average must be an integer.") raise ValueError("Number of days for running average must be an integer.")
mode = 0 mode = 0
elif mode == 1: elif mode == 1:
fr = tools.getDate(arg) fr = setuptools.getDate(arg)
mode = 0 mode = 0
else: else:
to = tools.getDate(arg) to = setuptools.getDate(arg)
mode = 0 mode = 0
if mode in (1, 2): if mode in (1, 2):
@ -92,9 +91,7 @@ def getHeaders(strings, av):
return [headers] return [headers]
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbpath(), headers = False): def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False):
db = tools.dbHelper(path)
if fr == None: if fr == None:
fr = db.getFLDate() fr = db.getFLDate()
if to == None: if to == None:
@ -111,4 +108,4 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbp
return cur return cur
if __name__ == "__main__": if __name__ == "__main__":
tools.printCSV(getTweetsByDate(*dateArgs(), headers = True)) setuptools.printCSV(getTweetsByDate(*dateArgs(), headers = True))

View file

@ -1,12 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import dbtools
import argparse, operator, re, sys
import operator, re, sys def getTweets(mode = "@", db = dbtools.dbHelper()):
def getTweets(mode = "@", path = tools.dbpath()):
db = tools.dbHelper(path)
handles = dict() handles = dict()
tweets = db.executeQuery("SELECT text FROM tweets") tweets = db.executeQuery("SELECT text FROM tweets")
@ -26,20 +23,18 @@ def getTweets(mode = "@", path = tools.dbpath()):
return handles return handles
if __name__ == "__main__": if __name__ == "__main__":
mode = "@" parser = argparse.ArgumentParser()
path = tools.dbpath() g = parser.add_mutually_exclusive_group()
g.add_argument("-t", "--hashtags", help="count only #hashtags", action="store_true")
g.add_argument("-w", "--words", help="count all words", action="store_true")
g.add_argument("-m", "--mentions", help="count only @mentions (default)", action="store_true")
args = parser.parse_args()
if args.hashtags:
mode = "#"
elif args.words:
mode = ""
else:
mode = "@"
if len(sys.argv) > 1: for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True):
if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv):
raise ValueError("Invalid arguments passed.")
for arg in sys.argv[1:]:
if arg == "-h":
mode = "#"
if arg == "-w":
mode = ""
else:
path = arg
for handle, tweets in sorted(list(getTweets(mode,path).items()), key=operator.itemgetter(1), reverse=True):
print(handle + "," + str(tweets)) print(handle + "," + str(tweets))

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import dbtools, setuptools
import sys, datetime import sys, datetime
@ -20,10 +20,10 @@ def dateArgs(argv = sys.argv[1:]):
strings += [arg] strings += [arg]
mode = 0 mode = 0
elif mode == 1: elif mode == 1:
fr = tools.getDate(arg) fr = setuptools.getDate(arg)
mode = 0 mode = 0
else: else:
to = tools.getDate(arg) to = setuptools.getDate(arg)
mode = 0 mode = 0
if mode in (1, 2): if mode in (1, 2):
@ -39,9 +39,7 @@ def queryBuilder(fr, to):
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to) return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
def getDataByDate(fr, to, path = tools.dbpath(), headers = True): def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True):
db = tools.dbHelper(path)
if fr == None: if fr == None:
fr = db.getFLDate() fr = db.getFLDate()
if to == None: if to == None:
@ -56,4 +54,4 @@ def getDataByDate(fr, to, path = tools.dbpath(), headers = True):
if __name__ == "__main__": if __name__ == "__main__":
tools.printCSV(getDataByDate(*dateArgs(), headers = True)) setuptools.printCSV(getDataByDate(*dateArgs(), headers = True))

View file

@ -1,12 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import tools import dbtools
import sys import sys
def makeDB(path=tools.dbpath()): def makeDB(db=dbtools.dbHelper()):
db = tools.dbHelper(path, create = True)
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));") db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
db.commit() db.commit()
@ -16,6 +14,6 @@ if __name__ == "__main__":
if len(sys.argv) > 2: if len(sys.argv) > 2:
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.") raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
try: try:
makeDB(sys.argv[1]) makeDB(dbtools.dbObject(path=sys.argv[1]))
except IndexError: except IndexError:
makeDB() makeDB()

View file

@ -1,20 +1,54 @@
import configparser, csv, datetime, itertools, os, sqlite3, sys, tweepy import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tweepy
class SetupException(Exception): class SetupException(Exception):
def __str__(self): def __str__(self):
return "Seems like config.cfg has not been created yet. Run setup.py to do so." return "Seems like config.cfg has not been created yet or contains serious errors. Run setup.py to create it."
def getSetting(section, setting): def getSetting(section, setting, path = "config.cfg"):
config = configparser.RawConfigParser() config = configparser.RawConfigParser()
config.read('config.cfg') config.read(path)
return config.get(section, setting) return config.get(section, setting)
def dbtype():
try:
return int(getSetting("Database", "type"))
except:
return 0 # for SQLite3
### Must only be called AFTER dbtype()! ###
def dbhost():
try:
return getSetting("Database", "host")
except:
raise SetupException()
def dbuser():
try:
return getSetting("Database", "user")
except:
raise SetupException()
def dbpass():
try:
return getSetting("Database", "pass")
except:
raise SetupException()
def dbname():
try:
return getSetting("Database", "name")
except:
raise SetupException()
def dbpath(): def dbpath():
try: try:
return getSetting("Database", "path") return getSetting("Database", "path")
except: except:
return "Database.db" return SetupException()
###
def cke(): def cke():
try: try:
@ -40,64 +74,6 @@ def ase():
except: except:
raise SetupException() raise SetupException()
def user():
return twObject().whoami()
class dbObject:
def __init__(self, path=dbpath()):
self.conn = sqlite3.connect(path)
self.cur = self.conn.cursor()
self.path = path
def closeConnection(self):
return self.conn.close()
def commit(self):
return self.conn.commit()
def executeQuery(self, query):
return self.cur.execute(query)
def getNext(self):
return self.cur.fetchone()
def isInitialized(self):
try:
self.executeQuery("SELECT * FROM tweets")
return True
except:
return False
def getFLDate(self, val = 0):
if val == 0:
mode = "MIN"
else:
mode = "MAX"
return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
class twObject:
def __init__(self, cke = cke(), cse = cse(), ato = ato(), ase = ase()):
self.auth = tweepy.OAuthHandler(cke, cse)
self.auth.set_access_token(ato, ase)
self.api = tweepy.API(self.auth)
def delete(self, id):
self.api.destroy_status(id)
def search(self, query, savepoint = 0):
tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
tweets.reverse()
return tweets
def whoami(self):
return self.auth.get_username()
def dbCheck(db, create = False): def dbCheck(db, create = False):
if (not create and dbInitialized(db)) or (create and not dbInitialized(db)): if (not create and dbInitialized(db)) or (create and not dbInitialized(db)):
return True return True
@ -158,3 +134,6 @@ def printCSV(inlist):
writer = csv.writer(sys.stdout) writer = csv.writer(sys.stdout)
writer.writerows(inlist) writer.writerows(inlist)
def unescapeText(text):
return html.parser.HTMLParser().unescape(text).replace("'","''")

32
twitools/__init__.py Normal file
View file

@ -0,0 +1,32 @@
import tweepy, setuptools
class twObject:
    """Authenticated tweepy client plus a few convenience calls."""

    def __init__(self, cke = None, cse = None, ato = None, ase = None):
        """Create the OAuth handler and API object.

        Args:
            cke, cse: First and second argument to tweepy.OAuthHandler.
            ato, ase: First and second argument to set_access_token.

        Any value left as None is read via the same-named function in the
        setuptools module. Reading them here rather than as default argument
        values keeps an import of this module from failing when the
        configuration is missing.
        """
        if cke is None:
            cke = setuptools.cke()
        if cse is None:
            cse = setuptools.cse()
        if ato is None:
            ato = setuptools.ato()
        if ase is None:
            ase = setuptools.ase()

        self.auth = tweepy.OAuthHandler(cke, cse)
        self.auth.set_access_token(ato, ase)
        self.api = tweepy.API(self.auth)

    def delete(self, id):
        """Delete the status with the given id."""
        self.api.destroy_status(id)

    def search(self, query, savepoint = 0):
        """Return all search results for *query* newer than *savepoint*, in reverse of the order the API returned them."""
        tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
        tweets.reverse()
        return tweets

    def whoami(self):
        """Return the user name of the authenticated account."""
        return self.auth.get_username()
def getFollowerIDs(two=None):
    ''' Returns 5,000 follower IDs at most

    two is the Twitter client to query; a twObject() is created when it is
    omitted. Creating it here rather than as a default argument value avoids
    building a client as soon as the module is imported.
    '''
    if two is None:
        two = twObject()
    # Ask the supplied client who it is instead of building a second default
    # client just to look up the screen name.
    return two.api.followers_ids(screen_name=two.whoami())
def getFollowingIDs(two=None):
    """Return the ids of the accounts the authenticated user follows.

    Args:
        two: Twitter client to query; a twObject() is created when omitted.
            Creating it here rather than as a default argument value avoids
            building a client as soon as the module is imported.
    """
    if two is None:
        two = twObject()
    # Ask the supplied client who it is instead of building a second default
    # client just to look up the screen name.
    return two.api.friends_ids(screen_name=two.whoami())
def getNamesByIDs(fids=None, two=None):
    """Yield the screen name for every user id in *fids*.

    Args:
        fids: User ids to resolve; defaults to getFollowerIDs().
        two: Twitter client used for the lookups; defaults to a new twObject().

    The defaults are resolved when iteration starts. As default argument
    values they would trigger a Twitter API request at import time.
    """
    if two is None:
        two = twObject()
    if fids is None:
        fids = getFollowerIDs(two)

    # Users are looked up in pages of 100 ids per request.
    for page in setuptools.paginate(fids, 100):
        followers = two.api.lookup_users(user_ids=page)
        for follower in followers:
            yield follower.screen_name