Merge branch 'master' of git.klaus-uwe.me:kumitterer/twitools
This commit is contained in:
commit
1372c05736
12 changed files with 366 additions and 202 deletions
30
csvdb.py
30
csvdb.py
|
@ -1,35 +1,25 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import dbtools
|
||||||
|
|
||||||
import sqlite3, csv, sys
|
import sqlite3, csv, sys
|
||||||
|
|
||||||
def makeDB(dbo=None, infile='tweets.csv'):
    """Import the rows of a Twitter archive CSV file into the ``tweets`` table.

    The first CSV row is treated as a header and skipped. Every other row
    must carry at least ten columns; the first one is the numeric tweet id,
    the remaining nine are stored as quoted text.

    Args:
        dbo: Database helper exposing ``executeQuery(query)`` and
            ``commit()``. When omitted, ``dbtools.dbHelper()`` is created on
            first use instead of at import time.
        infile: Path of the CSV file to read.

    Raises:
        IOError: If ``infile`` cannot be opened or read.
    """
    if dbo is None:
        # Resolved lazily so that merely importing this module neither reads
        # the configuration nor opens a database connection.
        dbo = dbtools.dbHelper()

    try:
        # newline='' is what the csv module expects; it keeps line breaks
        # embedded in quoted tweet texts intact. The context manager closes
        # the file, which the previous version never did.
        with open(infile, newline='') as handle:
            rows = list(csv.reader(handle))
    except IOError:
        raise IOError("Unable to read %s." % infile)

    for row in rows[1:]:
        if len(row) < 10:
            # Blank or truncated line: nothing sensible to insert.
            continue

        try:
            # The id is the only value written without quotes, so make sure
            # it really is an integer before it becomes part of the SQL text.
            tweet_id = int(row[0])
        except ValueError:
            continue

        # executeQuery() only accepts a finished SQL string, so text values
        # are escaped by doubling single quotes.
        values = [field.replace("'", "''") for field in row[1:10]]
        query = ("INSERT INTO tweets VALUES(" + str(tweet_id) + ",'"
                 + "','".join(values) + "');")

        try:
            dbo.executeQuery(query)
        except Exception:
            # Best effort: rows the database rejects (e.g. tweets that are
            # already stored) are skipped so the import can continue.
            continue

    dbo.commit()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) > 2:
|
|
||||||
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
|
|
||||||
try:
|
|
||||||
makeDB(sys.argv[1])
|
|
||||||
except IndexError:
|
|
||||||
makeDB()
|
makeDB()
|
||||||
|
|
||||||
|
|
108
dbtools/__init__.py
Normal file
108
dbtools/__init__.py
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
import setuptools
|
||||||
|
import sqlite3, pymysql, pymysql.cursors
|
||||||
|
|
||||||
|
# Supported database back ends.
SQLITE = 0
MYSQL = 1
MARIADB = MYSQL  # MariaDB is handled exactly like MySQL

# Selectors for dbObject.getFLDate(): earliest or latest stored tweet date.
MIN = 0
MAX = 1


class dbObject:
    """Thin wrapper around one SQLite or MySQL/MariaDB connection and cursor.

    Attributes set by the constructor:
        conn: The open database connection.
        cur: The single cursor every query is executed on.
        dbtype: ``SQLITE`` or ``MYSQL``.
        path: Database file path (SQLite only).
        host, port, user, pwd, db: Connection settings (MySQL only).
    """

    # --------------------------------------------- Initialization -------------------------------------------------

    def initMySQL(self, host, port, user, pwd, db):
        """Connect to a MySQL server; rows are returned as dictionaries."""
        self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor)
        self.cur = self.conn.cursor()
        self.dbtype = MYSQL
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = db

    def initSQLite(self, path):
        """Open (or create) the SQLite database file at ``path``."""
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()
        self.dbtype = SQLITE
        self.path = path

    def __init__(self, dbtype = SQLITE, path = None, host = None, port = None, user = None, pwd = None, db = None):
        """Open a connection of the requested type.

        Args:
            dbtype: ``SQLITE`` (default) or ``MYSQL``.
            path: SQLite file name; defaults to ``'Database.db'``.
            host: MySQL host; defaults to ``'localhost'``.
            port: MySQL port; defaults to 3306.
            user, pwd, db: MySQL credentials and database name.

        Raises:
            ValueError: If ``dbtype`` is neither ``SQLITE`` nor ``MYSQL``.
        """
        if dbtype == SQLITE:
            self.initSQLite(path or 'Database.db')
        elif dbtype == MYSQL:
            self.initMySQL(host or 'localhost', port or 3306, user, pwd, db)
        else:
            raise ValueError("Unknown database type %s." % str(dbtype))

    # ---------------------------------------------- No more initialization ----------------------------------------

    @staticmethod
    def _firstValue(row):
        """Return the first column of ``row``, or None when there is no row.

        SQLite hands out tuples while the MySQL DictCursor hands out
        dictionaries, so plain ``row[0]`` only works for one of them.
        """
        if row is None:
            return None
        if isinstance(row, dict):
            return next(iter(row.values()), None)
        return row[0]

    def closeConnection(self):
        """Close the underlying connection."""
        return self.conn.close()

    def commit(self):
        """Commit the current transaction."""
        return self.conn.commit()

    def executeQuery(self, query):
        """Execute a finished SQL string on the shared cursor."""
        return self.cur.execute(query)

    def getAll(self):
        """Return all remaining rows of the last query."""
        return self.cur.fetchall()

    def getNext(self):
        """Return the next row of the last query, or None."""
        return self.cur.fetchone()

    def isInitialized(self):
        """Return True if the ``tweets`` table can be queried."""
        try:
            # Probing one row is enough; SELECT * would pull the whole table
            # into the cursor just to learn that it exists.
            self.executeQuery("SELECT 1 FROM tweets LIMIT 1")
            return True
        except Exception:
            return False

    def getFLDate(self, val = MIN):
        """Return the date of the first (``MIN``) or last (``MAX``) tweet.

        The ``YYYY-MM-DD`` prefix of the matching timestamp is passed through
        ``setuptools.getDate`` (project helper; presumably parses it into a
        date object - confirm there).
        """
        mode = "MIN" if val == MIN else "MAX"

        # SUBSTR positions are 1-based. A start of 0 only happens to work on
        # SQLite; MySQL returns an empty string for it, so (1, 10) is used to
        # get the same ten characters on both back ends.
        self.executeQuery("SELECT %s(SUBSTR(timestamp,1,10)) AS fldate FROM tweets" % mode)
        return setuptools.getDate(str(self._firstValue(self.getNext())))

    # NOTE: the first parameter of the following methods is named ``db`` in
    # place of the conventional ``self``; they are regular instance methods.

    def getFollowers(db):
        """Yield the ids of all current followers (rows with ``until`` = 0)."""
        db.executeQuery("SELECT id FROM followers WHERE `until` = 0;")
        for row in db.getAll():
            yield db._firstValue(row)

    def getFollowing(db):
        """Yield the ids of all currently followed accounts (``until`` = 0)."""
        db.executeQuery("SELECT id FROM following WHERE `until` = 0;")
        for row in db.getAll():
            yield db._firstValue(row)

    def getLatestMessage(db):
        """Return the highest stored message id, or 0 if there is none."""
        db.executeQuery("SELECT max(id) FROM messages")
        try:
            return int(db._firstValue(db.getNext()))
        except (TypeError, ValueError):
            # max() over an empty table yields NULL/None.
            return 0

    def getLatestTweet(db):
        """Return the highest stored tweet id, or 0 if there is none."""
        db.executeQuery("SELECT max(tweet_id) FROM tweets")
        try:
            return int(db._firstValue(db.getNext()))
        except (TypeError, ValueError):
            # max() over an empty table yields NULL/None.
            return 0
|
||||||
|
|
||||||
|
def dbHelper():
    """Build a ``dbObject`` from the settings stored in the configuration.

    Returns:
        A connected ``dbObject`` for the configured back end.

    Raises:
        setuptools.SetupException: If the configured database type is
            neither ``SQLITE`` nor ``MYSQL``.
    """
    # Read the type once; every setuptools.dbtype() call re-parses the
    # configuration file.
    dbtype = setuptools.dbtype()

    if dbtype == SQLITE:
        return dbObject(dbtype=SQLITE, path=setuptools.dbpath())

    if dbtype == MYSQL:
        return dbObject(dbtype=MYSQL, host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname())

    raise setuptools.SetupException()
|
|
@ -1,10 +1,9 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import twitools
|
||||||
|
|
||||||
import tkinter, tkinter.messagebox, html.parser, os
|
import tkinter, tkinter.messagebox, html.parser, os
|
||||||
|
|
||||||
two = tools.twObject()
|
two = twitools.twObject()
|
||||||
top = tkinter.Tk()
|
top = tkinter.Tk()
|
||||||
top.title("Tweet Deleter")
|
top.title("Tweet Deleter")
|
||||||
scrollbar = tkinter.Scrollbar(top)
|
scrollbar = tkinter.Scrollbar(top)
|
||||||
|
@ -41,7 +40,7 @@ def addStatus(id, text):
|
||||||
list.insert(0, element.encode("UTF-8"))
|
list.insert(0, element.encode("UTF-8"))
|
||||||
|
|
||||||
def getTweets():
|
def getTweets():
|
||||||
query = "from:" + tools.user()
|
query = "from:" + twitools.twObject().whoami()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
timeline = two.search(query, 0)
|
timeline = two.search(query, 0)
|
121
filler.py
121
filler.py
|
@ -1,26 +1,10 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import argparse, dbtools, setuptools, time, twitools
|
||||||
|
|
||||||
import html.parser, os
|
def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()):
|
||||||
|
|
||||||
def getSavepoint(db):
|
|
||||||
db.executeQuery("SELECT max(tweet_id) FROM tweets")
|
|
||||||
try:
|
|
||||||
return int(db.getNext()[0])
|
|
||||||
except:
|
|
||||||
print("No tweets stored yet.")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def unescapeText(text):
|
|
||||||
return html.parser.HTMLParser().unescape(text).replace("'","''")
|
|
||||||
|
|
||||||
def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()):
|
|
||||||
query = "from:" + user
|
query = "from:" + user
|
||||||
|
savepoint = db.getLatestTweet() + 1
|
||||||
db = tools.dbHelper(dbpath)
|
|
||||||
|
|
||||||
savepoint = getSavepoint(db)
|
|
||||||
last = savepoint
|
last = savepoint
|
||||||
|
|
||||||
timeline = two.search(query, savepoint)
|
timeline = two.search(query, savepoint)
|
||||||
|
@ -29,18 +13,105 @@ def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()):
|
||||||
|
|
||||||
for status in timeline:
|
for status in timeline:
|
||||||
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
|
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
|
||||||
text = unescapeText(status.text)
|
text = setuptools.unescapeText(status.text)
|
||||||
|
|
||||||
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
|
try:
|
||||||
|
db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
last = status.id
|
last = status.id
|
||||||
tw_counter = tw_counter + 1
|
tw_counter = tw_counter + 1
|
||||||
|
|
||||||
db.closeConnection()
|
|
||||||
|
|
||||||
return tw_counter, last, savepoint
|
return tw_counter, last, savepoint
|
||||||
|
|
||||||
|
def getMessages(db=None, two=None):
    """Fetch new direct messages (received and sent) and store them.

    Args:
        db: Database helper; defaults to ``dbtools.dbHelper()``.
        two: Twitter helper exposing ``api``; defaults to
            ``twitools.twObject()``.

    Returns:
        A tuple ``(stored, savepoint, latest)``: the number of messages
        inserted, the id the search started from and the highest message id
        stored afterwards.
    """
    # Defaults are created on demand; as default argument values they were
    # built at import time, once, even when the caller supplied its own.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    mcount = 0
    savepoint = db.getLatestMessage() + 1

    received = two.api.direct_messages(since_id=savepoint, count=200, full_text=True)
    sent = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True)

    for batch in (received, sent):
        for m in batch:
            try:
                db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
            except Exception:
                # Best effort: a message the database rejects (e.g. an id
                # that is already stored) must not abort the whole run.
                continue
            mcount += 1

    db.commit()

    return mcount, savepoint, db.getLatestMessage()
|
||||||
|
|
||||||
|
def getFollowers(db=None, two=None, firstrun=False):
    """Synchronise the ``followers`` table with the current follower list.

    New followers are inserted with ``until`` = 0; followers that are gone
    get their open row closed by setting ``until`` to the current time.

    Args:
        db: Database helper; defaults to ``dbtools.dbHelper()``.
        two: Twitter helper; defaults to ``twitools.twObject()``.
        firstrun: Accept an empty stored or fetched list instead of treating
            it as an error.

    Returns:
        A tuple ``(gained, lost)``.
    """
    # Defaults are created on demand rather than at import time.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    current = list(db.getFollowers())
    # Use the client that was handed in for the lookups as well.
    new = list(twitools.getNamesByIDs(twitools.getFollowerIDs(two), two))
    gained = 0
    lost = 0

    if (not current or not new) and not firstrun:
        print("Something went wrong.")
        return 0,0

    # Sets make the membership tests below O(1) instead of O(n).
    known = set(current)
    seen = set(new)
    now = int(time.time())

    for follower in new:
        if follower not in known:
            db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower.replace("'", "''"), now))
            print("New follower: %s" % follower)
            gained += 1

    for follower in current:
        if follower not in seen:
            db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (now, follower.replace("'", "''")))
            print("Lost follower: %s" % follower)
            lost += 1

    db.commit()

    return gained, lost
|
||||||
|
|
||||||
|
def getFollowing(db=None, two=None, firstrun=False):
    """Synchronise the ``following`` table with the accounts being followed.

    Newly followed accounts are inserted with ``until`` = 0; accounts that
    are no longer followed get their open row closed by setting ``until`` to
    the current time.

    Args:
        db: Database helper; defaults to ``dbtools.dbHelper()``.
        two: Twitter helper; defaults to ``twitools.twObject()``.
        firstrun: Accept an empty stored or fetched list instead of treating
            it as an error.

    Returns:
        A tuple ``(gained, lost)``.
    """
    # Defaults are created on demand rather than at import time.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    current = list(db.getFollowing())
    # Use the client that was handed in for the lookups as well.
    new = list(twitools.getNamesByIDs(twitools.getFollowingIDs(two), two))
    gained = 0
    lost = 0

    if (not current or not new) and not firstrun:
        print("Something went wrong.")
        return 0,0

    # Sets make the membership tests below O(1) instead of O(n).
    known = set(current)
    seen = set(new)
    now = int(time.time())

    for following in new:
        if following not in known:
            db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following.replace("'", "''"), now))
            print("You started following: %s" % following)
            gained += 1

    for following in current:
        if following not in seen:
            db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (now, following.replace("'", "''")))
            print("You no longer follow: %s" % following)
            lost += 1

    db.commit()

    return gained, lost
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Command-line entry point: store new tweets and messages, then update
    # the follower and following tables, all on one shared connection.
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true")
    args = parser.parse_args()

    db = dbtools.dbHelper()

    try:
        count, last, first = getTweets(db)
        print("Stored %i tweets." % count)

        count, last, first = getMessages(db)
        print("Stored %i messages." % count)

        gained, lost = getFollowers(db, firstrun=args.first)
        print("Gained %i followers, lost %i." % (gained, lost))

        gained, lost = getFollowing(db, firstrun=args.first)
        print("Started following %i, stopped following %i." % (gained, lost))
    finally:
        # None of the steps above closes the shared connection any more, so
        # release it here, also when one of them fails.
        db.closeConnection()
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import tools
|
|
||||||
import os, time, tweepy
|
|
||||||
|
|
||||||
def getFollowerIDs(two=tools.twObject()):
|
|
||||||
''' Returns 5,000 follower IDs at most '''
|
|
||||||
return two.api.followers_ids(screen_name=tools.user())
|
|
||||||
|
|
||||||
def getNamesByIDs(fids=getFollowerIDs(), two=tools.twObject()):
|
|
||||||
for page in tools.paginate(fids, 100):
|
|
||||||
followers = two.api.lookup_users(user_ids=page)
|
|
||||||
for follower in followers:
|
|
||||||
yield follower.screen_name
|
|
||||||
|
|
||||||
def getOutDir(dirname="followers"):
|
|
||||||
if not os.path.isdir(dirname):
|
|
||||||
os.mkdir(dirname)
|
|
||||||
|
|
||||||
def getOutFile(dirname="followers"):
|
|
||||||
getOutDir(dirname)
|
|
||||||
return os.path.join(dirname, str(int(time.time())) + ".txt")
|
|
||||||
|
|
||||||
def writeOutFile(outfile=getOutFile()):
|
|
||||||
with open(getOutFile(), 'a') as f:
|
|
||||||
for follower in getNamesByIDs(getFollowerIDs()):
|
|
||||||
f.write(follower + "\n")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
writeOutFile()
|
|
13
getdates.py
13
getdates.py
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import dbtools, setuptools
|
||||||
|
|
||||||
import sys, datetime
|
import sys, datetime
|
||||||
|
|
||||||
def dateArgs(argv = sys.argv[1:]):
|
def dateArgs(argv = sys.argv[1:]):
|
||||||
|
@ -30,10 +29,10 @@ def dateArgs(argv = sys.argv[1:]):
|
||||||
raise ValueError("Number of days for running average must be an integer.")
|
raise ValueError("Number of days for running average must be an integer.")
|
||||||
mode = 0
|
mode = 0
|
||||||
elif mode == 1:
|
elif mode == 1:
|
||||||
fr = tools.getDate(arg)
|
fr = setuptools.getDate(arg)
|
||||||
mode = 0
|
mode = 0
|
||||||
else:
|
else:
|
||||||
to = tools.getDate(arg)
|
to = setuptools.getDate(arg)
|
||||||
mode = 0
|
mode = 0
|
||||||
|
|
||||||
if mode in (1, 2):
|
if mode in (1, 2):
|
||||||
|
@ -92,9 +91,7 @@ def getHeaders(strings, av):
|
||||||
return [headers]
|
return [headers]
|
||||||
|
|
||||||
|
|
||||||
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbpath(), headers = False):
|
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False):
|
||||||
db = tools.dbHelper(path)
|
|
||||||
|
|
||||||
if fr == None:
|
if fr == None:
|
||||||
fr = db.getFLDate()
|
fr = db.getFLDate()
|
||||||
if to == None:
|
if to == None:
|
||||||
|
@ -111,4 +108,4 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbp
|
||||||
return cur
|
return cur
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
tools.printCSV(getTweetsByDate(*dateArgs(), headers = True))
|
setuptools.printCSV(getTweetsByDate(*dateArgs(), headers = True))
|
||||||
|
|
|
@ -1,12 +1,9 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import dbtools
|
||||||
|
import argparse, operator, re, sys
|
||||||
import operator, re, sys
|
|
||||||
|
|
||||||
def getTweets(mode = "@", path = tools.dbpath()):
|
|
||||||
db = tools.dbHelper(path)
|
|
||||||
|
|
||||||
|
def getTweets(mode = "@", db = dbtools.dbHelper()):
|
||||||
handles = dict()
|
handles = dict()
|
||||||
tweets = db.executeQuery("SELECT text FROM tweets")
|
tweets = db.executeQuery("SELECT text FROM tweets")
|
||||||
|
|
||||||
|
@ -26,20 +23,18 @@ def getTweets(mode = "@", path = tools.dbpath()):
|
||||||
return handles
|
return handles
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Command-line entry point: print "name,count" lines, most frequent
    # first, for mentions (default), hashtags or all words.
    parser = argparse.ArgumentParser()
    g = parser.add_mutually_exclusive_group()
    g.add_argument("-t", "--hashtags", help="count only #hashtags", action="store_true")
    g.add_argument("-w", "--words", help="count all words", action="store_true")
    g.add_argument("-m", "--mentions", help="count only @mentions (default)", action="store_true")
    args = parser.parse_args()

    if args.hashtags:
        mode = "#"
    elif args.words:
        mode = ""
    else:
        mode = "@"

    # sorted() accepts any iterable, so the items view needs no list() copy.
    for handle, tweets in sorted(getTweets(mode=mode).items(), key=operator.itemgetter(1), reverse=True):
        print(handle + "," + str(tweets))
|
||||||
|
|
12
gettweets.py
12
gettweets.py
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import dbtools, setuptools
|
||||||
|
|
||||||
import sys, datetime
|
import sys, datetime
|
||||||
|
|
||||||
|
@ -20,10 +20,10 @@ def dateArgs(argv = sys.argv[1:]):
|
||||||
strings += [arg]
|
strings += [arg]
|
||||||
mode = 0
|
mode = 0
|
||||||
elif mode == 1:
|
elif mode == 1:
|
||||||
fr = tools.getDate(arg)
|
fr = setuptools.getDate(arg)
|
||||||
mode = 0
|
mode = 0
|
||||||
else:
|
else:
|
||||||
to = tools.getDate(arg)
|
to = setuptools.getDate(arg)
|
||||||
mode = 0
|
mode = 0
|
||||||
|
|
||||||
if mode in (1, 2):
|
if mode in (1, 2):
|
||||||
|
@ -39,9 +39,7 @@ def queryBuilder(fr, to):
|
||||||
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
|
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
|
||||||
|
|
||||||
|
|
||||||
def getDataByDate(fr, to, path = tools.dbpath(), headers = True):
|
def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True):
|
||||||
db = tools.dbHelper(path)
|
|
||||||
|
|
||||||
if fr == None:
|
if fr == None:
|
||||||
fr = db.getFLDate()
|
fr = db.getFLDate()
|
||||||
if to == None:
|
if to == None:
|
||||||
|
@ -56,4 +54,4 @@ def getDataByDate(fr, to, path = tools.dbpath(), headers = True):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
tools.printCSV(getDataByDate(*dateArgs(), headers = True))
|
setuptools.printCSV(getDataByDate(*dateArgs(), headers = True))
|
||||||
|
|
|
@ -1,12 +1,10 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import tools
|
import dbtools
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
def makeDB(path=tools.dbpath()):
|
def makeDB(db=dbtools.dbHelper()):
|
||||||
db = tools.dbHelper(path, create = True)
|
|
||||||
|
|
||||||
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
@ -16,6 +14,6 @@ if __name__ == "__main__":
|
||||||
if len(sys.argv) > 2:
|
if len(sys.argv) > 2:
|
||||||
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
|
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
|
||||||
try:
|
try:
|
||||||
makeDB(sys.argv[1])
|
makeDB(dbtools.dbObject(path=sys.argv[1]))
|
||||||
except IndexError:
|
except IndexError:
|
||||||
makeDB()
|
makeDB()
|
||||||
|
|
55
setup.py
55
setup.py
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import configparser, os.path, sqlite3, tweepy
|
import configparser, os.path, tweepy, dbtools, getpass
|
||||||
|
|
||||||
if os.path.isfile("config.cfg"):
|
if os.path.isfile("config.cfg"):
|
||||||
print("config.cfg already exists. Please remove it before running this script.")
|
print("config.cfg already exists. Please remove it before running this script.")
|
||||||
|
@ -10,27 +10,54 @@ config = configparser.RawConfigParser()
|
||||||
|
|
||||||
config.add_section('Database')
|
config.add_section('Database')
|
||||||
|
|
||||||
print('''Twitools will use a database for certain tasks. If this file does not exist yet,
|
print('''Twitools will use a database for certain tasks. You can use a file or a MySQL database for this purpose.
|
||||||
it will be created in this process. The file name defaults to 'Database.db'.
|
|
||||||
|
If you wish to use a MySQL database, you will need the credentials for it. If you don't know what any of that means, stick with the default value and just press Enter.
|
||||||
''')
|
''')
|
||||||
|
|
||||||
dbpath = input("Name of the database file [Database.db]: ")
|
dbtype = input("Database type: %i (file), %i (MySQL) [%i]: " % (dbtools.SQLITE, dbtools.MYSQL, dbtools.SQLITE))
|
||||||
print()
|
print()
|
||||||
|
|
||||||
if dbpath == "":
|
try:
|
||||||
dbpath = "Database.db"
|
dbtype = int(dbtype)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if dbtype == dbtools.MYSQL:
|
||||||
|
dbhost = input("MySQL host [localhost]: ") or "localhost"
|
||||||
|
dbuser = input("MySQL username [twitools]: ") or "twitools"
|
||||||
|
dbpass = getpass.getpass("MySQL password (not echoed!): ")
|
||||||
|
dbname = input("MySQL database name [twitools]: ") or "twitools"
|
||||||
|
print()
|
||||||
|
|
||||||
|
config.set('Database', 'type', dbtype)
|
||||||
|
config.set('Database', 'host', dbhost)
|
||||||
|
config.set('Database', 'user', dbuser)
|
||||||
|
config.set('Database', 'pass', dbpass)
|
||||||
|
config.set('Database', 'name', dbname)
|
||||||
|
|
||||||
|
else:
|
||||||
|
dbtype = dbtools.SQLITE
|
||||||
|
dbpath = input("Name of the database file [Database.db]: ") or "Database.db"
|
||||||
|
print()
|
||||||
|
|
||||||
|
config.set('Database', 'type', dbtype)
|
||||||
config.set('Database', 'path', dbpath)
|
config.set('Database', 'path', dbpath)
|
||||||
|
|
||||||
if os.path.isfile(dbpath):
|
if dbtype == dbtools.MYSQL:
|
||||||
pass
|
db = dbtools.dbObject(dbtype=dbtype, host=dbhost, user=dbuser, pwd=dbpass, db=dbname)
|
||||||
else:
|
else:
|
||||||
conn = sqlite3.connect(dbpath)
|
db = dbtools.dbObject(dbtype=dbtype, path=dbpath)
|
||||||
cur = conn.cursor()
|
if not db.isInitialized():
|
||||||
cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
||||||
cur.execute("CREATE TABLE IF NOT EXISTS retweets(id INT PRIMARY KEY, author VARCHAR(30), created_at VARCHAR(30), text TEXT);")
|
db.executeQuery("CREATE TABLE messages(`id` INTEGER NOT NULL, `text` TEXT, `sender_id` INTEGER, `recipient_id` INTEGER, `created_at` TEXT, PRIMARY KEY(id));")
|
||||||
conn.commit()
|
db.executeQuery("CREATE TABLE followers(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));")
|
||||||
conn.close()
|
db.executeQuery("CREATE TABLE following(`id` TEXT NOT NULL, `since` INTEGER NOT NULL, `until` INTEGER, PRIMARY KEY(id, until));")
|
||||||
|
db.executeQuery("CREATE TABLE IF NOT EXISTS retweets(id INT PRIMARY KEY, author VARCHAR(30), created_at VARCHAR(30), text TEXT);")
|
||||||
|
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
db.closeConnection()
|
||||||
|
|
||||||
config.add_section("Twitter")
|
config.add_section("Twitter")
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,54 @@
|
||||||
import configparser, csv, datetime, itertools, os, sqlite3, sys, tweepy
|
import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tweepy
|
||||||
|
|
||||||
class SetupException(Exception):
    """Raised when a required setting cannot be read from the configuration."""

    def __str__(self):
        return "Seems like config.cfg has not been created yet or contains serious errors. Run setup.py to create it."
|
||||||
|
|
||||||
|
|
||||||
def getSetting(section, setting, path = "config.cfg"):
    """Return one raw string value from an INI-style configuration file.

    Args:
        section: Name of the section to look in.
        setting: Name of the option inside that section.
        path: Configuration file to read; defaults to ``config.cfg`` in the
            current working directory.

    Raises:
        configparser.NoSectionError: If the section does not exist. This is
            also what a missing file produces, because ``read()`` silently
            skips files it cannot open.
        configparser.NoOptionError: If the section lacks the option.
    """
    config = configparser.RawConfigParser()
    config.read(path)
    return config.get(section, setting)
|
||||||
|
|
||||||
|
def dbtype():
    """Return the configured database type as an integer.

    Falls back to 0 (SQLite3) when the setting is missing or not a number.
    """
    try:
        return int(getSetting("Database", "type"))
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not swallowed along with config errors.
        return 0 # for SQLite3
|
||||||
|
|
||||||
|
### Must only be called AFTER dbtype()! ###
|
||||||
|
|
||||||
|
def dbhost():
    """Return the configured MySQL host name.

    Raises:
        SetupException: If the setting cannot be read.
    """
    try:
        return getSetting("Database", "host")
    except Exception as err:
        # Keep the underlying cause attached for debugging.
        raise SetupException() from err
|
||||||
|
|
||||||
|
def dbuser():
    """Return the configured MySQL user name.

    Raises:
        SetupException: If the setting cannot be read.
    """
    try:
        return getSetting("Database", "user")
    except Exception as err:
        # Keep the underlying cause attached for debugging.
        raise SetupException() from err
|
||||||
|
|
||||||
|
def dbpass():
    """Return the configured MySQL password.

    Raises:
        SetupException: If the setting cannot be read.
    """
    try:
        return getSetting("Database", "pass")
    except Exception as err:
        # Keep the underlying cause attached for debugging.
        raise SetupException() from err
|
||||||
|
|
||||||
|
def dbname():
    """Return the configured MySQL database name.

    Raises:
        SetupException: If the setting cannot be read.
    """
    try:
        return getSetting("Database", "name")
    except Exception as err:
        # Keep the underlying cause attached for debugging.
        raise SetupException() from err
|
||||||
|
|
||||||
def dbpath():
    """Return the configured path of the SQLite database file.

    Raises:
        SetupException: If the setting cannot be read.
    """
    try:
        return getSetting("Database", "path")
    except Exception as err:
        # The exception has to be raised, not returned: a returned exception
        # object would be handed to callers as if it were a file path.
        raise SetupException() from err
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
def cke():
|
def cke():
|
||||||
try:
|
try:
|
||||||
|
@ -40,64 +74,6 @@ def ase():
|
||||||
except:
|
except:
|
||||||
raise SetupException()
|
raise SetupException()
|
||||||
|
|
||||||
def user():
|
|
||||||
return twObject().whoami()
|
|
||||||
|
|
||||||
|
|
||||||
class dbObject:
|
|
||||||
|
|
||||||
def __init__(self, path=dbpath()):
|
|
||||||
self.conn = sqlite3.connect(path)
|
|
||||||
self.cur = self.conn.cursor()
|
|
||||||
self.path = path
|
|
||||||
|
|
||||||
def closeConnection(self):
|
|
||||||
return self.conn.close()
|
|
||||||
|
|
||||||
def commit(self):
|
|
||||||
return self.conn.commit()
|
|
||||||
|
|
||||||
def executeQuery(self, query):
|
|
||||||
return self.cur.execute(query)
|
|
||||||
|
|
||||||
def getNext(self):
|
|
||||||
return self.cur.fetchone()
|
|
||||||
|
|
||||||
def isInitialized(self):
|
|
||||||
try:
|
|
||||||
self.executeQuery("SELECT * FROM tweets")
|
|
||||||
return True
|
|
||||||
except:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def getFLDate(self, val = 0):
|
|
||||||
if val == 0:
|
|
||||||
mode = "MIN"
|
|
||||||
else:
|
|
||||||
mode = "MAX"
|
|
||||||
|
|
||||||
return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
|
|
||||||
|
|
||||||
|
|
||||||
class twObject:
|
|
||||||
|
|
||||||
def __init__(self, cke = cke(), cse = cse(), ato = ato(), ase = ase()):
|
|
||||||
self.auth = tweepy.OAuthHandler(cke, cse)
|
|
||||||
self.auth.set_access_token(ato, ase)
|
|
||||||
self.api = tweepy.API(self.auth)
|
|
||||||
|
|
||||||
def delete(self, id):
|
|
||||||
self.api.destroy_status(id)
|
|
||||||
|
|
||||||
def search(self, query, savepoint = 0):
|
|
||||||
tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
|
|
||||||
tweets.reverse()
|
|
||||||
return tweets
|
|
||||||
|
|
||||||
def whoami(self):
|
|
||||||
return self.auth.get_username()
|
|
||||||
|
|
||||||
|
|
||||||
def dbCheck(db, create = False):
|
def dbCheck(db, create = False):
|
||||||
if (not create and dbInitialized(db)) or (create and not dbInitialized(db)):
|
if (not create and dbInitialized(db)) or (create and not dbInitialized(db)):
|
||||||
return True
|
return True
|
||||||
|
@ -158,3 +134,6 @@ def printCSV(inlist):
|
||||||
writer = csv.writer(sys.stdout)
|
writer = csv.writer(sys.stdout)
|
||||||
writer.writerows(inlist)
|
writer.writerows(inlist)
|
||||||
|
|
||||||
|
def unescapeText(text):
    """Decode HTML character references and double single quotes for SQL.

    Args:
        text: Text as delivered by the Twitter API (e.g. ``&amp;``).

    Returns:
        The decoded text with every ``'`` replaced by ``''`` so it can be
        embedded in a single-quoted SQL string literal.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is the
    # documented replacement and is reachable through the ``html`` package
    # that ``import html.parser`` already binds.
    return html.unescape(text).replace("'","''")
|
||||||
|
|
32
twitools/__init__.py
Normal file
32
twitools/__init__.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
import tweepy, setuptools
|
||||||
|
|
||||||
|
class twObject:
    """Authenticated tweepy client plus a few convenience calls.

    Attributes:
        auth: The ``tweepy.OAuthHandler`` holding the credentials.
        api: The ``tweepy.API`` object bound to ``auth``.
    """

    def __init__(self, cke = None, cse = None, ato = None, ase = None):
        """Authenticate against Twitter.

        Every credential that is not given is read from the configuration
        through the ``setuptools`` helper of the same name.

        Args:
            cke, cse: Passed to ``tweepy.OAuthHandler`` (consumer
                credentials).
            ato, ase: Passed to ``set_access_token`` (access credentials).
        """
        # Looked up at call time; as default argument values they were read
        # once while the class was being defined, i.e. on import.
        if cke is None:
            cke = setuptools.cke()
        if cse is None:
            cse = setuptools.cse()
        if ato is None:
            ato = setuptools.ato()
        if ase is None:
            ase = setuptools.ase()

        self.auth = tweepy.OAuthHandler(cke, cse)
        self.auth.set_access_token(ato, ase)
        self.api = tweepy.API(self.auth)

    def delete(self, id):
        """Delete the status with the given id."""
        self.api.destroy_status(id)

    def search(self, query, savepoint = 0):
        """Return all search hits newer than ``savepoint`` in reversed API order."""
        tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
        tweets.reverse()
        return tweets

    def whoami(self):
        """Return the user name of the authenticated account."""
        return self.auth.get_username()
|
||||||
|
|
||||||
|
def getFollowerIDs(two=None):
    ''' Returns 5,000 follower IDs at most

    ``two`` is the Twitter helper to use; a ``twObject`` built from the
    configuration is created when it is omitted.
    '''
    if two is None:
        # Created on demand; as a default argument value the client was
        # built (and the configuration read) at import time.
        two = twObject()

    # Ask the client that performs the request who it is, instead of
    # building a second client just for the screen name.
    return two.api.followers_ids(screen_name=two.whoami())
|
||||||
|
|
||||||
|
def getFollowingIDs(two=None):
    ''' Returns the IDs of the accounts the authenticated user follows

    ``two`` is the Twitter helper to use; a ``twObject`` built from the
    configuration is created when it is omitted.
    '''
    if two is None:
        # Created on demand; as a default argument value the client was
        # built (and the configuration read) at import time.
        two = twObject()

    # Ask the client that performs the request who it is, instead of
    # building a second client just for the screen name.
    return two.api.friends_ids(screen_name=two.whoami())
|
||||||
|
|
||||||
|
def getNamesByIDs(fids=None, two=None):
    ''' Yields the screen name for every user ID in fids

    ``fids`` defaults to the follower IDs of the authenticated user and
    ``two`` to a ``twObject`` built from the configuration. Users are looked
    up in pages of 100 IDs (via ``setuptools.paginate``).
    '''
    # Both defaults are resolved when the generator starts running. As
    # default argument values they triggered a Twitter API request the
    # moment this module was imported.
    if two is None:
        two = twObject()
    if fids is None:
        fids = getFollowerIDs(two)

    for page in setuptools.paginate(fids, 100):
        followers = two.api.lookup_users(user_ids=page)
        for follower in followers:
            yield follower.screen_name
|
Loading…
Reference in a new issue