Further preparation for MySQL, which is not working yet though...
Make filler handle direct messages. Add table and functions for direct messages.
This commit is contained in:
parent
aaa683aa73
commit
aad1531392
8 changed files with 67 additions and 28 deletions
17
csvdb.py
17
csvdb.py
|
@ -4,29 +4,22 @@ import dbtools
|
||||||
|
|
||||||
import sqlite3, csv, sys
|
import sqlite3, csv, sys
|
||||||
|
|
||||||
def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'):
|
def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'):
|
||||||
try:
|
try:
|
||||||
infile = open(infile)
|
infile = open(infile)
|
||||||
except IOError:
|
except IOError:
|
||||||
raise IOError("Unable to read %s." % infile)
|
raise IOError("Unable to read %s." % infile)
|
||||||
|
|
||||||
input = list(csv.reader(infile))
|
infile = list(csv.reader(infile))
|
||||||
|
|
||||||
|
for row in infile[1:]:
|
||||||
try:
|
try:
|
||||||
dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
|
||||||
except:
|
|
||||||
raise IOError("%s already exists. Please delete it before trying to create a new one." % path)
|
|
||||||
|
|
||||||
for row in input[1:]:
|
|
||||||
dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
|
dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
dbo.commit()
|
dbo.commit()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) > 2:
|
|
||||||
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
|
|
||||||
try:
|
|
||||||
makeDB(sys.argv[1])
|
|
||||||
except IndexError:
|
|
||||||
makeDB()
|
makeDB()
|
||||||
|
|
||||||
|
|
|
@ -65,7 +65,18 @@ class dbObject:
|
||||||
mode = "MIN"
|
mode = "MIN"
|
||||||
else:
|
else:
|
||||||
mode = "MAX"
|
mode = "MAX"
|
||||||
|
if self.dbtype == SQLITE:
|
||||||
return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
|
return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
|
||||||
|
else:
|
||||||
|
self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode)
|
||||||
|
return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode]))
|
||||||
|
|
||||||
|
def getLatestMessage(db):
|
||||||
|
db.executeQuery("SELECT max(id) FROM messages")
|
||||||
|
try:
|
||||||
|
return int(db.getNext()[0])
|
||||||
|
except:
|
||||||
|
return 0
|
||||||
|
|
||||||
def getLatestTweet(db):
|
def getLatestTweet(db):
|
||||||
db.executeQuery("SELECT max(tweet_id) FROM tweets")
|
db.executeQuery("SELECT max(tweet_id) FROM tweets")
|
||||||
|
@ -74,3 +85,10 @@ class dbObject:
|
||||||
except:
|
except:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def dbHelper():
|
||||||
|
if setuptools.dbtype() == SQLITE:
|
||||||
|
return dbObject(dbtype=SQLITE, path=setuptools.dbpath())
|
||||||
|
elif setuptools.dbtype() == MYSQL:
|
||||||
|
return dbObject(dbtype=MYSQL, host=setuptools.dbhost(), user=setuptools.dbuser(), pwd=setuptools.dbpass(), db=setuptools.dbname())
|
||||||
|
else:
|
||||||
|
raise setuptools.SetupException()
|
||||||
|
|
40
filler.py
40
filler.py
|
@ -2,9 +2,9 @@
|
||||||
|
|
||||||
import dbtools, setuptools, twitools
|
import dbtools, setuptools, twitools
|
||||||
|
|
||||||
def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()):
|
def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()):
|
||||||
query = "from:" + user
|
query = "from:" + user
|
||||||
savepoint = db.getLatestTweet()
|
savepoint = db.getLatestTweet() + 1
|
||||||
last = savepoint
|
last = savepoint
|
||||||
|
|
||||||
timeline = two.search(query, savepoint)
|
timeline = two.search(query, savepoint)
|
||||||
|
@ -15,7 +15,10 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.
|
||||||
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
|
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
|
||||||
text = setuptools.unescapeText(status.text)
|
text = setuptools.unescapeText(status.text)
|
||||||
|
|
||||||
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
|
try:
|
||||||
|
db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
last = status.id
|
last = status.id
|
||||||
|
@ -25,7 +28,32 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.
|
||||||
|
|
||||||
return tw_counter, last, savepoint
|
return tw_counter, last, savepoint
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()):
|
||||||
count, last, first = fill()
|
mcount = 0
|
||||||
print("Stored %i tweets after %i until %i." % (count, first, last))
|
savepoint = db.getLatestMessage() + 1
|
||||||
|
new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True)
|
||||||
|
new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True)
|
||||||
|
|
||||||
|
for m in new_messages:
|
||||||
|
try:
|
||||||
|
db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
|
||||||
|
mcount += 1
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
for m in new_out_messages:
|
||||||
|
try:
|
||||||
|
db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
|
||||||
|
mcount += 1
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
return mcount, savepoint or 0, db.getLatestMessage()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
count, last, first = getTweets()
|
||||||
|
print("Stored %i tweets after %i until %i." % (count, first, last))
|
||||||
|
count, last, first = getMessages()
|
||||||
|
print("Stored %i messages after %i until %i." % (count, first, last))
|
||||||
|
|
|
@ -91,7 +91,7 @@ def getHeaders(strings, av):
|
||||||
return [headers]
|
return [headers]
|
||||||
|
|
||||||
|
|
||||||
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False):
|
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False):
|
||||||
if fr == None:
|
if fr == None:
|
||||||
fr = db.getFLDate()
|
fr = db.getFLDate()
|
||||||
if to == None:
|
if to == None:
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
import dbtools
|
import dbtools
|
||||||
import argparse, operator, re, sys
|
import argparse, operator, re, sys
|
||||||
|
|
||||||
def getTweets(mode = "@", db = dbtools.dbObject()):
|
def getTweets(mode = "@", db = dbtools.dbHelper()):
|
||||||
handles = dict()
|
handles = dict()
|
||||||
tweets = db.executeQuery("SELECT text FROM tweets")
|
tweets = db.executeQuery("SELECT text FROM tweets")
|
||||||
|
|
||||||
|
@ -36,5 +36,5 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
mode = "@"
|
mode = "@"
|
||||||
|
|
||||||
for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True):
|
for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True):
|
||||||
print(handle + "," + str(tweets))
|
print(handle + "," + str(tweets))
|
||||||
|
|
|
@ -39,7 +39,7 @@ def queryBuilder(fr, to):
|
||||||
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
|
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
|
||||||
|
|
||||||
|
|
||||||
def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True):
|
def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True):
|
||||||
if fr == None:
|
if fr == None:
|
||||||
fr = db.getFLDate()
|
fr = db.getFLDate()
|
||||||
if to == None:
|
if to == None:
|
||||||
|
|
|
@ -4,7 +4,7 @@ import dbtools
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
def makeDB(db=dbtools.dbObject()):
|
def makeDB(db=dbtools.dbHelper()):
|
||||||
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
|
@ -12,7 +12,7 @@ def getSetting(section, setting, path = "config.cfg"):
|
||||||
|
|
||||||
def dbtype():
|
def dbtype():
|
||||||
try:
|
try:
|
||||||
return getSetting("Database", "type")
|
return int(getSetting("Database", "type"))
|
||||||
except:
|
except:
|
||||||
return 0 # for SQLite3
|
return 0 # for SQLite3
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue