2015-04-13 20:58:32 +00:00
|
|
|
#!/usr/bin/env python3
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2015-04-22 00:06:32 +00:00
|
|
|
import tools
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2015-04-22 00:06:32 +00:00
|
|
|
import html.parser, os
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2015-04-22 00:06:32 +00:00
|
|
|
def getSavepoint(db):
    """Return the highest tweet id already stored, or 0 if there is none.

    Runs ``SELECT max(tweet_id) FROM tweets`` through *db* and converts the
    first column of the next result row to ``int``.

    Args:
        db: Database helper exposing ``executeQuery(sql)`` and ``getNext()``.

    Returns:
        The largest stored ``tweet_id`` as an int, or 0 (after printing a
        notice) when the result cannot be converted to an int.
    """
    db.executeQuery("SELECT max(tweet_id) FROM tweets")
    try:
        return int(db.getNext()[0])
    except (TypeError, IndexError, ValueError):
        # Only conversion failures mean "nothing stored": int(None) and
        # None[0] raise TypeError, an empty row raises IndexError, and a
        # non-numeric value raises ValueError. Anything else (interrupts,
        # genuine database errors) is allowed to propagate instead of being
        # misreported as an empty table.
        print("No tweets stored yet.")
        return 0
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2015-04-22 00:06:32 +00:00
|
|
|
def unescapeText(text):
    """Decode HTML entities in *text* and double every single quote.

    The quote doubling (``'`` -> ``''``) is the SQL escaping for string
    literals; it is kept because the result is concatenated into a quoted
    SQL value by the caller in this file.

    Args:
        text: String that may contain HTML character references.

    Returns:
        The unescaped string with each ``'`` replaced by ``''``.
    """
    # HTMLParser().unescape() was removed in Python 3.9; html.unescape() is
    # the supported equivalent and is reachable through the file's existing
    # ``import html.parser``.
    return html.unescape(text).replace("'", "''")
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2015-04-22 00:06:32 +00:00
|
|
|
def fill(dbpath=tools.config.dbpath, user=tools.config.user, two=tools.twObject()):
    """Store tweets newer than the current savepoint in the database.

    Searches for ``"from:" + user`` starting at the highest stored tweet id
    and inserts every returned status into the ``tweets`` table, committing
    after each insert.

    NOTE: the default arguments are evaluated once, when this function is
    defined, so ``tools.twObject()`` is called at import time.

    Args:
        dbpath: Path handed to ``tools.dbHelper``.
        user: Account name used to build the ``from:`` search query.
        two: Object providing ``search(query, savepoint)``.

    Returns:
        A tuple ``(tw_counter, last, savepoint)``: the number of inserted
        tweets, the id of the last status processed (equal to the savepoint
        if none were returned), and the savepoint the search started from.
    """
    query = "from:" + user
    db = tools.dbHelper(dbpath)
    try:
        savepoint = getSavepoint(db)
        last = savepoint
        timeline = two.search(query, savepoint)
        tw_counter = 0
        for status in timeline:
            # NOTE(review): created_at is presumably UTC, hence the fixed
            # " +0000" suffix -- confirm against the search API.
            timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
            text = unescapeText(status.text)
            # NOTE(review): the statement is assembled by string
            # concatenation; the only escaping is the quote doubling done by
            # unescapeText(). Prefer a parameterized query if dbHelper
            # supports bound parameters.
            db.executeQuery(
                "INSERT INTO tweets('tweet_id','timestamp','text') VALUES("
                + str(status.id) + ",'" + timestamp + "','" + text + "')"
            )
            db.commit()
            last = status.id
            tw_counter += 1
    finally:
        # Release the connection even when the search or an insert fails.
        db.closeConnection()
    return tw_counter, last, savepoint
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2015-04-15 22:53:15 +00:00
|
|
|
if __name__ == "__main__":
    # fill() returns (number stored, last processed id, starting savepoint).
    stored, newest_id, start_id = fill()
    summary = "Stored %i tweets after %i until %i." % (stored, start_id, newest_id)
    print(summary)
|