284e64daee
Initial (non-functional) check-in of the CSV to sqlite3 converter
59 lines
1.4 KiB
Python
Executable file
59 lines
1.4 KiB
Python
Executable file
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import HTMLParser, sqlite3, time, tweepy, os
|
|
|
|
# --- Configuration ----------------------------------------------------------
# Twitter account whose tweets are archived, followed by the OAuth
# credentials handed to tweepy. The values below are placeholders.
user = "Username"
cke = "Consumer Key"
cse = "Consumer Secret"
ato = "Access Token"
ase = "Access Secret"

# Search query that restricts results to tweets sent by `user`.
search = "from:" + user

# SQLite file holding the `tweets` table (created by a separate script).
database_filename = "Database.db"

# Build the HTML-entity decoder once instead of once per tweet.
# `html.unescape` is used where it exists; otherwise fall back to the
# HTMLParser module imported at the top of this file (Python 2).
try:
    from html import unescape as _unescape_html
except ImportError:
    _unescape_html = HTMLParser.HTMLParser().unescape


def _native(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 (where ``str`` is ``bytes``) unicode text is encoded as
    UTF-8, matching what the script always printed; on Python 3 the text
    is returned unchanged.
    """
    if str is bytes and not isinstance(text, str):
        return text.encode('utf-8')
    return text


def load_savepoint(cur):
    """Return the largest ``tweet_id`` stored in ``tweets``, or 0.

    0 is returned when the table holds no rows or the stored value cannot
    be converted to an integer.

    Raises:
        sqlite3.OperationalError: propagated from the SELECT, e.g. when
            the ``tweets`` table does not exist yet.
    """
    cur.execute("SELECT max(tweet_id) FROM tweets")
    row = cur.fetchone()
    try:
        # max() over an empty table yields a single NULL (None) column.
        return int(row[0])
    except (TypeError, ValueError):
        return 0


def fetch_new_statuses(api, query, since_id):
    """Return every status matching *query* with an id above *since_id*.

    The statuses are returned in the reverse of the order in which the
    API yields them (presumably oldest first, assuming the search API
    lists the newest tweet first -- TODO confirm).
    """
    statuses = list(
        tweepy.Cursor(api.search, q=query, since_id=since_id).items())
    statuses.reverse()
    return statuses


def store_statuses(cur, statuses):
    """Print each status and insert it into ``tweets``; return the count.

    The caller is responsible for committing the transaction.
    """
    stored = 0
    for status in statuses:
        line = u"(%s) %s: %s\n" % (
            status.created_at, status.author.screen_name, status.text)
        print(_native(line))

        timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
        text = _unescape_html(status.text)

        # Bound parameters: sqlite3 handles quoting, so the tweet text
        # needs no manual escaping and cannot alter the statement.
        cur.execute(
            "INSERT INTO tweets (tweet_id, timestamp, text) VALUES (?, ?, ?)",
            (status.id, timestamp, text))
        stored += 1
    return stored


def main():
    """Archive tweets newer than the stored savepoint.

    Returns the process exit status: 0 on success, 1 when the ``tweets``
    table is missing.
    """
    sql_conn = sqlite3.connect(database_filename)
    try:
        cur = sql_conn.cursor()
        try:
            savepoint = load_savepoint(cur)
        except sqlite3.OperationalError:
            print("Please run ./makedb.py or ./csvdb.py before trying to populate the database.")
            # Stop here: without the table every later INSERT would fail
            # after all tweets had already been downloaded.
            return 1

        auth = tweepy.OAuthHandler(cke, cse)
        auth.set_access_token(ato, ase)
        api = tweepy.API(auth)

        timeline = fetch_new_statuses(api, search, savepoint)
        tw_counter = store_statuses(cur, timeline)
        sql_conn.commit()
    finally:
        # Release the database handle even when the fetch or insert fails.
        sql_conn.close()

    print("Finished. %d Tweets stored" % (tw_counter))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())