Make filler use tools

This commit is contained in:
Klaus-Uwe Mitterer 2015-04-22 02:06:32 +02:00
parent 5325d064da
commit 4fd256cde3
2 changed files with 41 additions and 37 deletions

View file

@@ -1,51 +1,44 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import config, html.parser, sqlite3, time, tweepy, os import tools
def fill(user=config.user,dbpath=config.dbpath): import html.parser, os
search = "from:" + user
conn = sqlite3.connect(dbpath)
cur = conn.cursor()
def getSavepoint(db):
db.executeQuery("SELECT max(tweet_id) FROM tweets")
try: try:
cur.execute("SELECT max(tweet_id) FROM tweets") return int(db.getNext()[0])
except: except:
raise IOError("Database file " + dbpath + " does not exist or is in the wrong format. Please create it before trying to populate it.") print("No tweets stored yet.")
return 0
try: def unescapeText(text):
savepoint = int(cur.fetchone()[0]) return html.parser.HTMLParser().unescape(text).replace("'","''")
except:
savepoint = 0
def fill(dbpath=tools.config.dbpath, user=tools.config.user, two=tools.twObject()):
query = "from:" + user
db = tools.dbHelper(dbpath)
savepoint = getSavepoint(db)
last = savepoint last = savepoint
auth = tweepy.OAuthHandler(config.cke, config.cse) timeline = two.search(query, savepoint)
auth.set_access_token(config.ato, config.ase)
api = tweepy.API(auth)
timelineIterator = list(tweepy.Cursor(api.search, q=search, since_id=savepoint).items())
timeline = []
for status in timelineIterator:
timeline.append(status)
timeline.reverse()
tw_counter = 0 tw_counter = 0
for status in timeline: for status in timeline:
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
text = html.parser.HTMLParser().unescape(status.text).replace("'", "''") text = unescapeText(status.text)
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
db.commit()
cur.execute("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
last = status.id last = status.id
tw_counter = tw_counter + 1 tw_counter = tw_counter + 1
conn.commit() db.closeConnection()
conn.close()
return tw_counter, last, savepoint return tw_counter, last, savepoint

View file

@@ -1,12 +1,13 @@
import config import config
import os, sqlite3 import os, sqlite3, tweepy
class dbObject: class dbObject:
def __init__(self, path=config.dbpath): def __init__(self, path=config.dbpath):
self.conn = sqlite3.connect(path) self.conn = sqlite3.connect(path)
self.cur = self.conn.cursor() self.cur = self.conn.cursor()
self.path = path
def closeConnection(self): def closeConnection(self):
return self.conn.close() return self.conn.close()
@@ -17,11 +18,8 @@ class dbObject:
def executeQuery(self, query): def executeQuery(self, query):
return self.cur.execute(query) return self.cur.execute(query)
def GetConnection(self): def getNext(self):
return self.conn return self.cur.fetchone()
def GetCursor(self):
return self.cur
def isInitialized(self): def isInitialized(self):
try: try:
@@ -31,12 +29,25 @@ class dbObject:
return False return False
class twObject:
    """Thin wrapper around an authenticated tweepy API handle.

    The credentials default to the values in ``config`` but can be
    overridden per instance.
    """

    def __init__(self, cke = config.cke, cse = config.cse, ato = config.ato, ase = config.ase):
        """Build the OAuth handler and the API object.

        cke / cse are passed to ``tweepy.OAuthHandler``; ato / ase are
        passed to ``set_access_token``.
        """
        # Use the arguments rather than reading config.* again; otherwise
        # credentials supplied by the caller would be silently ignored.
        self.auth = tweepy.OAuthHandler(cke, cse)
        self.auth.set_access_token(ato, ase)
        self.api = tweepy.API(self.auth)

    def search(self, query, savepoint = 0):
        """Return all search results for ``query`` with an id above ``savepoint``.

        The results are fully materialised into a list and returned in the
        reverse of the order in which the tweepy cursor yields them.
        """
        # NOTE(review): presumably the search API yields newest tweets first,
        # so reversing gives oldest-first order - confirm against tweepy docs.
        tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
        tweets.reverse()
        return tweets
def dbCheck(db, create = False): def dbCheck(db, create = False):
if (!create and dbInitialized(db)) or (create and !dbInitialized(db)): if (not create and dbInitialized(db)) or (create and not dbInitialized(db)):
return True return True
if create: if create:
raise ValueError("Provided database file " + path + " is already initialized. Remove it manually before trying to recreate it.") raise ValueError("Provided database file " + db.path + " is already initialized. Remove it manually before trying to recreate it.")
raise ValueError("Provided database file " + path + " is not initialized. Create it using makedb.py or csvdb.py") raise ValueError("Provided database file " + db.path + " is not initialized. Create it using makedb.py or csvdb.py")
def dbHelper(path, create = False): def dbHelper(path, create = False):