Make filler use tools
This commit is contained in:
parent
5325d064da
commit
4fd256cde3
2 changed files with 41 additions and 37 deletions
49
filler.py
49
filler.py
|
@ -1,51 +1,44 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import config, html.parser, sqlite3, time, tweepy, os
|
import tools
|
||||||
|
|
||||||
def fill(user=config.user,dbpath=config.dbpath):
|
import html.parser, os
|
||||||
search = "from:" + user
|
|
||||||
|
|
||||||
conn = sqlite3.connect(dbpath)
|
|
||||||
cur = conn.cursor()
|
|
||||||
|
|
||||||
|
def getSavepoint(db):
|
||||||
|
db.executeQuery("SELECT max(tweet_id) FROM tweets")
|
||||||
try:
|
try:
|
||||||
cur.execute("SELECT max(tweet_id) FROM tweets")
|
return int(db.getNext()[0])
|
||||||
except:
|
except:
|
||||||
raise IOError("Database file " + dbpath + " does not exist or is in the wrong format. Please create it before trying to populate it.")
|
print("No tweets stored yet.")
|
||||||
|
return 0
|
||||||
|
|
||||||
try:
|
def unescapeText(text):
|
||||||
savepoint = int(cur.fetchone()[0])
|
return html.parser.HTMLParser().unescape(text).replace("'","''")
|
||||||
except:
|
|
||||||
savepoint = 0
|
|
||||||
|
|
||||||
|
def fill(dbpath=tools.config.dbpath, user=tools.config.user, two=tools.twObject()):
|
||||||
|
query = "from:" + user
|
||||||
|
|
||||||
|
db = tools.dbHelper(dbpath)
|
||||||
|
|
||||||
|
savepoint = getSavepoint(db)
|
||||||
last = savepoint
|
last = savepoint
|
||||||
|
|
||||||
auth = tweepy.OAuthHandler(config.cke, config.cse)
|
timeline = two.search(query, savepoint)
|
||||||
auth.set_access_token(config.ato, config.ase)
|
|
||||||
api = tweepy.API(auth)
|
|
||||||
|
|
||||||
timelineIterator = list(tweepy.Cursor(api.search, q=search, since_id=savepoint).items())
|
|
||||||
|
|
||||||
timeline = []
|
|
||||||
|
|
||||||
for status in timelineIterator:
|
|
||||||
timeline.append(status)
|
|
||||||
|
|
||||||
timeline.reverse()
|
|
||||||
|
|
||||||
tw_counter = 0
|
tw_counter = 0
|
||||||
|
|
||||||
for status in timeline:
|
for status in timeline:
|
||||||
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
|
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
|
||||||
text = html.parser.HTMLParser().unescape(status.text).replace("'", "''")
|
text = unescapeText(status.text)
|
||||||
|
|
||||||
|
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
|
||||||
|
db.commit()
|
||||||
|
|
||||||
cur.execute("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
|
|
||||||
last = status.id
|
last = status.id
|
||||||
tw_counter = tw_counter + 1
|
tw_counter = tw_counter + 1
|
||||||
|
|
||||||
conn.commit()
|
db.closeConnection()
|
||||||
conn.close()
|
|
||||||
|
|
||||||
return tw_counter, last, savepoint
|
return tw_counter, last, savepoint
|
||||||
|
|
||||||
|
|
29
tools.py
29
tools.py
|
@ -1,12 +1,13 @@
|
||||||
import config
|
import config
|
||||||
|
|
||||||
import os, sqlite3
|
import os, sqlite3, tweepy
|
||||||
|
|
||||||
class dbObject:
|
class dbObject:
|
||||||
|
|
||||||
def __init__(self, path=config.dbpath):
|
def __init__(self, path=config.dbpath):
|
||||||
self.conn = sqlite3.connect(path)
|
self.conn = sqlite3.connect(path)
|
||||||
self.cur = self.conn.cursor()
|
self.cur = self.conn.cursor()
|
||||||
|
self.path = path
|
||||||
|
|
||||||
def closeConnection(self):
|
def closeConnection(self):
|
||||||
return self.conn.close()
|
return self.conn.close()
|
||||||
|
@ -17,11 +18,8 @@ class dbObject:
|
||||||
def executeQuery(self, query):
|
def executeQuery(self, query):
|
||||||
return self.cur.execute(query)
|
return self.cur.execute(query)
|
||||||
|
|
||||||
def GetConnection(self):
|
def getNext(self):
|
||||||
return self.conn
|
return self.cur.fetchone()
|
||||||
|
|
||||||
def GetCursor(self):
|
|
||||||
return self.cur
|
|
||||||
|
|
||||||
def isInitialized(self):
|
def isInitialized(self):
|
||||||
try:
|
try:
|
||||||
|
@ -31,12 +29,25 @@ class dbObject:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class twObject:
    """Small wrapper holding an authenticated tweepy API client.

    Attributes set by the constructor:
        auth: the ``tweepy.OAuthHandler`` built from the given credentials.
        api:  the ``tweepy.API`` instance created from ``auth``.
    """

    def __init__(self, cke = config.cke, cse = config.cse, ato = config.ato, ase = config.ase):
        """Build the OAuth handler and API client.

        Args:
            cke: first argument to ``tweepy.OAuthHandler`` (consumer key).
            cse: second argument to ``tweepy.OAuthHandler`` (consumer secret).
            ato: first argument to ``set_access_token`` (access token).
            ase: second argument to ``set_access_token`` (access token secret).

        All four default to the values found in the ``config`` module.
        """
        # Use the arguments rather than reading config.* again; otherwise
        # credentials supplied by the caller would be silently ignored.
        self.auth = tweepy.OAuthHandler(cke, cse)
        self.auth.set_access_token(ato, ase)
        self.api = tweepy.API(self.auth)

    def search(self, query, savepoint = 0):
        """Run a search and return all results as a reversed list.

        Args:
            query: search string, passed to the API as ``q``.
            savepoint: passed to the API as ``since_id``; defaults to 0.

        Returns:
            A list of every item yielded by the cursor, in the reverse of
            the order the API returned them (presumably oldest first --
            confirm against the API's result ordering).
        """
        # The cursor is fully drained into memory before reversing.
        tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
        tweets.reverse()
        return tweets
|
||||||
|
|
||||||
|
|
||||||
def dbCheck(db, create = False):
|
def dbCheck(db, create = False):
|
||||||
if (!create and dbInitialized(db)) or (create and !dbInitialized(db)):
|
if (not create and dbInitialized(db)) or (create and not dbInitialized(db)):
|
||||||
return True
|
return True
|
||||||
if create:
|
if create:
|
||||||
raise ValueError("Provided database file " + path + " is already initialized. Remove it manually before trying to recreate it.")
|
raise ValueError("Provided database file " + db.path + " is already initialized. Remove it manually before trying to recreate it.")
|
||||||
raise ValueError("Provided database file " + path + " is not initialized. Create it using makedb.py or csvdb.py")
|
raise ValueError("Provided database file " + db.path + " is not initialized. Create it using makedb.py or csvdb.py")
|
||||||
|
|
||||||
|
|
||||||
def dbHelper(path, create = False):
|
def dbHelper(path, create = False):
|
||||||
|
|
Loading…
Reference in a new issue