2015-04-13 20:58:32 +00:00
|
|
|
#!/usr/bin/env python3
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2017-03-17 21:37:07 +00:00
|
|
|
import argparse, dbtools, filters.filler, mediatools, requests, setuptools, time, twitools
|
2017-02-26 23:39:34 +00:00
|
|
|
|
2016-06-30 10:33:09 +00:00
|
|
|
def getTweets(db=None, user=None, two=None):
    """Store tweets newer than the latest one already in the database.

    Searches for ``"from:" + user`` starting at the savepoint
    (``db.getLatestTweet() + 1``).  Every result accepted by
    ``filters.filler.tweetFilter`` is inserted into the ``tweets`` table and
    its media entities are passed to ``mediatools``.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        user: Value appended to the ``from:`` search operator; defaults to
            ``twitools.twObject().whoami()``.
        two: Client whose ``search()`` is used; a new ``twitools.twObject()``
            when omitted.

    Returns:
        Tuple ``(tw_counter, last, savepoint)``: the number of tweets that
        passed the filter, the id of the last such tweet (the savepoint when
        there was none), and the savepoint itself.
    """
    # Defaults are resolved per call.  Building them in the signature would
    # construct the database helper and the Twitter clients as a side effect
    # of merely importing this module.
    if db is None:
        db = dbtools.dbHelper()
    if user is None:
        user = twitools.twObject().whoami()
    if two is None:
        two = twitools.twObject()

    query = "from:" + user
    savepoint = db.getLatestTweet() + 1
    last = savepoint
    tw_counter = 0

    for status in two.search(query, savepoint):
        timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
        text = setuptools.unescapeText(status.text)

        if not filters.filler.tweetFilter(status):
            continue

        try:
            # NOTE(review): the statement is built by concatenation, so it
            # relies on unescapeText() returning text that is safe inside a
            # quoted SQL literal -- confirm, or bind parameters if
            # dbHelper.executeQuery supports them.
            db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
        except Exception:
            # Best effort: report the failed row and keep going.  Unlike a
            # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
            print("Failed to insert %s into database." % str(status.id))

        if 'media' in status.entities:
            for mid, media in enumerate(status.entities['media']):
                if "video" in media["expanded_url"]:
                    mediatools.videoHandler(media["expanded_url"], status.id, mid)
                else:
                    mediatools.photoHandler(media['media_url'], status.id, mid)

        last = status.id
        tw_counter += 1

    db.commit()

    return tw_counter, last, savepoint
|
2015-03-09 17:32:24 +00:00
|
|
|
|
2016-06-30 10:33:09 +00:00
|
|
|
def getMessages(db=None, two=None):
    """Store direct messages newer than the latest one already in the database.

    Requests received and sent direct messages through ``two.api`` with
    ``since_id`` set to the savepoint (``db.getLatestMessage() + 1``) and
    ``count=200``.  Every message accepted by
    ``filters.filler.messageFilter(m, True)`` is inserted into the
    ``messages`` table and its media entities are downloaded.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Client exposing ``api``; a new ``twitools.twObject()`` when
            omitted.

    Returns:
        Tuple ``(mcount, savepoint, latest)``: the number of successfully
        inserted messages, the savepoint used for the request (``0`` if it
        is falsy), and ``db.getLatestMessage()`` evaluated after the commit.
    """
    # Defaults are resolved per call rather than in the signature, so that
    # importing this module does not construct the helpers.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    mcount = 0
    savepoint = db.getLatestMessage() + 1
    new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True, include_entities=True)
    new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True, include_entities=True)

    for m in (new_messages + new_out_messages):
        if not filters.filler.messageFilter(m, True):
            continue

        try:
            # NOTE(review): string-built SQL; relies on unescapeText()
            # returning text that is safe inside a quoted literal -- confirm.
            db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
            mcount += 1
        except Exception:
            # Previously swallowed silently; report the failure the same way
            # getTweets does and carry on with the remaining messages.
            print("Failed to insert %s into database." % str(m.id))

        if 'media' in m.entities:
            for mid, med in enumerate(m.entities['media']):
                # The old call target ``downloadMedia`` is not defined or
                # imported in this file and raised NameError.
                # NOTE(review): photoHandler is assumed to be its replacement
                # (same argument order as in getTweets) -- confirm it accepts
                # the "m<id>" string identifier.
                mediatools.photoHandler(med['media_url'], "m%i" % int(m.id), mid)

    db.commit()

    # Call the getter: the previous code returned the bound method itself.
    return mcount, savepoint or 0, db.getLatestMessage()
|
2016-06-30 10:33:09 +00:00
|
|
|
|
2016-08-05 21:13:31 +00:00
|
|
|
def getFollowers(db=None, two=None, firstrun=False):
    """Synchronise the ``followers`` table with the current follower ids.

    Ids returned by ``twitools.getFollowerIDs()`` that are not yet stored and
    pass ``filters.filler.followerFilter(id, True)`` are inserted as
    ``(id, current Unix time, 0)``.  Stored ids that are no longer returned
    and pass ``followerFilter(id, False)`` get ``until`` set to the current
    Unix time on their row where ``until`` is still 0.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Unused; kept so existing callers keep working.
        firstrun: When true, an empty stored or fetched list is accepted
            instead of being treated as an error.

    Returns:
        Tuple ``(gained, lost)``; ``(0, 0)`` when either list is empty and
        ``firstrun`` is false.
    """
    # Resolved per call so importing the module does not construct a helper.
    if db is None:
        db = dbtools.dbHelper()

    current = list(db.getFollowers())
    new = list(twitools.getFollowerIDs())

    if not firstrun and (not current or not new):
        print("Something went wrong.")
        return 0, 0

    # Set lookups replace repeated list scans inside the loops.  Ids are
    # assumed hashable (getNames already builds a set from the stored ids).
    known = set(current)
    fetched = set(new)
    gained = 0
    lost = 0

    for follower in new:
        if follower not in known and filters.filler.followerFilter(follower, True):
            db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (str(follower), int(time.time())))
            db.commit()
            gained += 1

    for follower in current:
        if follower not in fetched and filters.filler.followerFilter(follower, False):
            db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), str(follower)))
            db.commit()
            lost += 1

    return gained, lost
|
|
|
|
|
2016-08-05 21:13:31 +00:00
|
|
|
def getFollowing(db=None, two=None, firstrun=False):
    """Synchronise the ``following`` table with the currently followed ids.

    Ids returned by ``twitools.getFollowingIDs()`` that are not yet stored
    and pass ``filters.filler.followingFilter(id, True)`` are inserted as
    ``(id, current Unix time, 0)``.  Stored ids that are no longer returned
    and pass ``followingFilter(id, False)`` get ``until`` set to the current
    Unix time on their row where ``until`` is still 0.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Unused; kept so existing callers keep working.
        firstrun: When true, an empty stored or fetched list is accepted
            instead of being treated as an error.

    Returns:
        Tuple ``(gained, lost)``; ``(0, 0)`` when either list is empty and
        ``firstrun`` is false.
    """
    # Resolved per call so importing the module does not construct a helper.
    if db is None:
        db = dbtools.dbHelper()

    current = list(db.getFollowing())
    new = list(twitools.getFollowingIDs())

    if not firstrun and (not current or not new):
        print("Something went wrong.")
        return 0, 0

    # Set lookups replace repeated list scans inside the loops.  Ids are
    # assumed hashable (getNames already builds a set from the stored ids).
    known = set(current)
    fetched = set(new)
    gained = 0
    lost = 0

    for following in new:
        if following not in known and filters.filler.followingFilter(following, True):
            db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (str(following), int(time.time())))
            db.commit()
            gained += 1

    for following in current:
        if following not in fetched and filters.filler.followingFilter(following, False):
            db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), str(following)))
            db.commit()
            lost += 1

    db.commit()

    return gained, lost
|
2016-08-01 16:02:11 +00:00
|
|
|
|
2017-01-30 02:04:51 +00:00
|
|
|
def getNames(db=None, two=None):
    """Record the current names of all stored following/follower ids.

    Looks up every distinct id from ``db.getFollowing()`` and
    ``db.getFollowers()`` via ``twitools.getNamesByIDs``.  When
    ``db.matchNameID(name, id)`` is falsy for a user, the row for that id
    whose ``until`` is 0 gets ``until`` set to the current Unix time and a
    new row ``(id, name, current Unix time, 0)`` is inserted.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Unused; kept so existing callers keep working.
    """
    # Resolved per call so importing the module does not construct a helper.
    if db is None:
        db = dbtools.dbHelper()

    following = list(db.getFollowing())
    followers = list(db.getFollowers())
    user_ids = list(set(following + followers))

    for user in twitools.getNamesByIDs(user_ids):
        if db.matchNameID(user["name"], user["id"]):
            continue

        # One timestamp for both statements, so the closed row ends exactly
        # when the new row begins.
        now = int(time.time())
        db.executeQuery("UPDATE names SET `until` = %i WHERE `id` = '%s' AND `until` = 0;" % (now, str(user["id"])))
        # NOTE(review): the name is interpolated unescaped; a value
        # containing a single quote would break this statement -- confirm
        # what getNamesByIDs can return.
        db.executeQuery("INSERT INTO names VALUES('%s', '%s', %i, 0)" % (str(user["id"]), str(user["name"]), now))
        db.commit()
|
|
|
|
|
2015-04-15 22:53:15 +00:00
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line, then run every import step
    # against one shared database helper and print a summary after each.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-f", "--first",
        help="first run: ignore empty databases",
        action="store_true",
    )
    args = cli.parse_args()

    db = dbtools.dbHelper()

    # Only the counter of each result tuple is reported.
    tweet_count, _last, _first = getTweets(db)
    print("Stored %i tweets." % tweet_count)

    message_count, _last, _first = getMessages(db)
    print("Stored %i messages." % message_count)

    follower_delta = getFollowers(db, firstrun=args.first)
    gained, lost = follower_delta
    print("Gained %i followers, lost %i." % (gained, lost))

    following_delta = getFollowing(db, firstrun=args.first)
    gained, lost = following_delta
    print("Started following %i, stopped following %i." % (gained, lost))

    getNames(db)
    print("Stored handles of following/followers.")
|