#!/usr/bin/env python3
"""Store new tweets, direct messages, followers, followings and names.

Run as a script, this fetches data through ``twitools`` and writes it to the
database behind ``dbtools.dbHelper``; attached media files are saved below
the relative ``media/`` directory.
"""

import argparse
import time

import requests

import dbtools
import dbtools.fillerfilter
# NOTE(review): presumably a project-local module (it must provide
# unescapeText), not the packaging library of the same name - confirm.
import setuptools
import twitools


def downloadMedia(url, tid, mid):
    """Download `url` into ``media/<tid>_<mid>.<ext>``.

    `<ext>` is whatever follows the last "." in `url`. Responses with a
    non-OK HTTP status are reported on stdout and not written to disk, so
    error pages are not stored as media files.

    Args:
        url: Address of the media file.
        tid: Identifier used as the first part of the file name.
        mid: Index of the media item; converted with int().
    """
    filename = "media/%s_%i.%s" % (str(tid), int(mid), url.split(".")[-1])

    remote = requests.get(url, stream=True)
    try:
        if not remote.ok:
            print("Failed to download %s (HTTP %i)." % (url, remote.status_code))
            return

        with open(filename, 'wb') as outfile:
            for chunk in remote.iter_content(chunk_size=1024):
                if chunk:
                    # No per-chunk flush: closing the file flushes it.
                    outfile.write(chunk)
    finally:
        # A streamed response keeps its connection until it is closed.
        remote.close()


def getTweets(db=None, user=None, two=None):
    """Store tweets found by the search ``from:<user>`` and their media.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when None.
        user: Name used in the search query; ``two.whoami()`` when None.
        two: Twitter helper; a new ``twitools.twObject()`` when None.

    Returns:
        Tuple ``(stored, last, savepoint)``: number of statuses that passed
        the filter, the id of the last such status (or `savepoint` if there
        was none), and the id the search started from.
    """
    # Defaults are resolved per call: as default-argument expressions they
    # would run once at import time, whether or not they are ever needed.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()
    if user is None:
        user = two.whoami()

    query = "from:" + user
    savepoint = db.getLatestTweet() + 1
    last = savepoint

    timeline = two.search(query, savepoint)

    tw_counter = 0

    for status in timeline:
        timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
        text = setuptools.unescapeText(status.text)

        if dbtools.fillerfilter.tweetFilter(status):
            try:
                # NOTE(review): SQL is built by string concatenation from
                # tweet text. Unless unescapeText escapes quotes, a "'" in
                # the text breaks or alters the statement - use parameter
                # binding if executeQuery supports it.
                db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
            except Exception:
                # Best effort: report and continue with the next status.
                print("Failed to insert %s into database." % str(status.id))

            if 'media' in status.entities:
                for mid, m in enumerate(status.entities['media']):
                    downloadMedia(m['media_url'], status.id, mid)

            last = status.id
            tw_counter = tw_counter + 1

    db.commit()

    return tw_counter, last, savepoint


def getMessages(db=None, two=None):
    """Store received and sent direct messages and their media.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when None.
        two: Twitter helper; a new ``twitools.twObject()`` when None.

    Returns:
        Tuple ``(stored, savepoint, latest)``: number of messages inserted,
        the id the fetch started from, and ``db.getLatestMessage()`` as
        read after the commit.
    """
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    mcount = 0
    savepoint = db.getLatestMessage() + 1
    new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True, include_entities=True)
    new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True, include_entities=True)

    for m in (new_messages + new_out_messages):
        if dbtools.fillerfilter.messageFilter(m, True):
            try:
                # NOTE(review): string-built SQL from message text; see the
                # same concern in getTweets.
                db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
                mcount += 1
            except Exception:
                # Best effort, but no longer silent.
                print("Failed to insert message %s into database." % str(m.id))

            if 'media' in m.entities:
                for mid, med in enumerate(m.entities['media']):
                    downloadMedia(med['media_url'], "m%i" % int(m.id), mid)

    db.commit()

    # Call getLatestMessage: returning the bound method itself handed the
    # caller a function object instead of a message id.
    return mcount, savepoint or 0, db.getLatestMessage()


def _syncIDs(db, table, stored, fetched, accept, firstrun):
    """Write the difference between `stored` and `fetched` IDs to `table`.

    IDs only in `fetched` are inserted with the current time as start and 0
    as end; IDs only in `stored` get their open row (``until = 0``) closed
    with the current time. `accept(id, gained)` can veto each change.

    Returns:
        Tuple ``(gained, lost)``; ``(0, 0)`` if either list is empty and
        `firstrun` is false.
    """
    if (not stored or not fetched) and not firstrun:
        print("Something went wrong.")
        return 0, 0

    # Sets make each membership test O(1) instead of scanning a list.
    stored_ids = set(stored)
    fetched_ids = set(fetched)

    gained = 0
    lost = 0

    for uid in fetched:
        if uid not in stored_ids and accept(uid, True):
            db.executeQuery("INSERT INTO %s VALUES('%s', %i, 0)" % (table, str(uid), int(time.time())))
            db.commit()
            gained += 1

    for uid in stored:
        if uid not in fetched_ids and accept(uid, False):
            db.executeQuery("UPDATE %s SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (table, int(time.time()), str(uid)))
            db.commit()
            lost += 1

    return gained, lost


def getFollowers(db=None, two=None, firstrun=False):
    """Record gained and lost followers in the ``followers`` table.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when None.
        two: Unused; kept so existing callers keep working.
        firstrun: Accept an empty stored or fetched list instead of
            treating it as an error.

    Returns:
        Tuple ``(gained, lost)``.
    """
    if db is None:
        db = dbtools.dbHelper()

    current = list(db.getFollowers())
    new = list(twitools.getFollowerIDs())

    return _syncIDs(db, "followers", current, new, dbtools.fillerfilter.followerFilter, firstrun)


def getFollowing(db=None, two=None, firstrun=False):
    """Record started and stopped followings in the ``following`` table.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when None.
        two: Unused; kept so existing callers keep working.
        firstrun: Accept an empty stored or fetched list instead of
            treating it as an error.

    Returns:
        Tuple ``(gained, lost)``.
    """
    if db is None:
        db = dbtools.dbHelper()

    current = list(db.getFollowing())
    new = list(twitools.getFollowingIDs())

    gained, lost = _syncIDs(db, "following", current, new, dbtools.fillerfilter.followingFilter, firstrun)
    db.commit()

    return gained, lost


def getNames(db=None, two=None):
    """Update the ``names`` table for every stored follower and following.

    For each user whose current name does not match the database, the open
    row is closed and a new row with the current name is inserted.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when None.
        two: Unused; kept so existing callers keep working.
    """
    if db is None:
        db = dbtools.dbHelper()

    ids = list(set(list(db.getFollowing()) + list(db.getFollowers())))

    for user in twitools.getNamesByIDs(ids):
        if not db.matchNameID(user["name"], user["id"]):
            # One timestamp for both statements so the old row ends exactly
            # when the new one begins.
            now = int(time.time())
            db.executeQuery("UPDATE names SET `until` = %i WHERE `id` = '%s' AND `until` = 0;" % (now, str(user["id"])))
            # NOTE(review): user names are interpolated into SQL unescaped.
            db.executeQuery("INSERT INTO names VALUES('%s', '%s', %i, 0)" % (str(user["id"]), str(user["name"]), now))

    db.commit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true")
    args = parser.parse_args()

    # One helper shared by every step.
    db = dbtools.dbHelper()

    count, last, first = getTweets(db)
    print("Stored %i tweets." % count)

    count, last, first = getMessages(db)
    print("Stored %i messages." % count)

    gained, lost = getFollowers(db, firstrun=args.first)
    print("Gained %i followers, lost %i." % (gained, lost))

    gained, lost = getFollowing(db, firstrun=args.first)
    print("Started following %i, stopped following %i." % (gained, lost))

    getNames(db)
    print("Stored handles of following/followers.")