2017-02-15 18:09:48 +00:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
2017-03-08 01:43:00 +00:00
|
|
|
|
import dbtools, setuptools, twitools, filters.markov
|
2017-02-21 21:22:44 +00:00
|
|
|
|
import argparse, datetime, html, markovify, nltk, operator, os, random, re, string, sys, time
|
2017-02-15 18:09:48 +00:00
|
|
|
|
|
2017-02-15 21:19:20 +00:00
|
|
|
|
class Possy(markovify.NewlineText):
    """Newline-delimited markovify model whose tokens carry part-of-speech tags.

    ``word_split`` turns every token into ``word::TAG`` using ``nltk.pos_tag``;
    ``word_join`` removes the tag again when a sentence is put back together.
    """

    def word_split(self, sentence):
        """Split *sentence* with the model's pattern and tag each token.

        Returns a list of ``word::TAG`` strings, one per token.
        """
        tokens = re.split(self.word_split_pattern, sentence)
        return [f"{word}::{tag}" for word, tag in nltk.pos_tag(tokens)]

    def word_join(self, words):
        """Join ``word::TAG`` tokens into a space-separated sentence.

        Only the part before the first ``::`` of each token is kept.
        """
        plain_words = [token.partition("::")[0] for token in words]
        return " ".join(plain_words)
|
2017-02-15 18:09:48 +00:00
|
|
|
|
|
2017-02-18 21:14:35 +00:00
|
|
|
|
def sanitizeText(text):
    """Strip retweet prefixes and leading/trailing @-mentions from a tweet.

    The rules are applied recursively until none matches:

    * If the first or second character is ``@`` and the second
      whitespace-separated word does not start with a lowercase ASCII
      letter, the first word is dropped.
    * If the last word starts with ``@`` and the text ends in a character
      that may appear in a handle (ASCII letter, digit or ``_``), the last
      word is dropped.
    * If the text starts with ``RT @``, everything up to and including the
      first ``:`` is dropped.

    Args:
        text: The raw tweet text.

    Returns:
        The cleaned text, or ``""`` when the text is too short for the
        checks above (for example empty, a single character, or a lone
        mention). Words are re-joined with single spaces whenever a mention
        is dropped.
    """
    handle_chars = string.digits + string.ascii_letters + "_"
    split = text.split()
    try:
        if text[0] == "@" or text[1] == "@":
            if split[1][0] not in string.ascii_lowercase:
                return sanitizeText(" ".join(split[1:]))
        if split[-1][0] == "@" and text[-1] in handle_chars:
            return sanitizeText(" ".join(split[:-1]))
        if text[:4] == "RT @":
            return sanitizeText(text.partition(":")[2])
    except IndexError:
        # Running out of characters or words is the only expected failure;
        # anything else (including KeyboardInterrupt) must propagate instead
        # of being silently turned into an empty string.
        return ""
    return text
|
|
|
|
|
|
2017-02-15 18:09:48 +00:00
|
|
|
|
def getText(db=None):
    """Return the sanitized text of all stored tweets as one string.

    Every row returned by ``SELECT text FROM tweets;`` is passed through
    ``sanitizeText`` and terminated with a newline. Lines that are empty or
    contain only whitespace are then dropped and HTML entities are unescaped.

    Args:
        db: Object providing ``executeQuery(sql)``. When omitted, a
            ``dbtools.dbHelper()`` is created at call time, so importing
            this module no longer constructs one as a side effect.

    Returns:
        The newline-separated, HTML-unescaped tweet text.
    """
    if db is None:
        db = dbtools.dbHelper()
    rows = db.executeQuery('SELECT text FROM tweets;')
    # ``row`` rather than ``string`` so the ``string`` module is not shadowed.
    text = "".join(sanitizeText(row[0]) + "\n" for row in rows)
    kept_lines = [line for line in text.strip().splitlines(True) if line.strip()]
    return html.unescape("".join(kept_lines))
|
2017-02-15 18:09:48 +00:00
|
|
|
|
|
2017-02-15 18:16:19 +00:00
|
|
|
|
def markovifyText(text):
    """Build a ``Possy`` model from *text* and return one generated sentence.

    The sentence is requested with a length limit of 130 characters.

    NOTE(review): as written, the final ``replace`` maps "@" to an
    identical-looking "@" -- confirm whether a different replacement
    character was intended.
    NOTE(review): presumably ``make_short_sentence`` can return ``None`` when
    no sentence is produced, in which case ``.replace`` raises
    ``AttributeError`` -- verify against the markovify documentation.
    """
    model = Possy(text)
    sentence = model.make_short_sentence(130)
    return sentence.replace("@", "@")
|
2017-02-15 18:09:48 +00:00
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Generate a sentence from the stored tweets and run it through the
    # markov text filter.
    text = markovifyText(getText())
    outtext = filters.markov.textFilter(text)

    # Only tweet when the filter returned something truthy.
    if outtext:
        tweet_filter = filters.markov.tweetFilter
        posted = twitools.tweet(outtext, section=setuptools.MARKOV)
        # The tweet call's result and the unfiltered sentence go to the tweet filter.
        tweet_filter(posted, text)
|