43 lines
1.4 KiB
Python
Executable file
43 lines
1.4 KiB
Python
Executable file
#!/usr/bin/env python3
|
||
|
||
import dbtools, setuptools, twitools, filters.markov
|
||
import argparse, datetime, html, markovify, nltk, operator, os, random, re, string, sys, time
|
||
|
||
class Possy(markovify.NewlineText):
    """Newline-delimited markovify model whose states carry part-of-speech tags.

    Each word is stored as "<word>::<tag>", where the tag comes from
    ``nltk.pos_tag``. ``word_join`` removes the tags again when a generated
    sentence is turned back into plain text.
    """

    def word_split(self, sentence):
        """Split *sentence* into "<word>::<tag>" tokens.

        The sentence is split with the model's ``word_split_pattern`` and
        the resulting words are tagged with ``nltk.pos_tag``.
        """
        tokens = re.split(self.word_split_pattern, sentence)
        return ["::".join(pair) for pair in nltk.pos_tag(tokens)]

    def word_join(self, words):
        """Join "<word>::<tag>" tokens into a space-separated sentence."""
        return " ".join(self._strip_tag(word) for word in words)

    @staticmethod
    def _strip_tag(token):
        """Return the word part of a "<word>::<tag>" token.

        The separator is the last "::" that still leaves at least one
        character for the tag. Splitting on the first "::" instead would
        truncate words that contain "::" or end in ":" ("Note:" + "::NN"
        would come back as "Note"). Bounding the search one character
        before the end keeps tokens whose tag is ":" intact.
        Assumes tags are non-empty and never contain "::".
        """
        cut = token.rfind("::", 0, len(token) - 1)
        # No separator at all: hand the token back unchanged.
        return token if cut == -1 else token[:cut]
|
||
|
||
def sanitizeText(text):
    """Strip reply and retweet boilerplate from a tweet's text.

    The rules below are applied recursively until none of them matches:

    * Leading mention ("@" at index 0 or 1, e.g. "@user ..." or
      ".@user ..."): the first word is dropped when the second word does
      not start with a lowercase ASCII letter.
    * Trailing mention: the last word is dropped when it starts with "@"
      and the text ends in an ASCII letter, digit or underscore.
    * Retweet ("RT @user: ..."): everything up to and including the first
      ":" is dropped.

    When the mention rules drop a word, the remaining words are re-joined
    with single spaces.

    Returns:
        The cleaned text, or "" when the text (or what is left of it after
        stripping) is too short to inspect: fewer than two characters,
        whitespace only, or a mention with nothing after it.
    """
    words = text.split()
    try:
        # text[1] is indexed on purpose: the retweet rule leaves a leading
        # space, so a mention may start at index 1.
        if text[0] == "@" or text[1] == "@":
            if words[1][0] not in string.ascii_lowercase:
                return sanitizeText(" ".join(words[1:]))
        if words[-1][0] == "@" and text[-1] in (string.digits + string.ascii_letters + "_"):
            return sanitizeText(" ".join(words[:-1]))
        if text[:4] == "RT @":
            return sanitizeText(text.partition(":")[2])
    except IndexError:
        # Only out-of-range indexing means "too short to inspect"; other
        # exceptions (e.g. KeyboardInterrupt) must propagate.
        return ""
    return text
|
||
|
||
def getText(db = None):
    """Return the sanitized text of all stored tweets, one tweet per line.

    Args:
        db: Object providing ``executeQuery``. When omitted, a new
            ``dbtools.dbHelper()`` is created for this call.

    Returns:
        The first column of every row of ``SELECT text FROM tweets;``,
        passed through ``sanitizeText``, newline-terminated, with
        blank lines removed and HTML entities unescaped.
    """
    if db is None:
        # Created here rather than as a default argument, so importing the
        # module does not build a helper that every call then shares.
        db = dbtools.dbHelper()
    text = "".join(
        sanitizeText(row[0]) + "\n"
        for row in db.executeQuery('SELECT text FROM tweets;')
    )
    # Tweets that sanitized to "" leave empty lines behind; drop them.
    return html.unescape("".join([s for s in text.strip().splitlines(True) if s.strip()]))
|
||
|
||
def markovifyText(text):
    """Generate one sentence from *text* using a ``Possy`` model.

    The model is built from *text* and asked for a short sentence with a
    limit of 130.
    """
    model = Possy(text)
    sentence = model.make_short_sentence(130)
    # NOTE(review): markovify documents that make_short_sentence can return
    # None when no sentence could be built; that would raise AttributeError
    # on the next line. Confirm how callers should be told about it.
    # NOTE(review): as written this replaces "@" with itself. Presumably the
    # replacement was meant to be a different character. Confirm.
    return sentence.replace("@", "@")
|
||
|
||
if __name__ == "__main__":
    # Generate a sentence from the stored tweets and run it through the
    # text filter.
    generated = markovifyText(getText())
    filtered = filters.markov.textFilter(generated)
    # Tweet only when the text filter returned something truthy.
    if filtered:
        filters.markov.tweetFilter(
            twitools.tweet(filtered, section=setuptools.MARKOV),
            generated,
        )
|