twitools/markov.py

71 lines
1.6 KiB
Python
Raw Permalink Normal View History

2017-02-15 18:09:48 +00:00
#!/usr/bin/env python3
import dbtools
import setuptools
import twitools
import filters.markov
import argparse
import datetime
import html
import markovify
import multiprocessing
import nltk
import operator
import os
import random
import re
import string
import sys
import time
2017-02-15 18:09:48 +00:00
2017-02-15 21:19:20 +00:00
class Possy(markovify.NewlineText):
    """Newline-delimited markovify model whose states carry a tag per word.

    Each word is stored in the chain as ``"<word>::<tag>"``, where the tag
    comes from ``nltk.pos_tag``; the tag is stripped again when a generated
    sentence is turned back into plain text.
    """

    def word_split(self, sentence):
        """Split *sentence* into tokens and return them as ``word::tag`` strings."""
        tokens = re.split(self.word_split_pattern, sentence)
        # nltk.pos_tag yields one sequence of strings per token (word, tag);
        # joining it with "::" produces the combined state string.
        return ["::".join(pair) for pair in nltk.pos_tag(tokens)]

    def word_join(self, words):
        """Rebuild a plain sentence from ``word::tag`` strings.

        Only the part before the first ``"::"`` of every item is kept, and the
        results are joined with single spaces.
        """
        plain_words = []
        for tagged in words:
            plain_words.append(tagged.split("::")[0])
        return " ".join(plain_words)
2017-02-15 18:09:48 +00:00
def sanitizeText(text):
    """Strip leading/trailing mentions and retweet prefixes from a tweet.

    The following rules are applied repeatedly until none matches:

    1. If the text has "@" as its first or second character (e.g. "@user" or
       ".@user") and the second word does not start with a lowercase ASCII
       letter, the first word is dropped.
    2. If the last word starts with "@" and the text ends in a character that
       may be part of a handle (ASCII letter, digit or underscore), the last
       word is dropped.
    3. If the text starts with "RT @", only what follows the first ":" is
       kept.

    Dropping a word re-joins the remaining words with single spaces.

    Args:
        text: The raw tweet text (a ``str``).

    Returns:
        The cleaned text, or ``""`` when the text is too short to inspect
        (for example empty, a single character, or nothing but one mention).
    """
    handle_chars = string.digits + string.ascii_letters + "_"

    # Iterative form of "apply a rule, then start over on the result"; this
    # avoids depending on the interpreter's recursion limit.
    while True:
        words = text.split()
        try:
            starts_with_mention = text[0] == "@" or text[1] == "@"
            if starts_with_mention and words[1][0] not in string.ascii_lowercase:
                text = " ".join(words[1:])
                continue
            if words[-1][0] == "@" and text[-1] in handle_chars:
                text = " ".join(words[:-1])
                continue
        except IndexError:
            # Not enough characters/words to look at: treat as unusable.
            return ""
        if text[:4] == "RT @":
            text = text.partition(":")[2]
            continue
        return text
2017-02-15 18:09:48 +00:00
def getText(db = None):
    """Build the newline-separated training corpus from the stored tweets.

    Every row returned by ``SELECT text FROM tweets;`` is passed through
    ``sanitizeText``; lines that end up blank are dropped and HTML entities
    in the result are unescaped.

    Args:
        db: Object providing ``executeQuery(sql)``. When omitted, a new
            ``dbtools.dbHelper()`` is created at call time (not at import
            time, so importing this module does not touch the database).

    Returns:
        The corpus as a single string, one sanitized tweet per line.
    """
    if db is None:
        db = dbtools.dbHelper()

    rows = db.executeQuery('SELECT text FROM tweets;')
    # The first column of each row is the tweet text.
    corpus = "".join(sanitizeText(row[0]) + "\n" for row in rows)

    # Keep line endings (splitlines(True)) so the surviving lines can simply
    # be concatenated again; whitespace-only lines are discarded.
    kept_lines = [line for line in corpus.strip().splitlines(True) if line.strip()]
    return html.unescape("".join(kept_lines))
2017-02-15 18:09:48 +00:00
2017-02-15 18:16:19 +00:00
def markovifyText(text):
    """Generate one sentence of at most 270 characters from *text*.

    Args:
        text: Newline-separated corpus used to build a ``Possy`` model.

    Returns:
        The generated sentence, or ``""`` when the model could not produce
        one (``make_short_sentence`` returns ``None`` in that case).
    """
    sentence = Possy(text).make_short_sentence(270)
    if sentence is None:
        # No acceptable sentence was found; avoid calling .replace on None.
        return ""
    # NOTE(review): as displayed this replaces "@" with "@", which is a no-op.
    # The replacement literal may originally have contained a character that
    # is not visible here -- confirm against the repository before changing.
    return sentence.replace("@", "@")
2017-02-15 18:09:48 +00:00
def worker():
    """Generate a sentence, filter it and tweet it if the filter accepts it.

    The sentence produced by ``markovifyText(getText())`` is passed to
    ``filters.markov.textFilter``. A falsy result ends the run without
    tweeting; otherwise the filtered text is sent with ``twitools.tweet`` and
    the tweet result plus the unfiltered sentence are handed to
    ``filters.markov.tweetFilter``.
    """
    generated = markovifyText(getText())
    accepted = filters.markov.textFilter(generated)
    if not accepted:
        return
    # NOTE(review): `setuptools` here is expected to be the project's own
    # module providing MARKOV, not the packaging library -- confirm.
    posted = twitools.tweet(accepted, section = setuptools.MARKOV)
    filters.markov.tweetFilter(posted, generated)
if __name__ == "__main__":
    # Run the worker in a child process so that a hung run can be killed.
    timeout_seconds = 20*60
    p = multiprocessing.Process(target=worker)
    p.start()
    # Wait for the worker to finish, but never longer than the timeout.
    # join(timeout) returns as soon as the worker exits, so the script no
    # longer stays alive for the full 20 minutes after a quick run.
    p.join(timeout_seconds)
    if p.is_alive():
        # Still running after the timeout: stop it.
        p.terminate()
    # Reap the child (returns immediately if it has already been joined).
    p.join()