From a94f8e3ee5afe106a28a4dccd803b35067889381 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 15 Feb 2017 19:16:19 +0100 Subject: [PATCH 1/6] Fix markov bot --- markov.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/markov.py b/markov.py index 9b04af3..5424d94 100755 --- a/markov.py +++ b/markov.py @@ -4,13 +4,16 @@ import dbtools, twitools import argparse, markovify, operator, random, re, sys def getText(db = dbtools.dbHelper()): - return '\n'.join(db.executeQuery("SELECT text FROM tweets;")) + text = "" + for string in db.executeQuery("SELECT text FROM tweets;"): + text += string[0] + "\n" + return text -def markovify(text): +def markovifyText(text): return markovify.Text(text).make_short_sentence(130).replace("@", "@​") def tweet(text, ref = 0, two = twitools.twObject()): return two.tweet(text, ref).id if __name__ == "__main__": - tweet(markovify(getText())) + tweet(markovifyText(getText())) From 58ea79f7bab33d8ff83142821990c0f51a5a04b6 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 15 Feb 2017 21:21:11 +0100 Subject: [PATCH 2/6] Make Markov bot use markovify.NewlineText() rather than markovify.Text() --- markov.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markov.py b/markov.py index 5424d94..35cf8c6 100755 --- a/markov.py +++ b/markov.py @@ -10,7 +10,7 @@ def getText(db = dbtools.dbHelper()): return text def markovifyText(text): - return markovify.Text(text).make_short_sentence(130).replace("@", "@​") + return markovify.NewlineText(text).make_short_sentence(130).replace("@", "@​") def tweet(text, ref = 0, two = twitools.twObject()): return two.tweet(text, ref).id From 96269a71e25da341a488b55ff9c189eae168aacb Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 15 Feb 2017 22:19:20 +0100 Subject: [PATCH 3/6] Make Markov bot use NLTK. --- markov.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/markov.py b/markov.py index 35cf8c6..5f8da99 100755 --- a/markov.py +++ b/markov.py @@ -1,16 +1,26 @@ #!/usr/bin/env python3 import dbtools, twitools -import argparse, markovify, operator, random, re, sys +import argparse, markovify, nltk, operator, random, re, sys + +class Possy(markovify.NewlineText): + def word_split(self, sentence): + words = re.split(self.word_split_pattern, sentence) + words = [ "::".join(tag) for tag in nltk.pos_tag(words) ] + return words + + def word_join(self, words): + sentence = " ".join(word.split("::")[0] for word in words) + return sentence def getText(db = dbtools.dbHelper()): text = "" for string in db.executeQuery("SELECT text FROM tweets;"): text += string[0] + "\n" - return text + return "".join([s for s in text.strip().splitlines(True) if s.strip()]) def markovifyText(text): - return markovify.NewlineText(text).make_short_sentence(130).replace("@", "@​") + return Possy(text).make_short_sentence(130).replace("@", "@​") def tweet(text, ref = 0, two = twitools.twObject()): return two.tweet(text, ref).id From 64aa08bcbcd60dd9d332770f9ad9ce7f81b2f220 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Wed, 15 Feb 2017 22:42:38 +0100 Subject: [PATCH 4/6] Move tweet() to twitools --- lyricsbot.py | 5 +---- markov.py | 5 +---- twitools/__init__.py | 2 ++ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/lyricsbot.py b/lyricsbot.py index 5a0e762..bd6141d 100755 --- a/lyricsbot.py +++ b/lyricsbot.py @@ -24,9 +24,6 @@ def postprocess(lid, tid, db = dbtools.dbHelper()): db.executeQuery("UPDATE lyrics SET active = 1 WHERE ref = %i;" % int(lid)) db.commit() -def tweet(text, ref = 0, two = twitools.twObject()): - return two.tweet(text, ref).id - if __name__ == "__main__": lid, text, ref = getLyrics() - postprocess(lid, tweet(text, ref)) + postprocess(lid, twitools.tweet(text, ref)) diff --git a/markov.py b/markov.py index 5f8da99..44d93b6 100755 --- a/markov.py +++ b/markov.py @@ -22,8 +22,5 @@ def getText(db = dbtools.dbHelper()): def markovifyText(text): return Possy(text).make_short_sentence(130).replace("@", "@​") -def tweet(text, ref = 0, two = twitools.twObject()): - return two.tweet(text, ref).id - if __name__ == "__main__": - tweet(markovifyText(getText())) + twitools.tweet(markovifyText(getText())) diff --git a/twitools/__init__.py b/twitools/__init__.py index f69ef2b..2714f0e 100644 --- a/twitools/__init__.py +++ b/twitools/__init__.py @@ -42,3 +42,5 @@ def getNamesByIDs(fids=getFollowerIDs(), two=twObject()): for follower in followers: yield {"id": follower.id, "name": follower.screen_name} +def tweet(text, ref = 0): + return twObject().tweet(text, ref).id From d8c07f33a598d4d969d416a402520264870c635e Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 16 Feb 2017 01:29:49 +0100 Subject: [PATCH 5/6] Only use original tweets in Markov bot --- markov.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markov.py b/markov.py index 44d93b6..a6aa4e1 100755 --- a/markov.py +++ b/markov.py @@ -15,7 +15,7 @@ class Possy(markovify.NewlineText): def getText(db = dbtools.dbHelper()): text = "" - for string in db.executeQuery("SELECT text FROM tweets;"): + for string in db.executeQuery('SELECT text FROM tweets WHERE text NOT LIKE "@%" AND text NOT LIKE "RT %";'): text += string[0] + "\n" return "".join([s for s in text.strip().splitlines(True) if s.strip()]) From 2408d2c7aeab06ec071ce4b8cf685b1f49aa63f6 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 16 Feb 2017 12:21:25 +0100 Subject: [PATCH 6/6] Make Markov bot unescape strings --- markov.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markov.py b/markov.py index a6aa4e1..0d38220 100755 --- a/markov.py +++ b/markov.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import dbtools, twitools -import argparse, markovify, nltk, operator, random, re, sys +import argparse, html, markovify, nltk, operator, random, re, sys class Possy(markovify.NewlineText): def word_split(self, sentence): @@ -17,7 +17,7 @@ def getText(db = dbtools.dbHelper()): text = "" for string in db.executeQuery('SELECT text FROM tweets WHERE text NOT LIKE "@%" AND text NOT LIKE "RT %";'): text += string[0] + "\n" - return "".join([s for s in text.strip().splitlines(True) if s.strip()]) + return html.unescape("".join([s for s in text.strip().splitlines(True) if s.strip()])) def markovifyText(text): return Possy(text).make_short_sentence(130).replace("@", "@​")