2023-04-16 14:08:57 +00:00
import os
import inspect
2023-04-17 20:28:29 +00:00
import logging
import signal
2023-04-19 06:11:28 +00:00
import random
2023-04-23 13:26:46 +00:00
import uuid
2023-04-16 14:08:57 +00:00
import openai
import asyncio
import markdown2
import tiktoken
2023-04-17 20:28:29 +00:00
import duckdb
2023-04-16 14:08:57 +00:00
2023-04-23 13:26:46 +00:00
from nio import AsyncClient , RoomMessageText , MatrixRoom , Event , InviteEvent , AsyncClientConfig , MegolmEvent , GroupEncryptionError , EncryptionError , HttpClient , Api
2023-04-16 14:08:57 +00:00
from nio . api import MessageDirection
2023-04-23 13:26:46 +00:00
from nio . responses import RoomMessagesError , SyncResponse , RoomRedactError , WhoamiResponse , JoinResponse , RoomSendResponse
from nio . crypto import Olm
2023-04-16 14:08:57 +00:00
from configparser import ConfigParser
from datetime import datetime
2023-04-17 20:28:29 +00:00
from argparse import ArgumentParser
2023-04-19 06:11:28 +00:00
from typing import List , Dict , Union , Optional
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
from commands import COMMANDS
2023-04-23 13:26:46 +00:00
from classes import DuckDBStore
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
def logging(message: str, log_level: str = "info"):
    """Print a timestamped, caller-tagged log line to stdout.

    NOTE(review): this deliberately shadows the stdlib ``logging`` module
    imported at the top of the file; every caller in this module uses this
    function, so the name must stay.

    :param message: Text to log.
    :param log_level: Severity tag, uppercased in the output (default "info").
    """
    # Name of the function that invoked us, recovered from the call stack.
    caller_name = inspect.currentframe().f_back.f_code.co_name
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")
    print(f"[{stamp}] - {caller_name} - [{log_level.upper()}] {message}")
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
CONTEXT = {
" database " : False ,
" default_room_name " : " GPTBot " ,
" system_message " : " You are a helpful assistant. " ,
" max_tokens " : 3000 ,
" max_messages " : 20 ,
" model " : " gpt-3.5-turbo " ,
" client " : None ,
" sync_token " : None ,
" logger " : logging
}
async def gpt_query(messages: list, model: Optional[str] = None):
    """Send *messages* to the OpenAI chat completion API.

    :param messages: Chat messages in OpenAI format ({"role", "content"} dicts).
    :param model: Model name; falls back to CONTEXT["model"] when omitted.
    :return: ``(text, tokens_used)`` on success, ``(None, 0)`` on any failure.
    """
    if not model:
        model = CONTEXT["model"]

    logging(f"Querying GPT with {len(messages)} messages")

    # Any API/parsing error is caught and reported as a (None, 0) result so
    # the caller can post a notice instead of crashing.
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages
        )
        reply = response.choices[0].message['content']
        used = response.usage["total_tokens"]
        logging(f"Used {used} tokens")
        return reply, used
    except Exception as e:
        logging(f"Error during GPT API call: {e}", "error")
        return None, 0
2023-04-19 06:11:28 +00:00
async def fetch_last_n_messages(room_id: str, n: Optional[int] = None,
                                client: Optional[AsyncClient] = None, sync_token: Optional[str] = None):
    """Fetch up to *n* usable text messages from a room's recent history.

    Walks backwards from *sync_token*, decrypting Megolm events where possible,
    skipping command messages (anything starting with "!"), and stopping early
    at a "!gptbot ignoreolder" marker.

    :param room_id: Matrix room ID to read from.
    :param n: Maximum number of messages to return (default CONTEXT["max_messages"]).
    :param client: AsyncClient to use (default CONTEXT["client"]).
    :param sync_token: Pagination token to start from (default CONTEXT["sync_token"]).
    :return: Messages in chronological order; [] on fetch error.
    """
    messages = []

    n = n or CONTEXT["max_messages"]
    client = client or CONTEXT["client"]
    sync_token = sync_token or CONTEXT["sync_token"]

    logging(
        f"Fetching last {2*n} messages from room {room_id} (starting at {sync_token})...")

    # Over-fetch (2*n) because commands and undecryptable events get filtered out.
    response = await client.room_messages(
        room_id=room_id,
        start=sync_token,
        limit=2*n,
    )

    if isinstance(response, RoomMessagesError):
        logging(
            f"Error fetching messages: {response.message} (status code {response.status_code})", "error")
        return []

    for event in response.chunk:
        if len(messages) >= n:
            break
        # Try to decrypt encrypted events; skip any we lack keys for.
        if isinstance(event, MegolmEvent):
            try:
                event = await client.decrypt_event(event)
            except (GroupEncryptionError, EncryptionError):
                logging(
                    f"Could not decrypt message {event.event_id} in room {room_id}", "error")
                continue
        if isinstance(event, RoomMessageText):
            # User-placed marker: do not look further back in history.
            if event.body.startswith("!gptbot ignoreolder"):
                break
            # Commands are not part of the conversation context.
            if not event.body.startswith("!"):
                messages.append(event)

    logging(f"Found {len(messages)} messages (limit: {n})")

    # Reverse the list so that messages are in chronological order
    return messages[::-1]
2023-04-19 06:11:28 +00:00
def truncate_messages_to_fit_tokens(messages: list, max_tokens: Optional[int] = None,
                                    model: Optional[str] = None, system_message: Optional[str] = None):
    """Drop oldest messages so the conversation fits within *max_tokens*.

    The first entry of *messages* (assumed to be the system message — TODO
    confirm against process_query) is always kept; the remainder are kept
    newest-first until the budget is exhausted.

    :param messages: Chat messages ({"role", "content"} dicts), oldest first.
    :param max_tokens: Token budget (default CONTEXT["max_tokens"]).
    :param model: Model whose tokenizer to use (default CONTEXT["model"]).
    :param system_message: System prompt text (default CONTEXT["system_message"]).
    :return: Truncated messages in original (chronological) order; [] if even
        the system message alone exceeds the budget.
    """
    max_tokens = max_tokens or CONTEXT["max_tokens"]
    model = model or CONTEXT["model"]
    system_message = system_message or CONTEXT["system_message"]

    encoding = tiktoken.encoding_for_model(model)

    total_tokens = 0

    # +1 accounts for the per-message overhead token.
    system_message_tokens = len(encoding.encode(system_message)) + 1

    if system_message_tokens > max_tokens:
        logging(
            f"System message is too long to fit within token limit ({system_message_tokens} tokens) - cannot proceed", "error")
        return []

    total_tokens += system_message_tokens
    # BUG FIX: a stray "total_tokens = len(system_message) + 1" here used to
    # overwrite the tokenizer-based count with a character count, making the
    # budget check operate on the wrong unit.

    truncated_messages = []

    # Keep the first (system) message, then walk the rest newest-first so the
    # most recent context survives truncation.
    for message in [messages[0]] + list(reversed(messages[1:])):
        content = message["content"]
        tokens = len(encoding.encode(content)) + 1
        if total_tokens + tokens > max_tokens:
            break
        total_tokens += tokens
        truncated_messages.append(message)

    # Guard against an empty result (nothing fit) before re-ordering.
    if not truncated_messages:
        return []

    # Restore chronological order: first message, then the rest reversed back.
    return [truncated_messages[0]] + list(reversed(truncated_messages[1:]))
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
async def process_query(room: MatrixRoom, event: RoomMessageText, **kwargs):
    """Answer a user message: build context, query GPT, post the reply.

    Also sets/clears the typing indicator, advances the read marker, and logs
    token usage to the database when one is configured.

    :param room: Room the triggering message was posted in.
    :param event: The triggering text message event.
    :param kwargs: Optional overrides: client, database, system_message, max_tokens.
    """
    client = kwargs.get("client") or CONTEXT["client"]
    database = kwargs.get("database") or CONTEXT["database"]
    system_message = kwargs.get("system_message") or CONTEXT["system_message"]
    max_tokens = kwargs.get("max_tokens") or CONTEXT["max_tokens"]

    await client.room_typing(room.room_id, True)

    await client.room_read_markers(room.room_id, event.event_id)

    # NOTE(review): history window is hard-coded to 20 here rather than using
    # CONTEXT["max_messages"] — presumably intentional; verify.
    last_messages = await fetch_last_n_messages(room.room_id, 20)

    chat_messages = [{"role": "system", "content": system_message}]

    for message in last_messages:
        role = "assistant" if message.sender == client.user_id else "user"
        # The triggering event is appended explicitly below; skip it here.
        if not message.event_id == event.event_id:
            chat_messages.append({"role": role, "content": message.body})

    chat_messages.append({"role": "user", "content": event.body})

    # Truncate messages to fit within the token limit
    truncated_messages = truncate_messages_to_fit_tokens(
        chat_messages, max_tokens - 1)

    response, tokens_used = await gpt_query(truncated_messages)

    if response:
        logging(f"Sending response to room {room.room_id}...")

        # Convert markdown to HTML
        message = await send_message(room, response)

        if database:
            logging("Logging tokens used...")

            with database.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO token_usage (message_id, room_id, tokens, timestamp) VALUES (?, ?, ?, ?)",
                    (message.event_id, room.room_id, tokens_used, datetime.now()))
            database.commit()
    else:
        # Send a notice to the room if there was an error
        logging("Error during GPT API call - sending notice to room")
        # BUG FIX: this coroutine was previously called without `await`, so the
        # error notice was never actually sent.
        await send_message(
            room, "Sorry, I'm having trouble connecting to the GPT API right now. Please try again later.", True)
        print("No response from GPT API")

    await client.room_typing(room.room_id, False)
2023-04-17 20:28:29 +00:00
2023-04-19 06:11:28 +00:00
async def process_command(room: MatrixRoom, event: RoomMessageText, context: Optional[dict] = None):
    """Dispatch a "!gptbot <command>" message to its handler from COMMANDS.

    If the handler returns a ``(room_id, event, content)`` tuple, the content
    is relayed into the target room (as a notice when msgtype is "m.notice").

    :param room: Room the command was issued in.
    :param event: The command message event.
    :param context: Context dict to use; defaults to the module-level CONTEXT.
    """
    if context is None or not context:
        context = CONTEXT

    logging(
        f"Received command {event.body} from {event.sender} in room {room.room_id}")

    # "!gptbot foo bar" -> command "foo"; bare "!gptbot" -> None (default handler).
    parts = event.body.split()
    command = parts[1] if len(parts) > 1 else None

    handler = COMMANDS.get(command, COMMANDS[None])
    message = await handler(room, event, context)

    if message:
        room_id, event, content = message
        is_notice = content["msgtype"] == "m.notice"
        await send_message(context["client"].rooms[room_id], content["body"],
                           is_notice, context["client"])
2023-04-17 20:28:29 +00:00
2023-04-23 13:26:46 +00:00
async def message_callback(room: MatrixRoom, event: RoomMessageText | MegolmEvent, **kwargs):
    """Route an incoming room message: decrypt, then command or GPT query.

    Own messages and other bots' commands are ignored. Undecryptable events
    trigger a key re-request (best-effort) and an apology notice.

    :param room: Room the event arrived in.
    :param event: Plain text event, or an encrypted Megolm event.
    :param kwargs: Optional "context" dict override (defaults to CONTEXT).
    """
    context = kwargs.get("context") or CONTEXT

    logging(f"Received message from {event.sender} in room {room.room_id}")

    if isinstance(event, MegolmEvent):
        try:
            event = await context["client"].decrypt_event(event)
        except Exception as e:
            # Best-effort: ask for fresh session keys so future messages work.
            try:
                logging("Requesting new encryption keys...")
                await context["client"].request_room_key(event)
            except Exception:
                # BUG FIX: narrowed from a bare `except:` (which also swallowed
                # SystemExit/KeyboardInterrupt); still deliberately best-effort.
                pass

            logging(f"Error decrypting message: {e}", "error")
            await send_message(room, "Sorry, I couldn't decrypt that message. Please try again later or switch to a room without encryption.", True, context["client"])
            return

    if event.sender == context["client"].user_id:
        logging("Message is from bot itself - ignoring")

    elif event.body.startswith("!gptbot"):
        await process_command(room, event)

    elif event.body.startswith("!"):
        logging("Might be a command, but not for this bot - ignoring")

    else:
        await process_query(room, event, context=context)
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
async def room_invite_callback(room: MatrixRoom, event: InviteEvent, **kwargs):
    """Join a room we were invited to (unless already joined) and say hello.

    :param room: The room we were invited to.
    :param event: The invite event (unused beyond triggering this callback).
    :param kwargs: Optional "client" override (defaults to CONTEXT["client"]).
    """
    client: AsyncClient = kwargs.get("client") or CONTEXT["client"]

    if room.room_id in client.rooms:
        logging(f"Already in room {room.room_id} - ignoring invite")
        return

    logging(f"Received invite to room {room.room_id} - joining...")

    response = await client.join(room.room_id)
    if isinstance(response, JoinResponse):
        # BUG FIX: `client` used to be passed positionally into the `notice`
        # parameter of send_message; pass it by keyword instead.
        await send_message(room, "Hello! I'm a helpful assistant. How can I help you today?", client=client)
    else:
        logging(f"Error joining room {room.room_id}: {response}", "error")
async def send_message(room: MatrixRoom, message: str, notice: bool = False, client: Optional[AsyncClient] = None):
    """Render *message* as Markdown->HTML and send it to *room*.

    Encrypts the payload when the client has Olm set up and the room is
    encrypted (sharing a group session first if needed); otherwise sends
    plaintext.

    :param room: Target room.
    :param message: Message body (Markdown source).
    :param notice: Send as "m.notice" instead of "m.text" when True.
    :param client: AsyncClient to use (default CONTEXT["client"]).
    :return: The RoomSendResponse from the homeserver.
    """
    client = client or CONTEXT["client"]

    markdowner = markdown2.Markdown(extras=["fenced-code-blocks"])
    formatted_body = markdowner.convert(message)

    msgtype = "m.notice" if notice else "m.text"

    msgcontent = {"msgtype": msgtype, "body": message,
                  "format": "org.matrix.custom.html", "formatted_body": formatted_body}

    content = None

    if client.olm and room.encrypted:
        try:
            # Member list must be current before sharing a group session.
            if not room.members_synced:
                responses = []
                responses.append(await client.joined_members(room.room_id))

            if client.olm.should_share_group_session(room.room_id):
                try:
                    # A share is already in flight - wait for it to finish.
                    event = client.sharing_session[room.room_id]
                    await event.wait()
                except KeyError:
                    await client.share_group_session(
                        room.room_id,
                        ignore_unverified_devices=True,
                    )

            if msgtype != "m.reaction":
                # After encrypt() the type becomes "m.room.encrypted".
                response = client.encrypt(room.room_id, "m.room.message", msgcontent)
                msgtype, content = response

        except Exception as e:
            # NOTE(review): the log line claims "sending unencrypted" but the
            # re-raise below means nothing is sent at all for an encrypted
            # room - the message and the behavior contradict each other.
            logging(
                f"Error encrypting message: {e} - sending unencrypted", "error")
            raise

    if not content:
        # Plaintext path (unencrypted room, or no Olm).
        msgtype = "m.room.message"
        content = msgcontent

    # Build and fire the raw /send request with a fresh transaction ID.
    method, path, data = Api.room_send(
        client.access_token, room.room_id, msgtype, content, uuid.uuid4()
    )

    return await client._send(RoomSendResponse, method, path, data, (room.room_id,))
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
async def accept_pending_invites(client: Optional[AsyncClient] = None):
    """Join every room the client currently has a pending invite for.

    :param client: AsyncClient to use (default CONTEXT["client"]).
    """
    client = client or CONTEXT["client"]

    logging("Accepting pending invites...")

    for room_id in list(client.invited_rooms.keys()):
        logging(f"Joining room {room_id}...")

        response = await client.join(room_id)

        if isinstance(response, JoinResponse):
            logging(response, "debug")
            # BUG FIX: joined_rooms() returns a JoinedRoomsResponse, which is
            # not subscriptable by room ID - look the room up in the client's
            # room map instead. NOTE(review): the room may only appear in
            # client.rooms after the next sync; verify in practice.
            joined_room = client.rooms.get(room_id)
            if joined_room:
                # BUG FIX: `client` was passed positionally into the `notice`
                # parameter of send_message.
                await send_message(joined_room, "Hello! I'm a helpful assistant. How can I help you today?", client=client)
        else:
            logging(f"Error joining room {room_id}: {response}", "error")
2023-04-16 14:08:57 +00:00
2023-04-19 06:11:28 +00:00
async def sync_cb(response, write_global: bool = True):
    """Record the sync token from a SyncResponse.

    :param response: The SyncResponse delivered by the client.
    :param write_global: When True, store the token in CONTEXT["sync_token"].
    """
    next_batch = response.next_batch

    logging(
        f"Sync response received (next batch: {next_batch})", "debug")

    # Mutating the CONTEXT dict needs no `global` declaration.
    if write_global:
        CONTEXT["sync_token"] = next_batch
2023-04-16 14:08:57 +00:00
2023-04-23 13:26:46 +00:00
async def test_callback(room: MatrixRoom, event: Event, **kwargs):
    """Debug hook: log every event type delivered to the client."""
    event_type = type(event).__name__
    logging(
        f"Received event {event_type} in room {room.room_id}", "debug")
2023-04-16 14:08:57 +00:00
2023-04-23 13:26:46 +00:00
async def init(config: ConfigParser):
    """Populate CONTEXT from *config*: Matrix client (with optional E2E
    encryption backed by the database), OpenAI settings, and a SIGTERM handler.

    Exits the process with status 1 if a mandatory config section is missing.

    :param config: Parsed configuration (needs [Matrix]; [OpenAI]; optional [Database]).
    """
    # Set up Matrix client
    try:
        assert "Matrix" in config
        assert "Homeserver" in config["Matrix"]
        assert "AccessToken" in config["Matrix"]
    except AssertionError:
        # BUG FIX: narrowed from a bare `except:` - only the asserts can fail here.
        logging("Matrix config not found or incomplete", "critical")
        exit(1)

    homeserver = config["Matrix"]["Homeserver"]
    access_token = config["Matrix"]["AccessToken"]

    # Discover device/user from the token, but let the config override them.
    device_id, user_id = await get_device_id(access_token, homeserver)

    device_id = config["Matrix"].get("DeviceID", device_id)
    user_id = config["Matrix"].get("UserID", user_id)

    # Set up database - encryption is only enabled when a store is available.
    if "Database" in config and config["Database"].get("Path"):
        database = CONTEXT["database"] = initialize_database(
            config["Database"]["Path"])

        matrix_store = DuckDBStore

        client_config = AsyncClientConfig(
            store_sync_tokens=True, encryption_enabled=True, store=matrix_store)

    else:
        client_config = AsyncClientConfig(
            store_sync_tokens=True, encryption_enabled=False)

    client = AsyncClient(
        config["Matrix"]["Homeserver"], config=client_config)

    if client.config.encryption_enabled:
        # `database` is always bound here: encryption_enabled is only True on
        # the branch above that created it.
        client.store = client.config.store(
            user_id,
            device_id,
            database
        )
        assert client.store

        client.olm = Olm(client.user_id, client.device_id, client.store)
        client.encrypted_rooms = client.store.load_encrypted_rooms()

    CONTEXT["client"] = client

    CONTEXT["client"].access_token = config["Matrix"]["AccessToken"]
    CONTEXT["client"].user_id = user_id
    CONTEXT["client"].device_id = device_id

    # Set up GPT API
    try:
        assert "OpenAI" in config
        assert "APIKey" in config["OpenAI"]
    except AssertionError:
        # BUG FIX: narrowed from a bare `except:` - only the asserts can fail here.
        logging("OpenAI config not found or incomplete", "critical")
        exit(1)

    openai.api_key = config["OpenAI"]["APIKey"]

    if "Model" in config["OpenAI"]:
        CONTEXT["model"] = config["OpenAI"]["Model"]

    if "MaxTokens" in config["OpenAI"]:
        CONTEXT["max_tokens"] = int(config["OpenAI"]["MaxTokens"])

    if "MaxMessages" in config["OpenAI"]:
        CONTEXT["max_messages"] = int(config["OpenAI"]["MaxMessages"])

    # Listen for SIGTERM so the bot shuts down cleanly under e.g. systemd.
    def sigterm_handler(_signo, _stack_frame):
        logging("Received SIGTERM - exiting...")
        exit()

    signal.signal(signal.SIGTERM, sigterm_handler)
async def main(config: Optional[ConfigParser] = None, client: Optional[AsyncClient] = None):
    """Run the bot: initialize if needed, wire up callbacks, sync forever.

    :param config: Config used by init() when no client exists yet.
    :param client: Pre-built AsyncClient; otherwise CONTEXT["client"] is used.
    """
    if not client and not CONTEXT.get("client"):
        await init(config)

    client = client or CONTEXT["client"]

    try:
        assert client.user_id
    except AssertionError:
        logging(
            "Failed to get user ID - check your access token or try setting it manually", "critical")
        await client.close()
        return

    logging("Starting bot...")

    # Record sync tokens as they arrive (used by fetch_last_n_messages).
    client.add_response_callback(sync_cb, SyncResponse)

    # Initial sync happens BEFORE the message callback is registered, so the
    # backlog delivered by this first sync is not answered.
    logging("Syncing...")
    await client.sync(timeout=30000)

    client.add_event_callback(message_callback, RoomMessageText)
    client.add_event_callback(message_callback, MegolmEvent)
    client.add_event_callback(room_invite_callback, InviteEvent)
    client.add_event_callback(test_callback, Event)

    await accept_pending_invites()  # Accept pending invites

    logging("Bot started")

    try:
        # Continue syncing events
        await client.sync_forever(timeout=30000)
    finally:
        logging("Syncing one last time...")
        await client.sync(timeout=30000)
        await client.close()  # Properly close the aiohttp client session
        logging("Bot stopped")
2023-04-17 20:28:29 +00:00
2023-04-19 06:11:28 +00:00
def initialize_database(path: os.PathLike):
    """Open (or create) the DuckDB database at *path* and apply migrations.

    :param path: Filesystem path of the database file.
    :return: The open duckdb connection.
    """
    logging("Initializing database...")
    database = duckdb.connect(path)

    with database.cursor() as cursor:
        # Get the latest migration ID if the migrations table exists
        try:
            cursor.execute(
                """
                SELECT MAX(id) FROM migrations
                """
            )
            latest_migration = int(cursor.fetchone()[0])
        except Exception:
            # BUG FIX: narrowed from a bare `except:`. This covers both a
            # missing migrations table (duckdb error) and an empty one
            # (MAX -> None, so int(None) raises TypeError).
            latest_migration = 0

        # Version 1
        if latest_migration < 1:
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS token_usage (
                    message_id TEXT PRIMARY KEY,
                    room_id TEXT NOT NULL,
                    tokens INTEGER NOT NULL,
                    timestamp TIMESTAMP NOT NULL
                )
                """
            )

            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS migrations (
                    id INTEGER NOT NULL,
                    timestamp TIMESTAMP NOT NULL
                )
                """
            )

            cursor.execute(
                "INSERT INTO migrations (id, timestamp) VALUES (1, ?)",
                (datetime.now(),)
            )

    database.commit()

    return database
2023-04-17 20:28:29 +00:00
2023-04-23 13:26:46 +00:00
async def get_device_id(access_token, homeserver):
    """Resolve the user ID and first device ID for an access token.

    Spins up a throwaway client, asks the homeserver who the token belongs
    to, and reads the first registered device.

    :param access_token: Matrix access token to identify.
    :param homeserver: Homeserver base URL.
    :return: ``(device_id, user_id)``, or ``(None, None)`` if whoami failed.
    """
    probe = AsyncClient(homeserver)
    probe.access_token = access_token

    logging(f"Obtaining device ID for access token {access_token}...", "debug")

    response = await probe.whoami()

    # Guard clause: anything other than a WhoamiResponse means failure.
    if not isinstance(response, WhoamiResponse):
        logging(f"Failed to obtain device ID: {response}", "error")
        await probe.close()
        return None, None

    logging(
        f"Authenticated as {response.user_id}.")
    user_id = response.user_id

    devices = await probe.devices()
    device_id = devices.devices[0].id

    await probe.close()
    return device_id, user_id
2023-04-16 14:08:57 +00:00
if __name__ == "__main__":
    # Parse command line arguments
    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "--config", help="Path to config file (default: config.ini in working directory)", default="config.ini")
    cli_args = arg_parser.parse_args()

    # Read config file
    config = ConfigParser()
    config.read(cli_args.config)

    # Start bot loop; translate termination signals into log lines and make
    # sure the database is closed on the way out.
    try:
        asyncio.run(main(config))
    except KeyboardInterrupt:
        logging("Received KeyboardInterrupt - exiting...")
    except SystemExit:
        logging("Received SIGTERM - exiting...")
    finally:
        if CONTEXT["database"]:
            CONTEXT["database"].close()