fix(truncation): correct message handling and token calc
Update the message truncation logic so _truncate returns the truncated message list (its return value was previously discarded) and prepends the system message as a proper message dictionary. Drop a stale reassignment that overwrote the running token count with len(system_message) + 1. Also switch the tiktoken encoding fallback from "gpt-3.5-turbo" to "gpt-4o" so unrecognized model names resolve to a current tokenizer.
This commit is contained in:
parent 571031002c
commit 5fef1ab59c

1 changed file with 16 additions and 4 deletions
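For context, the encoding fallback in the second hunk amounts to the pattern below. This is a minimal standalone sketch, not the module's code: the helper name is hypothetical, and the real code catches a bare Exception rather than KeyError.

import tiktoken

def resolve_encoding(model: str) -> tiktoken.Encoding:
    """Look up the tokenizer for a model, with a current-generation fallback."""
    try:
        return tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map; fall back
        # to the gpt-4o tokenizer (o200k_base) instead of the older
        # gpt-3.5-turbo one (cl100k_base), so token counts stay closer to
        # what current models actually produce.
        return tiktoken.encoding_for_model("gpt-4o")

len(resolve_encoding(model).encode(text)) then gives the per-message token count that the truncation loop compares against max_tokens.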
@@ -402,7 +402,7 @@ class OpenAI(BaseAI):
         self.logger.log(f"Prepared messages: {chat_messages}", "debug")

         # Truncate messages to fit within the token limit
-        self._truncate(
+        chat_messages = self._truncate(
             messages=chat_messages,
             max_tokens=self.max_tokens - 1,
             system_message=system_message,
@@ -441,7 +441,7 @@ class OpenAI(BaseAI):
             encoding = tiktoken.encoding_for_model(model)
         except Exception:
             # TODO: Handle this more gracefully
-            encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
+            encoding = tiktoken.encoding_for_model("gpt-4o")

         total_tokens = 0

@@ -458,7 +458,6 @@ class OpenAI(BaseAI):

         total_tokens += system_message_tokens

-        total_tokens = len(system_message) + 1
         truncated_messages = []

         self.logger.log(f"Messages: {messages}", "debug")
@@ -479,7 +478,20 @@ class OpenAI(BaseAI):
             total_tokens += tokens
             truncated_messages.append(message)

-        return [truncated_messages[0]] + list(reversed(truncated_messages[1:]))
+        system_message_dict = {
+            "role": "system",
+            "content": (
+                system_message
+                if isinstance(messages[0]["content"], str)
+                else [{"type": "text", "text": system_message}]
+            ),
+        }
+
+        return (
+            [system_message_dict]
+            + [truncated_messages[0]]
+            + list(reversed(truncated_messages[1:]))
+        )

     async def generate_chat_response(
         self,
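To illustrate the new return shape: the truncation loop keeps the oldest message plus as many recent messages as still fit (gathered newest-first after index 0), so the tail is reversed back into chronological order and the system message dictionary now leads the list. A small runnable sketch under that reading, with hypothetical names that are not part of the module:

def assemble(system_message_dict: dict, kept: list[dict]) -> list[dict]:
    # Mirrors the new return expression: system message first, then the
    # oldest kept message, then the remaining kept messages restored to
    # chronological order.
    return [system_message_dict] + [kept[0]] + list(reversed(kept[1:]))

kept = [
    {"role": "user", "content": "msg 1 (oldest, always kept)"},
    {"role": "assistant", "content": "msg 5 (newest)"},
    {"role": "user", "content": "msg 4"},
]
print(assemble({"role": "system", "content": "You are helpful."}, kept))
# -> system message, msg 1, msg 4, msg 5: chronological order restored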