feat: enhance tool and image handling
Changes tool request handling and image processing. The configuration now accepts a dedicated model for tool requests (`ToolModel`), and context images are automatically resized to configurable maximum dimensions before being sent to the AI model, improving compatibility and performance. Tool requests are no longer tied to a single hard-coded model: the bot uses the default chat model where possible and falls back to the configured tool model when the default is not known to support tools. Resizing images up front keeps uploads within the API's limits and reduces network and processing overhead.
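The model used for a tool request is now chosen roughly as sketched below. This is a minimal standalone summary of the override logic introduced in this commit; the function name and parameters are illustrative, not part of the codebase:

def pick_tool_model(chat_model: str, tool_model: str | None, force_tools: bool) -> str:
    """Choose which model handles a tool request under the new behavior."""
    # Models currently known to accept tool calls (mirrors the _is_tool_model helper below).
    known_tool_models = ("gpt-3.5-turbo", "gpt-4-turbo", "gpt-4o")

    # Prefer the default chat model: keep it if it is known to support tools,
    # or if ForceTools says to use it for tool requests regardless.
    if chat_model in known_tool_models or force_tools:
        return chat_model

    # Otherwise fall back to the dedicated ToolModel, if one is configured.
    return tool_model or chat_model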
parent 89f06268a5
commit 3f084ffdd3
5 changed files with 108 additions and 48 deletions
@@ -1,5 +1,11 @@
# Changelog

### 0.3.13 (2024-05-20)

- **Breaking Change**: The `ForceTools` configuration option behavior has changed. Instead of using a separate model for tools, the bot will now try to use the default chat model for tool requests, even if that model is not known to support tools.
- Added `ToolModel` to OpenAI configuration to allow specifying a separate model for tool requests
- Automatically resize context images to a default maximum of 2000x768 pixels before sending them to the AI model

### 0.3.12 (2024-05-17)

- Added `ForceVision` to OpenAI configuration to allow third-party models to be used for image recognition
@@ -107,11 +107,20 @@ APIKey = sk-yoursecretkey

# Whether to force the use of tools in the chat completion model
#
# Currently, only gpt-3.5-turbo supports tools. If you set this to 1, the bot
# will use that model for tools even if you have a different model set as the
# default. It will only generate the final result using the default model.
# This will make the bot allow the use of tools in the chat completion model,
# even if the model you are using isn't known to support tools. This is useful
# if you are using a self-hosted model that supports tools, but the bot doesn't
# know about it.
#
# ForceTools = 0
# ForceTools = 1

# Whether a dedicated model should be used for tools
#
# This will make the bot use a dedicated model for tools. This is useful if you
# want to use a model that doesn't support tools, but still want to be able to
# use tools.
#
# ToolModel = gpt-4o

# Whether to emulate tools in the chat completion model
#
@@ -130,6 +139,16 @@ APIKey = sk-yoursecretkey
#
# ForceVision = 0

# Maximum width and height of images sent to the API if vision is enabled
#
# The OpenAI API has a limit of 2000 pixels for the long side of an image, and
# 768 pixels for the short side. You may have to adjust these values if you're
# using a self-hosted model that has different limits. You can also set these
# to 0 to disable image resizing.
#
# MaxImageLongSide = 2000
# MaxImageShortSide = 768

# Advanced settings for the OpenAI API
#
# These settings are not required for normal operation, but can be used to
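A quick worked example of the 2000x768 default described above, using Pillow's thumbnail() the same way the resizing code later in this commit does; the 4000x3000 input image is purely illustrative:

from PIL import Image

# Illustrative 4000x3000 context image (wide: width > height).
image = Image.new("RGB", (4000, 3000))

# Fit the image into the default 2000x768 box while preserving its aspect ratio.
image.thumbnail((2000, 768))

print(image.size)  # (1024, 768): here the 768 px short-side limit is the binding constraint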
@@ -7,7 +7,7 @@ allow-direct-references = true

[project]
name = "matrix-gptbot"
version = "0.3.12"
version = "0.3.13"

authors = [
    { name = "Kumi Mitterer", email = "gptbot@kumi.email" },
@@ -97,6 +97,14 @@ class OpenAI(BaseAI):
    def force_tools(self):
        return self._config.getboolean("ForceTools", fallback=False)

    @property
    def tool_model(self):
        return self._config.get("ToolModel")

    @property
    def vision_model(self):
        return self._config.get("VisionModel")

    @property
    def emulate_tools(self):
        return self._config.getboolean("EmulateTools", fallback=False)
@@ -106,12 +114,26 @@ class OpenAI(BaseAI):
        # TODO: This should be model-specific
        return self._config.getint("MaxTokens", fallback=4000)

    @property
    def max_messages(self):
        return self._config.getint("MaxMessages", fallback=30)

    @property
    def max_image_long_side(self):
        return self._config.getint("MaxImageLongSide", fallback=2000)

    @property
    def max_image_short_side(self):
        return self._config.getint("MaxImageShortSide", fallback=768)

    def _is_tool_model(self, model: str) -> bool:
        return model in ("gpt-3.5-turbo", "gpt-4-turbo", "gpt-4o")

    def _is_vision_model(self, model: str) -> bool:
        return model in ("gpt-4-turbo", "gpt-4o") or "vision" in model

    def supports_chat_images(self):
        return (
            "vision" in self.chat_model
            or self.chat_model in ("gpt-4o",)
            or self.force_vision
        )
        return self._is_vision_model(self.chat_model) or self.force_vision

    def json_decode(self, data):
        if data.startswith("```json\n"):
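A small illustration of the `_is_vision_model` check added above, written as a standalone function for clarity; the non-OpenAI model names are hypothetical:

def _is_vision_model(model: str) -> bool:
    # Same logic as the method above, shown outside the class for illustration.
    return model in ("gpt-4-turbo", "gpt-4o") or "vision" in model

print(_is_vision_model("gpt-4o"))            # True: on the allow-list
print(_is_vision_model("llava-1.6-vision"))  # True: matched by the "vision" substring check
print(_is_vision_model("my-local-model"))    # False: VisionModel or ForceVision would be needed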
@@ -194,11 +216,15 @@ class OpenAI(BaseAI):

        original_messages = messages

        # TODO: I believe more models support tools now, so this could be adapted
        if allow_override and "gpt-3.5-turbo" not in original_model:
            if self.force_tools:
        if (
            allow_override
            and use_tools
            and self.tool_model
            and not (self._is_tool_model(chat_model) or self.force_tools)
        ):
            if self.tool_model:
                self.logger.log("Overriding chat model to use tools")
                chat_model = "gpt-3.5-turbo"
                chat_model = self.tool_model

        out_messages = []

@@ -225,8 +251,8 @@ class OpenAI(BaseAI):
            use_tools
            and self.emulate_tools
            and not self.force_tools
            and "gpt-3.5-turbo" not in chat_model
        ):  # TODO: This should be adapted to use tools with more models
            and not self._is_tool_model(chat_model)
        ):
            self.bot.logger.log("Using tool emulation mode.", "debug")

            messages = (
@@ -272,9 +298,10 @@ class OpenAI(BaseAI):
            "presence_penalty": self.presence_penalty,
        }

        if "gpt-3.5-turbo" in chat_model and use_tools:
        if (self._is_tool_model(chat_model) and use_tools) or self.force_tools:
            kwargs["tools"] = tools

        # TODO: Look into this
        if "gpt-4" in chat_model:
            kwargs["max_tokens"] = self.max_tokens

@@ -685,10 +712,10 @@ Only the event_types mentioned above are allowed, you must not respond in any ot

        messages = [{"role": "system", "content": system_message}] + messages[1:]

        if "vision" not in (chat_model := self.chat_model) and chat_model not in (
            "gpt-4o",
        ):
            chat_model = "gpt-4o"
        chat_model = self.chat_model

        if not self._is_vision_model(chat_model):
            chat_model = self.vision_model or "gpt-4o"

        chat_partial = partial(
            self.openai_api.chat.completions.create,
@@ -29,11 +29,13 @@ from nio import (
    RoomMessageAudio,
    DownloadError,
    RoomGetStateError,
    DiskDownloadResponse,
    MemoryDownloadResponse,
)
from nio.store import SqliteStore


from typing import Optional, List, Any
from typing import Optional, List, Any, Union
from configparser import ConfigParser
from datetime import datetime
from io import BytesIO
@@ -126,26 +128,6 @@ class GPTBot:
        """
        return self.config["GPTBot"].getboolean("ForceSystemMessage", False)

    @property
    def max_tokens(self) -> int:
        """Maximum number of input tokens.

        Returns:
            int: The maximum number of input tokens. Defaults to 3000.
        """
        return self.config["OpenAI"].getint("MaxTokens", 3000)
        # TODO: Move this to OpenAI class

    @property
    def max_messages(self) -> int:
        """Maximum number of messages to consider as input.

        Returns:
            int: The maximum number of messages to consider as input. Defaults to 30.
        """
        return self.config["OpenAI"].getint("MaxMessages", 30)
        # TODO: Move this to OpenAI class

    @property
    def operator(self) -> Optional[str]:
        """Operator of the bot.
@@ -309,7 +291,7 @@ class GPTBot:
        ignore_notices: bool = True,
    ):
        messages = []
        n = n or self.max_messages
        n = n or self.chat_api.max_messages
        room_id = room.room_id if isinstance(room, MatrixRoom) else room

        self.logger.log(
@@ -378,7 +360,7 @@ class GPTBot:
        model: Optional[str] = None,
        system_message: Optional[str] = None,
    ):
        max_tokens = max_tokens or self.max_tokens
        max_tokens = max_tokens or self.chat_api.max_tokens
        model = model or self.chat_api.chat_model
        system_message = (
            self.default_system_message if system_message is None else system_message
@@ -1168,7 +1150,9 @@ class GPTBot:
            return

        try:
            last_messages = await self._last_n_messages(room.room_id, self.max_messages)
            last_messages = await self._last_n_messages(
                room.room_id, self.chat_api.max_messages
            )
        except Exception as e:
            self.logger.log(f"Error getting last messages: {e}", "error")
            await self.send_message(
@@ -1271,7 +1255,27 @@ class GPTBot:
            download = await self.download_file(image_url)

            if download:
                encoded_url = f"data:{download.content_type};base64,{base64.b64encode(download.body).decode('utf-8')}"
                pil_image = Image.open(BytesIO(download.body))

                file_format = pil_image.format or "PNG"

                max_long_side = self.chat_api.max_image_long_side
                max_short_side = self.chat_api.max_image_short_side

                if max_long_side and max_short_side:
                    if pil_image.width > pil_image.height:
                        if pil_image.width > max_long_side:
                            pil_image.thumbnail((max_long_side, max_short_side))

                    else:
                        if pil_image.height > max_long_side:
                            pil_image.thumbnail((max_short_side, max_long_side))

                bio = BytesIO()

                pil_image.save(bio, format=file_format)

                encoded_url = f"data:{download.content_type};base64,{base64.b64encode(bio.getvalue()).decode('utf-8')}"
                parent = (
                    chat_messages[-1]
                    if chat_messages
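For reference, a self-contained sketch of the resize-and-encode path shown in the hunk above; the function name and parameters are illustrative, while the body mirrors the new handler code:

import base64
from io import BytesIO

from PIL import Image


def encode_context_image(body: bytes, content_type: str,
                         max_long_side: int = 2000, max_short_side: int = 768) -> str:
    """Resize an image to fit the configured box and return it as a data URL."""
    pil_image = Image.open(BytesIO(body))
    file_format = pil_image.format or "PNG"

    # Only resize if both limits are set; 0 disables resizing.
    if max_long_side and max_short_side:
        if pil_image.width > pil_image.height:
            if pil_image.width > max_long_side:
                pil_image.thumbnail((max_long_side, max_short_side))
        else:
            if pil_image.height > max_long_side:
                pil_image.thumbnail((max_short_side, max_long_side))

    bio = BytesIO()
    pil_image.save(bio, format=file_format)

    return f"data:{content_type};base64,{base64.b64encode(bio.getvalue()).decode('utf-8')}"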
@@ -1312,7 +1316,9 @@ class GPTBot:

        # Truncate messages to fit within the token limit
        self._truncate(
            chat_messages[1:], self.max_tokens - 1, system_message=system_message
            chat_messages[1:],
            self.chat_api.max_tokens - 1,
            system_message=system_message,
        )

        # Check for a model override
@@ -1362,7 +1368,9 @@ class GPTBot:

        await self.matrix_client.room_typing(room.room_id, False)

    async def download_file(self, mxc) -> Optional[bytes]:
    async def download_file(
        self, mxc
    ) -> Union[DiskDownloadResponse, MemoryDownloadResponse]:
        """Download a file from the homeserver.

        Args: