feat: enable third-party model vision support

Introduced the `ForceVision` configuration option to allow third-party models to be used for image recognition within the OpenAI setup. This broadens the flexibility and applicability of the bot's image processing capabilities, which were previously restricted to a predefined set of vision models. Also added missing properties to the `OpenAI` class to provide comprehensive control over the bot's behavior, including options for forcing vision and tool usage, as well as emulating tool capabilities in models that do not officially support them. These enhancements make the bot more adaptable to various models and user needs, especially in self-hosted setups.

Additionally, updated the documentation and incremented the version to 0.3.12 to reflect these changes and improvements.
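For a self-hosted setup, opting in looks roughly like the following fragment of the bot's config file (the `Model` key name and model value are illustrative assumptions; the `[OpenAI]` section and `ForceVision` option are the ones introduced here):

```ini
[OpenAI]
APIKey = sk-yoursecretkey

# Hypothetical self-hosted model that is not on the known vision list
Model = llava-v1.6-34b

# Assume the model supports vision and send images to it as well
ForceVision = 1
```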
Kumi 2024-05-17 11:37:10 +02:00
parent 8e0cffe02a
commit 1cd7043a36
Signed by: kumi
GPG key ID: ECBCC9082395383F
4 changed files with 36 additions and 6 deletions


@@ -1,5 +1,10 @@
 # Changelog
+
+### 0.3.12 (2024-05-17)
+
+- Added `ForceVision` to OpenAI configuration to allow third-party models to be used for image recognition
+- Added some missing properties to `OpenAI` class
 
 ### 0.3.11 (2024-05-17)
 
 - Refactoring of AI provider handling in preparation for multiple AI providers: Introduced a `BaseAI` class that all AI providers must inherit from


@@ -121,6 +121,15 @@ APIKey = sk-yoursecretkey
 #
 # EmulateTools = 0
 
+# Force vision in the chat completion model
+#
+# By default, the bot only supports image recognition in known vision models.
+# If you set this to 1, the bot will assume that the model you're using supports
+# vision, and will send images to the model as well. This may be required for
+# some self-hosted models.
+#
+# ForceVision = 0
+
 # Advanced settings for the OpenAI API
 #
 # These settings are not required for normal operation, but can be used to

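The commented-out defaults above correspond to `configparser` fallbacks in the code: an option that is absent (or present only as a comment) resolves to the `fallback` value passed to `getboolean`. A minimal sketch of that behavior:

```python
import configparser

config = configparser.ConfigParser()
config.read_string("""
[OpenAI]
APIKey = sk-yoursecretkey
""")

section = config["OpenAI"]
# "ForceVision" is not set in the file, so the fallback applies
assert section.getboolean("ForceVision", fallback=False) is False

# Once the option is uncommented and set to 1, it parses as True
config.read_string("[OpenAI]\nForceVision = 1\n")
assert section.getboolean("ForceVision", fallback=False) is True
```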

@@ -7,7 +7,7 @@ allow-direct-references = true
 
 [project]
 name = "matrix-gptbot"
-version = "0.3.11"
+version = "0.3.12"
 authors = [
     { name="Kumi Mitterer", email="gptbot@kumi.email" },


@@ -89,13 +89,29 @@ class OpenAI(BaseAI):
     def presence_penalty(self):
         return self._config.getfloat("PresencePenalty", fallback=0.0)
 
+    @property
+    def force_vision(self):
+        return self._config.getboolean("ForceVision", fallback=False)
+
+    @property
+    def force_tools(self):
+        return self._config.getboolean("ForceTools", fallback=False)
+
+    @property
+    def emulate_tools(self):
+        return self._config.getboolean("EmulateTools", fallback=False)
+
     @property
     def max_tokens(self):
         # TODO: This should be model-specific
         return self._config.getint("MaxTokens", fallback=4000)
 
     def supports_chat_images(self):
-        return "vision" in self.chat_model or self.chat_model in ("gpt-4o",)
+        return (
+            "vision" in self.chat_model
+            or self.chat_model in ("gpt-4o",)
+            or self.force_vision
+        )
 
     def json_decode(self, data):
         if data.startswith("```json\n"):
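Standalone, the extended `supports_chat_images` check behaves as follows. This is a toy stand-in with the config plumbing stubbed out as plain attributes; only the method body matches the diff, and the model names used for the negative cases are illustrative:

```python
class OpenAIStub:
    """Toy stand-in for the OpenAI class, reproducing the vision check."""

    def __init__(self, chat_model, force_vision=False):
        self.chat_model = chat_model
        self.force_vision = force_vision

    def supports_chat_images(self):
        # Same logic as the diff: known vision models by name,
        # or anything at all when ForceVision is enabled
        return (
            "vision" in self.chat_model
            or self.chat_model in ("gpt-4o",)
            or self.force_vision
        )

# Known vision models are detected by name...
assert OpenAIStub("gpt-4-vision-preview").supports_chat_images()
assert OpenAIStub("gpt-4o").supports_chat_images()
# ...while unrecognized (e.g. self-hosted) models need ForceVision
assert not OpenAIStub("llava-v1.6-34b").supports_chat_images()
assert OpenAIStub("llava-v1.6-34b", force_vision=True).supports_chat_images()
```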
@@ -180,7 +196,7 @@ class OpenAI(BaseAI):
 
         # TODO: I believe more models support tools now, so this could be adapted
         if allow_override and "gpt-3.5-turbo" not in original_model:
-            if self.bot.config.getboolean("OpenAI", "ForceTools", fallback=False):
+            if self.force_tools:
                 self.logger.log("Overriding chat model to use tools")
                 chat_model = "gpt-3.5-turbo"
@@ -207,10 +223,10 @@ class OpenAI(BaseAI):
 
         if (
             use_tools
-            and self.bot.config.getboolean("OpenAI", "EmulateTools", fallback=False)
-            and not self.bot.config.getboolean("OpenAI", "ForceTools", fallback=False)
+            and self.emulate_tools
+            and not self.force_tools
             and "gpt-3.5-turbo" not in chat_model
-        ):
+        ):  # TODO: This should be adapted to use tools with more models
             self.bot.logger.log("Using tool emulation mode.", "debug")
 
             messages = (