From 1cd7043a36b131eb7ed50b5d7818f23cdff6bff1 Mon Sep 17 00:00:00 2001
From: Kumi <git@kumi.email>
Date: Fri, 17 May 2024 11:37:10 +0200
Subject: [PATCH] feat: enable third-party model vision support

Introduced the `ForceVision` configuration option to allow the use of third-party models for image recognition within the OpenAI setup. This change broadens the flexibility and applicability of the bot's image processing capabilities by no longer restricting them to predefined vision models. Also added missing properties to the `OpenAI` class to provide comprehensive control over the bot's behavior, including options for forcing vision and tools usage, along with emulating tool capabilities in models that do not officially support them. These enhancements make the bot more adaptable to various models and user needs, especially for self-hosted setups.

Additionally, updated the documentation and incremented the version to 0.3.12 to reflect these changes and improvements.
---
 CHANGELOG.md                    |  5 +++++
 config.dist.ini                 |  9 +++++++++
 pyproject.toml                  |  2 +-
 src/gptbot/classes/ai/openai.py | 26 +++++++++++++++++++++-----
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d1d8183..ab034d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+### 0.3.12 (2024-05-17)
+
+- Added `ForceVision` to OpenAI configuration to allow third-party models to be used for image recognition
+- Added some missing properties to the `OpenAI` class
+
 ### 0.3.11 (2024-05-17)
 
 - Refactoring of AI provider handling in preparation for multiple AI providers: Introduced a `BaseAI` class that all AI providers must inherit from
diff --git a/config.dist.ini b/config.dist.ini
index dc18262..aad7f62 100644
--- a/config.dist.ini
+++ b/config.dist.ini
@@ -121,6 +121,15 @@ APIKey = sk-yoursecretkey
 #
 # EmulateTools = 0
 
+# Force vision in the chat completion model
+#
+# By default, the bot only supports image recognition in known vision models.
+# If you set this to 1, the bot will assume that the model you're using supports
+# vision, and will send images to the model as well. This may be required for
+# some self-hosted models.
+#
+# ForceVision = 0
+
 # Advanced settings for the OpenAI API
 #
 # These settings are not required for normal operation, but can be used to
diff --git a/pyproject.toml b/pyproject.toml
index 7333f3c..c28e26b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ allow-direct-references = true
 
 [project]
 name = "matrix-gptbot"
-version = "0.3.11"
+version = "0.3.12"
 
 authors = [
   { name="Kumi Mitterer", email="gptbot@kumi.email" },
diff --git a/src/gptbot/classes/ai/openai.py b/src/gptbot/classes/ai/openai.py
index 1401eb8..47b3427 100644
--- a/src/gptbot/classes/ai/openai.py
+++ b/src/gptbot/classes/ai/openai.py
@@ -89,13 +89,29 @@ class OpenAI(BaseAI):
     def presence_penalty(self):
         return self._config.getfloat("PresencePenalty", fallback=0.0)
 
+    @property
+    def force_vision(self):
+        return self._config.getboolean("ForceVision", fallback=False)
+
+    @property
+    def force_tools(self):
+        return self._config.getboolean("ForceTools", fallback=False)
+
+    @property
+    def emulate_tools(self):
+        return self._config.getboolean("EmulateTools", fallback=False)
+
     @property
     def max_tokens(self):
         # TODO: This should be model-specific
         return self._config.getint("MaxTokens", fallback=4000)
 
     def supports_chat_images(self):
-        return "vision" in self.chat_model or self.chat_model in ("gpt-4o",)
+        return (
+            "vision" in self.chat_model
+            or self.chat_model in ("gpt-4o",)
+            or self.force_vision
+        )
 
     def json_decode(self, data):
         if data.startswith("```json\n"):
@@ -180,7 +196,7 @@ class OpenAI(BaseAI):
 
         # TODO: I believe more models support tools now, so this could be adapted
         if allow_override and "gpt-3.5-turbo" not in original_model:
-            if self.bot.config.getboolean("OpenAI", "ForceTools", fallback=False):
+            if self.force_tools:
                 self.logger.log("Overriding chat model to use tools")
                 chat_model = "gpt-3.5-turbo"
 
@@ -207,10 +223,10 @@ class OpenAI(BaseAI):
 
         if (
             use_tools
-            and self.bot.config.getboolean("OpenAI", "EmulateTools", fallback=False)
-            and not self.bot.config.getboolean("OpenAI", "ForceTools", fallback=False)
+            and self.emulate_tools
+            and not self.force_tools
             and "gpt-3.5-turbo" not in chat_model
-        ):
+        ):  # TODO: This should be adapted to use tools with more models
             self.bot.logger.log("Using tool emulation mode.", "debug")
 
             messages = (