diff --git a/src/gptbot/classes/openai.py b/src/gptbot/classes/openai.py
index fa55858..5352230 100644
--- a/src/gptbot/classes/openai.py
+++ b/src/gptbot/classes/openai.py
@@ -4,6 +4,7 @@
 import tiktoken
 import asyncio
 import json
+import base64
 
 from functools import partial
 from contextlib import closing
@@ -387,7 +388,7 @@ Only the event_types mentioned above are allowed, you must not respond in any ot
         Yields:
             bytes: The audio data.
         """
-        self.logger.log(f"Generating speech from text '{text}'...")
+        self.logger.log(f"Generating speech from text of length: {len(text.split())} words...")
 
         speech = await self.openai_api.audio.speech.create(
             model=self.tts_model,
@@ -475,3 +476,40 @@ Only the event_types mentioned above are allowed, you must not respond in any ot
         images.append(image)
 
         return images, len(images)
+
+    async def describe_images(self, messages: list, user: Optional[str] = None) -> Tuple[str, int]:
+        """Generate descriptions for the images in a conversation.
+
+        Args:
+            messages (list): The conversation messages (may contain images).
+            user (Optional[str]): The user identifier passed through to the API.
+
+        Returns:
+            Tuple[str, int]: The description and the number of tokens used.
+        """
+        self.logger.log("Generating description for images in conversation...")
+
+        system_message = "You are an image description generator. You generate descriptions for all images in the current conversation, one after another."
+
+        # NOTE(review): assumes messages[0] is the original system message — replace it with ours.
+        messages = [
+            {
+                "role": "system",
+                "content": system_message
+            }
+        ] + messages[1:]
+
+        # Fall back to a vision-capable model if the configured chat model is not one.
+        if "vision" not in (chat_model := self.chat_model):
+            chat_model = "gpt-4-vision-preview"
+
+        chat_partial = partial(
+            self.openai_api.chat.completions.create,
+            model=chat_model,
+            messages=messages,
+            user=user,
+        )
+
+        response = await self._request_with_retries(chat_partial)
+
+        return response.choices[0].message.content, response.usage.total_tokens
\ No newline at end of file
diff --git a/src/gptbot/tools/base.py b/src/gptbot/tools/base.py
index 69975b5..e85754d 100644
--- a/src/gptbot/tools/base.py
+++ b/src/gptbot/tools/base.py
@@ -4,9 +4,10 @@
 class BaseTool:
     def __init__(self, **kwargs):
         self.kwargs = kwargs
-        self.bot = kwargs["bot"]
-        self.room = kwargs["room"]
-        self.user = kwargs["user"]
+        self.bot = kwargs.get("bot")
+        self.room = kwargs.get("room")
+        self.user = kwargs.get("user")
+        self.messages = kwargs.get("messages", [])
 
     async def run(self):
         raise NotImplementedError()
diff --git a/src/gptbot/tools/imagedescription.py b/src/gptbot/tools/imagedescription.py
index b82f07c..2c83d98 100644
--- a/src/gptbot/tools/imagedescription.py
+++ b/src/gptbot/tools/imagedescription.py
@@ -1,24 +1,15 @@
 from .base import BaseTool, Handover
 
 class Imagedescription(BaseTool):
-    DESCRIPTION = "Describe the content of an image."
+    DESCRIPTION = "Describe the content of the images in the conversation."
     PARAMETERS = {
         "type": "object",
         "properties": {
-            "image": {
-                "type": "string",
-                "description": "The image to describe.",
-            },
         },
-        "required": ["image"],
     }
 
     async def run(self):
-        """Describe an image.
-
-        This tool only hands over to the original model, if applicable.
-        It is intended to handle the case where GPT-3 thinks it is asked to
-        *generate* an image, but the user actually wants to *describe* an
-        image...
-        """
-        raise Handover()
\ No newline at end of file
+        """Describe images in the conversation."""
+        image_api = self.bot.image_api
+
+        return (await image_api.describe_images(self.messages, self.user))[0]
\ No newline at end of file