diff --git a/README.md b/README.md index 6f3122a..3ad94b6 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # GPTbot -GPTbot is a simple bot that uses different APIs to generate responses to +GPTbot is a simple bot that uses different APIs to generate responses to messages in a Matrix room. It is called GPTbot because it was originally intended to only use GPT-3 to -generate responses. However, it supports other services/APIs, and I will +generate responses. However, it supports other services/APIs, and I will probably add more in the future, so the name is a bit misleading. ## Features @@ -12,9 +12,12 @@ probably add more in the future, so the name is a bit misleading. - AI-generated responses to messages in a Matrix room (chatbot) - Currently supports OpenAI (tested with `gpt-3.5-turbo` and `gpt-4`) - AI-generated pictures via the `!gptbot imagine` command - - Currently supports OpenAI (DALL-E) + - Currently supports OpenAI (DALL-E-2/DALL-E-3) - Mathematical calculations via the `!gptbot calculate` command - Currently supports WolframAlpha +- Voice input and output + - Currently supports OpenAI (TTS and Whisper) + - Beta feature, see dedicated section for details - Automatic classification of messages (for `imagine`, `calculate`, etc.) - Beta feature, see Usage section for details - Really useful commands like `!gptbot help` and `!gptbot coin` @@ -26,9 +29,9 @@ probably add more in the future, so the name is a bit misleading. ## Installation -To run the bot, you will need Python 3.10 or newer. +To run the bot, you will need Python 3.10 or newer. -The bot has been tested with Python 3.11 on Arch, but should work with any +The bot has been tested with Python 3.11 on Arch, but should work with any current version, and should not require any special dependencies or operating system features. @@ -53,7 +56,7 @@ A release to PyPI is planned, but not yet available. ### Development -Clone the repository and install the requirements to a virtual environment. 
+Clone the repository and install the requirements to a virtual environment. ```shell # Clone the repository @@ -145,6 +148,14 @@ Also note that this feature conflicts with the `always_reply false` setting - or rather, it doesn't make sense then because you already have to explicitly specify the command to use. +## Voice input and output + +The bot supports voice input and output, but it is disabled by default. To +enable it, use the `!gptbot roomsettings` command to change the settings for +the current room. `!gptbot roomsettings stt true` will enable voice input, +and `!gptbot roomsettings tts true` will enable voice output. Note that this +may be a little unreliable at the moment, especially voice input. + ## Troubleshooting **Help, the bot is not responding!** @@ -181,4 +192,5 @@ please check the logs and open an issue if you can't figure out what's going on. ## License -This project is licensed under the terms of the MIT license. See the [LICENSE](LICENSE) file for details. +This project is licensed under the terms of the MIT license. See the [LICENSE](LICENSE) +file for details. 
diff --git a/pyproject.toml b/pyproject.toml index 405526d..bc2d818 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ allow-direct-references = true [project] name = "matrix-gptbot" -version = "0.2.0" +version = "0.2.1" authors = [ { name="Kumi Mitterer", email="gptbot@kumi.email" }, @@ -39,6 +39,7 @@ dependencies = [ [project.optional-dependencies] openai = [ "openai>=1.2", + "pydub", ] wolframalpha = [ diff --git a/requirements.txt b/requirements.txt index 28a5039..c19cce1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,6 @@ duckdb python-magic pillow wolframalpha +pydub git+https://kumig.it/kumitterer/trackingmore-api-tool.git \ No newline at end of file diff --git a/src/gptbot/classes/bot.py b/src/gptbot/classes/bot.py index 2c318df..7682c60 100644 --- a/src/gptbot/classes/bot.py +++ b/src/gptbot/classes/bot.py @@ -83,6 +83,8 @@ class GPTBot: chat_api: Optional[OpenAI] = None image_api: Optional[OpenAI] = None classification_api: Optional[OpenAI] = None + tts_api: Optional[OpenAI] = None + stt_api: Optional[OpenAI] = None parcel_api: Optional[TrackingMore] = None operator: Optional[str] = None room_ignore_list: List[str] = [] # List of rooms to ignore invites from @@ -149,9 +151,14 @@ class GPTBot: if "AllowedUsers" in config["GPTBot"]: bot.allowed_users = json.loads(config["GPTBot"]["AllowedUsers"]) - bot.chat_api = bot.image_api = bot.classification_api = OpenAI( - bot, config["OpenAI"]["APIKey"], config["OpenAI"].get("Model"), - config["OpenAI"].get("ImageModel"), config["OpenAI"].get("BaseURL"), bot.logger + bot.chat_api = bot.image_api = bot.classification_api = bot.tts_api = bot.stt_api = OpenAI( + bot=bot, + api_key=config["OpenAI"]["APIKey"], + chat_model=config["OpenAI"].get("Model"), + image_model=config["OpenAI"].get("ImageModel"), + tts_model=config["OpenAI"].get("TTSModel"), + stt_model=config["OpenAI"].get("STTModel"), + base_url=config["OpenAI"].get("BaseURL") ) bot.max_tokens = config["OpenAI"].getint("MaxTokens", 
bot.max_tokens) bot.max_messages = config["OpenAI"].getint("MaxMessages", bot.max_messages) @@ -207,7 +214,7 @@ class GPTBot: return user_id - async def _last_n_messages(self, room: str | MatrixRoom, n: Optional[int], ignore_bot_commands: bool = True): + async def _last_n_messages(self, room: str | MatrixRoom, n: Optional[int], ignore_bot_commands: bool = False): messages = [] n = n or self.max_messages room_id = room.room_id if isinstance(room, MatrixRoom) else room @@ -264,8 +271,7 @@ class GPTBot: messages.append(event) if isinstance(event, RoomMessageMedia): - if event.sender != self.matrix_client.user_id: - messages.append(event) + messages.append(event) self.logger.log(f"Found {len(messages)} messages (limit: {n})", "debug") @@ -574,6 +580,39 @@ class GPTBot: self.logger.log("Sent image", "debug") + async def send_file( + self, room: MatrixRoom, file: bytes, filename: str, mime: str, msgtype: str + ): + """Send a file to a room. + + Args: + room (MatrixRoom): The room to send the file to. + file (bytes): The file to send. + filename (str): The name of the file. + mime (str): The MIME type of the file. + """ + + self.logger.log( + f"Sending file of size {len(file)} bytes to room {room.room_id}", "debug" + ) + + content_uri = await self.upload_file(file, filename, mime) + + self.logger.log("Uploaded file - sending message...", "debug") + + content = { + "body": filename, + "info": {"mimetype": mime, "size": len(file)}, + "msgtype": msgtype, + "url": content_uri, + } + + status = await self.matrix_client.room_send( + room.room_id, "m.room.message", content + ) + + self.logger.log("Sent file", "debug") + async def send_message( self, room: MatrixRoom | str, message: str, notice: bool = False ): @@ -861,6 +900,46 @@ class GPTBot: space, ) + def room_uses_stt(self, room: MatrixRoom | str) -> bool: + """Check if a room uses STT. + + Args: + room (MatrixRoom | str): The room to check. + + Returns: + bool: Whether the room uses STT. 
+ """ + room_id = room.room_id if isinstance(room, MatrixRoom) else room + + with closing(self.database.cursor()) as cursor: + cursor.execute( + "SELECT value FROM room_settings WHERE room_id = ? AND setting = ?", + (room_id, "stt"), + ) + result = cursor.fetchone() + + return False if not result else bool(int(result[0])) + + def room_uses_tts(self, room: MatrixRoom | str) -> bool: + """Check if a room uses TTS. + + Args: + room (MatrixRoom | str): The room to check. + + Returns: + bool: Whether the room uses TTS. + """ + room_id = room.room_id if isinstance(room, MatrixRoom) else room + + with closing(self.database.cursor()) as cursor: + cursor.execute( + "SELECT value FROM room_settings WHERE room_id = ? AND setting = ?", + (room_id, "tts"), + ) + result = cursor.fetchone() + + return False if not result else bool(int(result[0])) + def respond_to_room_messages(self, room: MatrixRoom | str) -> bool: """Check whether the bot should respond to all messages sent in a room. @@ -955,7 +1034,25 @@ class GPTBot: message_body = message.body if not self.chat_api.supports_chat_images() else [{"type": "text", "text": message.body}] chat_messages.append({"role": role, "content": message_body}) - if self.chat_api.supports_chat_images() and isinstance(message, RoomMessageMedia): + if isinstance(message, RoomMessageAudio): + role = ( + "assistant" if message.sender == self.matrix_client.user_id else "user" + ) + if message == event or (not message.event_id == event.event_id): + if self.room_uses_stt(room): + try: + download = await self.download_file(message.url) + message_text = await self.stt_api.speech_to_text(download.body) + except Exception as e: + self.logger.log(f"Error generating text from audio: {e}", "error") + message_text = message.body + else: + message_text = message.body + + message_body = message_text if not self.chat_api.supports_chat_images() else [{"type": "text", "text": message_text}] + chat_messages.append({"role": role, "content": message_body}) + + if 
self.chat_api.supports_chat_images() and isinstance(message, RoomMessageImage): image_url = message.url download = await self.download_file(image_url) @@ -1001,6 +1098,20 @@ class GPTBot: self.logger.log(f"Sending response to room {room.room_id}...") + if self.room_uses_tts(room): + self.logger.log("TTS enabled for room", "debug") + + try: + audio = await self.tts_api.text_to_speech(response) + await self.send_file(room, audio, response, "audio/mpeg", "m.audio") + return + + except Exception as e: + self.logger.log(f"Error generating audio: {e}", "error") + await self.send_message( + room, "Something went wrong generating audio file.", True + ) + message = await self.send_message(room, response) else: diff --git a/src/gptbot/classes/openai.py b/src/gptbot/classes/openai.py index 68e2c7d..f788344 100644 --- a/src/gptbot/classes/openai.py +++ b/src/gptbot/classes/openai.py @@ -3,13 +3,16 @@ import requests import asyncio import json + from functools import partial from contextlib import closing +from typing import Dict, List, Tuple, Generator, AsyncGenerator, Optional, Any +from io import BytesIO + +from pydub import AudioSegment from .logging import Logger -from typing import Dict, List, Tuple, Generator, AsyncGenerator, Optional, Any - ASSISTANT_CODE_INTERPRETER = [ { "type": "code_interpreter", @@ -30,17 +33,23 @@ class OpenAI: classification_api = chat_api image_model: str = "dall-e-2" + tts_model: str = "tts-1-hd" + tts_voice: str = "alloy" + stt_model: str = "whisper-1" operator: str = "OpenAI ([https://openai.com](https://openai.com))" - def __init__(self, bot, api_key, chat_model=None, image_model=None, base_url=None, logger=None): + def __init__(self, bot, api_key, chat_model=None, image_model=None, tts_model=None, tts_voice=None, stt_model=None, base_url=None, logger=None): self.bot = bot self.api_key = api_key self.chat_model = chat_model or self.chat_model self.image_model = image_model or self.image_model - self.logger = logger or Logger() + self.logger 
= logger or bot.logger or Logger() self.base_url = base_url or openai.base_url self.openai_api = openai.AsyncOpenAI(api_key=self.api_key, base_url=self.base_url) + self.tts_model = tts_model or self.tts_model + self.tts_voice = tts_voice or self.tts_voice + self.stt_model = stt_model or self.stt_model def supports_chat_images(self): return "vision" in self.chat_model @@ -266,6 +275,47 @@ Only the event_types mentioned above are allowed, you must not respond in any ot return result, tokens_used + async def text_to_speech(self, text: str, user: Optional[str] = None) -> bytes: + """Generate speech from text. + + Args: + text (str): The text to use. + + Returns: + bytes: The audio data. + """ + self.logger.log(f"Generating speech from text '{text}'...") + + speech = await self.openai_api.audio.speech.create( + model=self.tts_model, + input=text, + voice=self.tts_voice + ) + + return speech.content + + async def speech_to_text(self, audio: bytes, user: Optional[str] = None) -> str: + """Generate text from speech. + + Args: + audio (bytes): The audio data. + + Returns: + str: The transcribed text. + """ + self.logger.log("Generating text from speech...") + + response = await self.openai_api.audio.transcriptions.create( + model=self.stt_model, + file=BytesIO(audio), # NOTE(review): bare BytesIO has no filename; the SDK may need one to detect the audio format — verify + ) + + text = response.text + + self.logger.log(f"Generated text '{text}'.") + + return text + async def generate_image(self, prompt: str, user: Optional[str] = None) -> Generator[bytes, None, None]: """Generate an image from a prompt. diff --git a/src/gptbot/commands/__init__.py b/src/gptbot/commands/__init__.py index 17aa5cb..ca65ec5 100644 --- a/src/gptbot/commands/__init__.py +++ b/src/gptbot/commands/__init__.py @@ -22,6 +22,7 @@ for command in [ "dice", "parcel", "space", + "tts", ]: function = getattr(import_module( "."
+ command, "gptbot.commands"), "command_" + command) diff --git a/src/gptbot/commands/help.py b/src/gptbot/commands/help.py index db1a317..784d6e0 100644 --- a/src/gptbot/commands/help.py +++ b/src/gptbot/commands/help.py @@ -19,6 +19,7 @@ async def command_help(room: MatrixRoom, event: RoomMessageText, bot): - !gptbot chat \ - Send a message to the chat API - !gptbot classify \ - Classify a message using the classification API - !gptbot custom \ - Used for custom commands handled by the chat model and defined through the room's system message +- !gptbot roomsettings [use_classification|use_timing|always_reply|system_message|tts] [true|false|\] - Get or set room settings - !gptbot ignoreolder - Ignore messages before this point as context """ diff --git a/src/gptbot/commands/roomsettings.py b/src/gptbot/commands/roomsettings.py index 6d96441..7ea04e3 100644 --- a/src/gptbot/commands/roomsettings.py +++ b/src/gptbot/commands/roomsettings.py @@ -25,6 +25,8 @@ async def command_roomsettings(room: MatrixRoom, event: RoomMessageText, bot): (room.room_id, "system_message", value, value) ) + bot.database.commit() + await bot.send_message(room, f"Alright, I've stored the system message: '{value}'.", True) return @@ -35,7 +37,7 @@ async def command_roomsettings(room: MatrixRoom, event: RoomMessageText, bot): await bot.send_message(room, f"The current system message is: '{system_message}'.", True) return - if setting in ("use_classification", "always_reply", "use_timing"): + if setting in ("use_classification", "always_reply", "use_timing", "tts", "stt"): if value: if value.lower() in ["true", "false"]: value = value.lower() == "true" @@ -49,6 +51,8 @@ async def command_roomsettings(room: MatrixRoom, event: RoomMessageText, bot): (room.room_id, setting, "1" if value else "0", "1" if value else "0") ) + bot.database.commit() + await bot.send_message(room, f"Alright, I've set {setting} to: '{value}'.", True) return @@ -81,6 +85,9 @@ async def command_roomsettings(room: 
MatrixRoom, event: RoomMessageText, bot): - system_message [message]: Get or set the system message to be sent to the chat model - classification [true/false]: Get or set whether the room uses classification - always_reply [true/false]: Get or set whether the bot should reply to all messages (if false, only reply to mentions and commands) +- tts [true/false]: Get or set whether the bot should generate audio files instead of sending text +- stt [true/false]: Get or set whether the bot should attempt to process information from audio files +- timing [true/false]: Get or set whether the bot should return information about the time it took to generate a response """ await bot.send_message(room, message, True) diff --git a/src/gptbot/commands/tts.py b/src/gptbot/commands/tts.py new file mode 100644 index 0000000..d048a54 --- /dev/null +++ b/src/gptbot/commands/tts.py @@ -0,0 +1,23 @@ +from nio.events.room_events import RoomMessageText +from nio.rooms import MatrixRoom + + +async def command_tts(room: MatrixRoom, event: RoomMessageText, bot): + prompt = " ".join(event.body.split()[2:]) + + if prompt: + bot.logger.log("Generating speech...") + + try: + content = await bot.tts_api.text_to_speech(prompt, user=room.room_id) + except Exception as e: + bot.logger.log(f"Error generating speech: {e}", "error") + await bot.send_message(room, "Sorry, I couldn't generate an audio file. Please try again later.", True) + return + + bot.logger.log(f"Sending audio file...") + await bot.send_file(room, content, "audio.mp3", "audio/mpeg", "m.audio") + + return + + await bot.send_message(room, "You need to provide a prompt.", True)