commit 629718b79f9acf17796c4929c2d04fa5833ebf01
Author: Kumi <git@kumi.email>
Date:   Mon Aug 19 17:08:37 2024 +0200

    feat: add inclusive language Matrix bot
    
    Added an inclusive language bot for Matrix chat, which detects and suggests replacements for non-inclusive or unacceptable language based on configurable terms. Bot can send direct messages to users and notify room admins. Included configuration and default dictionary files.
    
    - Added `.gitignore` for virtual environments, build artifacts, and Python cache
    - Config YAML template and default inclusive dictionary terms
    - Required dependencies listed in `requirements-dev.txt`

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..133ab30
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+venv/
+dist/
+*.pyc
+__pycache__/
\ No newline at end of file
diff --git a/bot.py b/bot.py
new file mode 100644
index 0000000..f09fd3a
--- /dev/null
+++ b/bot.py
@@ -0,0 +1,249 @@
+"""Matrix bot that privately nudges users towards inclusive language."""
+
+import asyncio
+import functools
+import logging
+import pathlib
+import re
+
+import yaml
+from nio import (
+    AsyncClient,
+    LoginResponse,
+    MatrixRoom,
+    RoomMessageText,
+    RoomCreateResponse,
+    RoomGetStateResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+# Dictionary bundled with the bot; used when the config names no other file.
+DEFAULT_DICTIONARY_PATH = pathlib.Path(__file__).parent / "dictionary" / "default.yaml"
+
+# Power level from which a member counts as a room admin, unless the config
+# sets `admin_power_level`.
+DEFAULT_ADMIN_POWER_LEVEL = 100
+
+# Keys the config file has to provide.
+REQUIRED_CONFIG_KEYS = ("server_url", "username", "password")
+
+
+@functools.lru_cache(maxsize=None)
+def _term_pattern(term):
+    """Return a compiled, case-insensitive whole-word pattern for *term*.
+
+    A plain substring test also flags harmless words that merely contain a
+    term ("cocktail", "Japan", "mastermind"), so the term must not be directly
+    preceded or followed by another word character. Inflected forms
+    ("blacklisted") therefore only match if the dictionary lists them.
+    """
+    return re.compile(rf"(?<!\w){re.escape(term)}(?!\w)", re.IGNORECASE)
+
+
+def _event_field(event, name):
+    """Read *name* from a state event given as a dict or as an object."""
+    if isinstance(event, dict):
+        return event.get(name)
+    return getattr(event, name, None)
+
+
+class InclusiveLanguageBot:
+    """Watch Matrix rooms and privately point out flagged terms."""
+
+    def __init__(self, config_path):
+        """Load config and term dictionary, then set up the Matrix client.
+
+        Args:
+            config_path: Path of the YAML config file. `server_url`,
+                `username` and `password` are required; `dictionary_path`
+                and `admin_power_level` are optional.
+
+        Raises:
+            KeyError: If a required config key is missing.
+        """
+        with open(config_path, "r", encoding="utf-8") as config_file:
+            # An empty file parses to None; fall back to an empty mapping so
+            # the check below can name the missing keys.
+            self.config = yaml.safe_load(config_file) or {}
+
+        missing = [key for key in REQUIRED_CONFIG_KEYS if key not in self.config]
+        if missing:
+            raise KeyError(f"{config_path} lacks required keys: {', '.join(missing)}")
+
+        dictionary_path = self.config.get("dictionary_path", DEFAULT_DICTIONARY_PATH)
+        with open(dictionary_path, "r", encoding="utf-8") as dictionary_file:
+            dictionary = yaml.safe_load(dictionary_file) or {}
+        # A section that is absent or empty simply disables that check.
+        self.non_inclusive_terms = dictionary.get("non_inclusive_terms") or {}
+        self.unacceptable_terms = dictionary.get("unacceptable_terms") or []
+
+        self.client = AsyncClient(self.config["server_url"], self.config["username"])
+        # Rooms created by `get_dm_room`, keyed by user id. A new room is not
+        # expected in `client.rooms` before a later sync, so it is remembered
+        # here to avoid creating a second room for the same user meanwhile.
+        self.dm_rooms = {}
+
+    def check_inclusive_language(self, message):
+        """Return one suggestion per non-inclusive term found in *message*.
+
+        Each suggestion is a dict with the keys `term`, `replacement` and
+        `reason`. The list is empty when nothing matched.
+        """
+        return [
+            {
+                "term": term,
+                "replacement": details["replacement"],
+                "reason": details["reason"],
+            }
+            for term, details in self.non_inclusive_terms.items()
+            if _term_pattern(str(term)).search(message)
+        ]
+
+    def check_unacceptable_language(self, message):
+        """Return the first unacceptable-term entry in *message*, or None."""
+        for entry in self.unacceptable_terms:
+            if _term_pattern(str(entry["term"])).search(message):
+                return entry
+        return None
+
+    async def get_dm_room(self, user_id):
+        """Return the id of a two-member room shared with *user_id*.
+
+        An existing room is reused; otherwise a direct room is created and
+        the user invited. Returns None if the room could not be created.
+        """
+        for room_id, room in self.client.rooms.items():
+            if user_id in room.users and len(room.users) == 2:
+                return room_id
+
+        # A room created moments ago is not in `client.rooms` yet.
+        pending = self.dm_rooms.get(user_id)
+        if pending and pending not in self.client.rooms:
+            return pending
+
+        response = await self.client.room_create(invite=[user_id], is_direct=True)
+        if not isinstance(response, RoomCreateResponse):
+            logger.warning("Could not create a DM room with %s: %s", user_id, response)
+            return None
+
+        self.dm_rooms[user_id] = response.room_id
+        return response.room_id
+
+    async def send_private_message(self, user_id, message):
+        """Send *message* as plain text to the DM room of *user_id*.
+
+        Nothing is sent when no such room is available.
+        """
+        room_id = await self.get_dm_room(user_id)
+        if not room_id:
+            return
+        await self.client.room_send(
+            room_id,
+            message_type="m.room.message",
+            content={"msgtype": "m.text", "body": message},
+        )
+
+    async def get_room_admins(self, room_id):
+        """Return the ids of the users with admin power in *room_id*.
+
+        The threshold is the `admin_power_level` config value (default 100).
+        The list is empty when the room state cannot be fetched.
+        """
+        response = await self.client.room_get_state(room_id)
+        if not isinstance(response, RoomGetStateResponse):
+            logger.warning("Could not fetch the state of %s: %s", room_id, response)
+            return []
+
+        threshold = self.config.get("admin_power_level", DEFAULT_ADMIN_POWER_LEVEL)
+        admins = []
+        for event in response.events:
+            # NOTE(review): matrix-nio appears to pass the state on as raw
+            # dicts, which have no `.type` attribute; accept both shapes.
+            if _event_field(event, "type") != "m.room.power_levels":
+                continue
+            users = (_event_field(event, "content") or {}).get("users", {})
+            admins.extend(user for user, level in users.items() if level >= threshold)
+        return admins
+
+    async def message_callback(self, room: MatrixRoom, event: RoomMessageText):
+        """Check one text message and send the resulting private notices.
+
+        Unacceptable terms are reported to the room admins and the sender;
+        non-inclusive terms only produce suggestions for the sender.
+        """
+        # `client.user` is the name from the config, which may be just a
+        # localpart, while senders are full ids: compare against both. The
+        # notices quote the flagged term, so answering them would loop.
+        own_ids = (self.client.user_id, self.client.user)
+        if event.sender in own_ids:
+            return
+
+        unacceptable_entry = self.check_unacceptable_language(event.body)
+        if unacceptable_entry:
+            term = unacceptable_entry["term"]
+            reason = unacceptable_entry["reason"]
+
+            admin_response = (
+                f"Attention: The term '{term}' used by {event.sender} "
+                f"in {room.room_id} might be unacceptable ({reason})."
+            )
+            for admin in await self.get_room_admins(room.room_id):
+                # The bot is in every DM room it uses, so looking up a room
+                # "with itself" would pick some other user's DM room.
+                if admin not in own_ids:
+                    await self.send_private_message(admin, admin_response)
+
+            response = (
+                f"Your message in {room.room_id} contains the term '{term}' "
+                f"which might be unacceptable ({reason}). "
+                "Please consider revising it."
+            )
+            await self.send_private_message(event.sender, response)
+
+        suggestions = self.check_inclusive_language(event.body)
+        if suggestions:
+            lines = [
+                "I've noticed some potentially non-inclusive language "
+                f"in your message in {room.room_id}:"
+            ]
+            lines.extend(
+                f"- '{item['term']}' could potentially be replaced with "
+                f"'{item['replacement']}' ({item['reason']})."
+                for item in suggestions
+            )
+            lines.append(
+                "Thank you for using inclusive language and making our "
+                "community a better place! 🌈🦄"
+            )
+            await self.send_private_message(event.sender, "\n".join(lines))
+
+    async def run(self):
+        """Log in and handle messages until the sync loop ends.
+
+        Raises:
+            RuntimeError: If the login is rejected.
+        """
+        try:
+            response = await self.client.login(self.config["password"])
+            if not isinstance(response, LoginResponse):
+                raise RuntimeError(f"Login failed: {response}")
+
+            # Catch up before registering the callback, so that messages
+            # already in the room history are not processed at startup.
+            await self.client.sync(timeout=30000, full_state=True)
+            self.client.add_event_callback(self.message_callback, RoomMessageText)
+            await self.client.sync_forever(timeout=30000)
+        finally:
+            # Release the HTTP session on every exit path.
+            await self.client.close()
+
+
+def main():
+    """Run the bot with the `config.yaml` from the working directory."""
+    bot = InclusiveLanguageBot("config.yaml")
+    asyncio.run(bot.run())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/config.dist.yaml b/config.dist.yaml
new file mode 100644
index 0000000..e69de29
diff --git a/dictionary/default.yaml b/dictionary/default.yaml
new file mode 100644
index 0000000..69829b6
--- /dev/null
+++ b/dictionary/default.yaml
@@ -0,0 +1,144 @@
+non_inclusive_terms:
+  blacklist:
+    replacement: "blocklist"
+    reason: "Avoids racial connotations"
+  whitelist:
+    replacement: "allowlist"
+    reason: "Avoids racial connotations"
+  master:
+    replacement: "main"
+    reason: "Avoids references to slavery"
+  slave:
+    replacement: "replica"
+    reason: "Avoids references to slavery"
+  sanity check:
+    replacement: "quick check"
+    reason: "Avoids ableist language"
+  dummy value:
+    replacement: "placeholder value"
+    reason: "Avoids ableist language"
+  man hours:
+    replacement: "person-hours"
+    reason: "Gender-neutral"
+  manpower:
+    replacement: "workforce"
+    reason: "Gender-neutral"
+  grandfathered:
+    replacement: "legacy status"
+    reason: "Avoids ageist language"
+  crazy:
+    replacement: "unreasonable"
+    reason: "Avoids ableist language"
+  insane:
+    replacement: "unreasonable"
+    reason: "Avoids ableist language"
+  crippled:
+    replacement: "restricted"
+    reason: "Avoids ableist language"
+  dumb:
+    replacement: "unwise"
+    reason: "Avoids ableist language"
+  lame:
+    replacement: "unimpressive"
+    reason: "Avoids ableist language"
+  powwow:
+    replacement: "meeting"
+    reason: "Avoids cultural appropriation"
+  spirit animal:
+    replacement: "inspiration"
+    reason: "Avoids cultural appropriation"
+  tribe:
+    replacement: "group"
+    reason: "Avoids cultural appropriation"
+  off the reservation:
+    replacement: "off track"
+    reason: "Avoids cultural appropriation"
+  Eskimo:
+    replacement: "Inuit or Yupik"
+    reason: "More accurate and respectful term"
+  gypped:
+    replacement: "cheated"
+    reason: "Avoids ethnic slur"
+  long time no see:
+    replacement: "it's been a while"
+    reason: "Avoids mocking non-native English speakers"
+  no can do:
+    replacement: "unable to do"
+    reason: "Avoids mocking non-native English speakers"
+  black sheep:
+    replacement: "outcast"
+    reason: "Avoids racial connotations"
+  cakewalk:
+    replacement: "easy task"
+    reason: "Avoids references to slavery"
+  peanut gallery:
+    replacement: "audience"
+    reason: "Avoids classist language"
+  sold down the river:
+    replacement: "betrayed"
+    reason: "Avoids references to slavery"
+  uppity:
+    replacement: "arrogant"
+    reason: "Avoids racial connotations"
+  grandfather clause:
+    replacement: "legacy clause"
+    reason: "Avoids ageist language"
+  gyp:
+    replacement: "cheat"
+    reason: "Avoids ethnic slur"
+  # Add more term-improvement pairs here
+
+unacceptable_terms:
+  - term: "nigga"
+    reason: "Racial slur"
+  - term: "nigger"
+    reason: "Racial slur"
+  - term: "fag"
+    reason: "Homophobic slur"
+  - term: "tranny"
+    reason: "Transphobic slur"
+  - term: "retard"
+    reason: "Ableist slur"
+  - term: "spic"
+    reason: "Racial slur"
+  - term: "chink"
+    reason: "Racial slur"
+  - term: "gook"
+    reason: "Racial slur"
+  - term: "kike"
+    reason: "Anti-Semitic slur"
+  - term: "cunt"
+    reason: "Sexist slur"
+  - term: "faggot"
+    reason: "Homophobic slur"
+  - term: "dyke"
+    reason: "Homophobic slur"
+  - term: "coon"
+    reason: "Racial slur"
+  - term: "wetback"
+    reason: "Racial slur"
+  - term: "jap"
+    reason: "Racial slur"
+  - term: "towelhead"
+    reason: "Racial slur"
+  - term: "sandnigger"
+    reason: "Racial slur"
+  - term: "whore"
+    reason: "Sexist slur"
+  - term: "slut"
+    reason: "Sexist slur"
+  - term: "bitch"
+    reason: "Sexist slur"
+  - term: "pussy"
+    reason: "Sexist slur"
+  - term: "dickhead"
+    reason: "Sexist slur"
+  - term: "asshole"
+    reason: "Offensive language"
+  - term: "cock"
+    reason: "Offensive language"
+  - term: "motherfucker"
+    reason: "Offensive language"
+  - term: "cocksucker"
+    reason: "Offensive language"
+  # Add more unacceptable terms here
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..58c6b72
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,4 @@
+matrix-nio
+pyyaml
+black
+ruff
\ No newline at end of file