commit 629718b79f9acf17796c4929c2d04fa5833ebf01
Author: Kumi
Date:   Mon Aug 19 17:08:37 2024 +0200

    feat: add inclusive language Matrix bot

    Added an inclusive language bot for Matrix chat, which detects and
    suggests replacements for non-inclusive or unacceptable language based
    on configurable terms. Bot can send direct messages to users and notify
    room admins. Included configuration and default dictionary files.

    - Added `.gitignore` for virtual environments, build artifacts, and Python cache
    - Config YAML template and default inclusive dictionary terms
    - Required dependencies listed in `requirements-dev.txt`

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..133ab30
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+venv/
+dist/
+*.pyc
+__pycache__/
\ No newline at end of file
diff --git a/bot.py b/bot.py
new file mode 100644
index 0000000..f09fd3a
--- /dev/null
+++ b/bot.py
@@ -0,0 +1,196 @@
+import asyncio
+import pathlib
+import re
+
+import yaml
+from nio import (
+    AsyncClient,
+    MatrixRoom,
+    RoomMessageText,
+    RoomCreateResponse,
+    RoomGetStateResponse,
+)
+
+
+class InclusiveLanguageBot:
+    """Matrix bot that privately flags non-inclusive or unacceptable terms.
+
+    Terms come from a YAML dictionary; suggestions are sent to the author via
+    direct message, and unacceptable terms are also reported to room admins.
+    """
+
+    def __init__(self, config_path):
+        """Load the configuration and term dictionary, then build the client.
+
+        Args:
+            config_path: Path to the YAML config providing ``server_url``,
+                ``username``, ``password`` and optionally ``dictionary_path``
+                and ``admin_power_level``.
+        """
+        # Load configuration from YAML file
+        with open(config_path, "r", encoding="utf-8") as config_file:
+            self.config = yaml.safe_load(config_file)
+
+        # Load dictionary from YAML file specified in the config
+        with open(
+            self.config.get(
+                "dictionary_path",
+                pathlib.Path(__file__).parent / "dictionary" / "default.yaml",
+            ),
+            "r",
+            encoding="utf-8",
+        ) as dictionary_file:
+            dictionary = yaml.safe_load(dictionary_file)
+            self.non_inclusive_terms = dictionary["non_inclusive_terms"]
+            self.unacceptable_terms = dictionary["unacceptable_terms"]
+
+        # Initialize the client with server URL and credentials from config
+        self.client = AsyncClient(self.config["server_url"], self.config["username"])
+
+    @staticmethod
+    def _contains_term(term, message):
+        """Return True if ``term`` occurs in ``message`` as a whole word.
+
+        Matching is case-insensitive and never matches inside a longer word,
+        so "jap" does not fire on "Japan" nor "master" on "mastering".
+        Inflected forms must be listed in the dictionary explicitly.
+        """
+        pattern = rf"(?<!\w){re.escape(term)}(?!\w)"
+        return re.search(pattern, message, re.IGNORECASE) is not None
+
+    def check_inclusive_language(self, message):
+        """Return a suggestion dict for every non-inclusive term in ``message``."""
+        return [
+            {
+                "term": term,
+                "replacement": details["replacement"],
+                "reason": details["reason"],
+            }
+            for term, details in self.non_inclusive_terms.items()
+            if self._contains_term(term, message)
+        ]
+
+    def check_unacceptable_language(self, message):
+        """Return the first unacceptable-term entry found in ``message``, or None."""
+        for entry in self.unacceptable_terms:
+            if self._contains_term(entry["term"], message):
+                return entry
+        return None
+
+    async def get_dm_room(self, user_id):
+        """Return the ID of a direct-message room with ``user_id``.
+
+        Reuses a known two-member room containing the user; otherwise creates
+        a new direct room. Returns None if the room could not be created.
+        """
+        # Check if a DM room already exists
+        for room_id, room in self.client.rooms.items():
+            if user_id in room.users and len(room.users) == 2:
+                return room_id
+
+        # Create a new DM room with the user
+        response = await self.client.room_create(invite=[user_id], is_direct=True)
+
+        if isinstance(response, RoomCreateResponse):
+            return response.room_id
+        return None  # Failed to create or invite to a room
+
+    async def send_private_message(self, user_id, message):
+        """Send ``message`` as plain text to ``user_id`` in a direct room."""
+        room_id = await self.get_dm_room(user_id)
+        if room_id:
+            await self.client.room_send(
+                room_id,
+                message_type="m.room.message",
+                content={"msgtype": "m.text", "body": message},
+            )
+
+    async def get_room_admins(self, room_id):
+        """Return the user IDs whose power level makes them room admins.
+
+        The threshold is ``admin_power_level`` from the config (default 100).
+        Returns an empty list if the room state cannot be fetched.
+        """
+        response = await self.client.room_get_state(room_id)
+        if not isinstance(response, RoomGetStateResponse):
+            return []
+
+        threshold = self.config.get("admin_power_level", 100)
+        admins = []
+        # matrix-nio returns state events as raw dicts, not event objects,
+        # so fields must be read by key rather than by attribute.
+        for event in response.events:
+            if event.get("type") != "m.room.power_levels":
+                continue
+            users = event.get("content", {}).get("users", {})
+            admins.extend(user for user, level in users.items() if level >= threshold)
+        return admins
+
+    async def message_callback(self, room: MatrixRoom, event: RoomMessageText):
+        """Inspect an incoming text message and privately notify as needed.
+
+        Unacceptable terms are reported to the room admins and the sender;
+        non-inclusive terms only produce suggestions for the sender.
+        """
+        # ``client.user`` is whatever the config supplied (possibly just a
+        # localpart); ``client.user_id`` is the full ID set at login, which is
+        # what ``event.sender`` carries. Check both so the bot never answers
+        # its own notifications.
+        if event.sender in (self.client.user_id, self.client.user):
+            return  # Ignore messages sent by the bot
+
+        # Check for unacceptable language
+        unacceptable_entry = self.check_unacceptable_language(event.body)
+        if unacceptable_entry:
+            admin_response = f"Attention: The term '{unacceptable_entry['term']}' used by {event.sender} in {room.room_id} might be unacceptable ({unacceptable_entry['reason']})."
+
+            # Get room admins and send them a message
+            admins = await self.get_room_admins(room.room_id)
+            for admin in admins:
+                # Skip the bot itself; it has no DM room with its own account.
+                if admin != self.client.user_id:
+                    await self.send_private_message(admin, admin_response)
+
+            response = f"Your message in {room.room_id} contains the term '{unacceptable_entry['term']}' which might be unacceptable ({unacceptable_entry['reason']}). Please consider revising it."
+
+            # Send private message to the user
+            await self.send_private_message(event.sender, response)
+
+        # Check for non-inclusive language
+        suggestions = self.check_inclusive_language(event.body)
+        if suggestions:
+            response = f"I've noticed some potentially non-inclusive language in your message in {room.room_id}:\n"
+            for suggestion in suggestions:
+                response += f"- '{suggestion['term']}' could potentially be replaced with '{suggestion['replacement']}' ({suggestion['reason']}).\n"
+
+            response += "Thank you for using inclusive language and making our community a better place! 🌈🦄"
+
+            # Send private message to the user
+            await self.send_private_message(event.sender, response)
+
+    async def run(self):
+        """Log in and process messages until the sync loop stops.
+
+        Raises:
+            RuntimeError: If the homeserver rejects the login.
+        """
+        try:
+            await self.client.login(self.config["password"])
+            if not self.client.logged_in:
+                raise RuntimeError("Login to the Matrix homeserver failed")
+
+            self.client.add_event_callback(self.message_callback, RoomMessageText)
+            await self.client.sync_forever(timeout=30000)
+        finally:
+            # Release the underlying HTTP session even when startup fails.
+            await self.client.close()
+
+
+def main():
+    """Run the bot using ``config.yaml`` from the working directory."""
+    bot = InclusiveLanguageBot("config.yaml")
+    asyncio.run(bot.run())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/config.dist.yaml b/config.dist.yaml
new file mode 100644
index 0000000..e69de29
diff --git a/dictionary/default.yaml b/dictionary/default.yaml
new file mode 100644
index 0000000..69829b6
--- /dev/null
+++ b/dictionary/default.yaml
@@ -0,0 +1,144 @@
+non_inclusive_terms:
+  blacklist:
+    replacement: "blocklist"
+    reason: "Avoids racial connotations"
+  whitelist:
+    replacement: "allowlist"
+    reason: "Avoids racial connotations"
+  master:
+    replacement: "main"
+    reason: "Avoids references to slavery"
+  slave:
+    replacement: "replica"
+    reason: "Avoids references to slavery"
+  sanity check:
+    replacement: "quick check"
+    reason: "Avoids ableist language"
+  dummy value:
+    replacement: "placeholder value"
+    reason: "Avoids ableist language"
+  man hours:
+    replacement: "person-hours"
+    reason: "Gender-neutral"
+  manpower:
+    replacement: "workforce"
+    reason: "Gender-neutral"
+  grandfathered:
+    replacement: "legacy status"
+    reason: "Avoids ageist language"
+  crazy:
+    replacement: "unreasonable"
+    reason: "Avoids ableist language"
+  insane:
+    replacement: "unreasonable"
+    reason: "Avoids ableist language"
+  crippled:
+    replacement: "restricted"
+    reason: "Avoids ableist language"
+  dumb:
+    replacement: "unwise"
+    reason: "Avoids ableist language"
+  lame:
+    replacement: "unimpressive"
+    reason: "Avoids ableist language"
+  powwow:
+    replacement: "meeting"
+    reason: "Avoids cultural appropriation"
+  spirit animal:
+    replacement: "inspiration"
+    reason: "Avoids cultural appropriation"
+  tribe:
+    replacement: "group"
+    reason: "Avoids cultural appropriation"
+  off the reservation:
+    replacement: "off track"
+    reason: "Avoids cultural appropriation"
+  Eskimo:
+    replacement: "Inuit or Yupik"
+    reason: "More accurate and respectful term"
+  gypped:
+    replacement: "cheated"
+    reason: "Avoids ethnic slur"
+  long time no see:
+    replacement: "it's been a while"
+    reason: "Avoids mocking non-native English speakers"
+  no can do:
+    replacement: "unable to do"
+    reason: "Avoids mocking non-native English speakers"
+  black sheep:
+    replacement: "outcast"
+    reason: "Avoids racial connotations"
+  cakewalk:
+    replacement: "easy task"
+    reason: "Avoids references to slavery"
+  peanut gallery:
+    replacement: "audience"
+    reason: "Avoids classist language"
+  sold down the river:
+    replacement: "betrayed"
+    reason: "Avoids references to slavery"
+  uppity:
+    replacement: "arrogant"
+    reason: "Avoids racial connotations"
+  grandfather clause:
+    replacement: "legacy clause"
+    reason: "Avoids ageist language"
+  gyp:
+    replacement: "cheat"
+    reason: "Avoids ethnic slur"
+  # Add more term-improvement pairs here
+
+unacceptable_terms:
+  - term: "nigga"
+    reason: "Racial slur"
+  - term: "nigger"
+    reason: "Racial slur"
+  - term: "fag"
+    reason: "Homophobic slur"
+  - term: "tranny"
+    reason: "Transphobic slur"
+  - term: "retard"
+    reason: "Ableist slur"
+  - term: "spic"
+    reason: "Racial slur"
+  - term: "chink"
+    reason: "Racial slur"
+  - term: "gook"
+    reason: "Racial slur"
+  - term: "kike"
+    reason: "Anti-Semitic slur"
+  - term: "cunt"
+    reason: "Sexist slur"
+  - term: "faggot"
+    reason: "Homophobic slur"
+  - term: "dyke"
+    reason: "Homophobic slur"
+  - term: "coon"
+    reason: "Racial slur"
+  - term: "wetback"
+    reason: "Racial slur"
+  - term: "jap"
+    reason: "Racial slur"
+  - term: "towelhead"
+    reason: "Racial slur"
+  - term: "sandnigger"
+    reason: "Racial slur"
+  - term: "whore"
+    reason: "Sexist slur"
+  - term: "slut"
+    reason: "Sexist slur"
+  - term: "bitch"
+    reason: "Sexist slur"
+  - term: "pussy"
+    reason: "Sexist slur"
+  - term: "dickhead"
+    reason: "Sexist slur"
+  - term: "asshole"
+    reason: "Offensive language"
+  - term: "cock"
+    reason: "Offensive language"
+  - term: "motherfucker"
+    reason: "Offensive language"
+  - term: "cocksucker"
+    reason: "Offensive language"
+  # Add more unacceptable terms here
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..58c6b72
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,4 @@
+matrix-nio
+pyyaml
+black
+ruff
\ No newline at end of file