feat: add inclusive language Matrix bot

Added an inclusive language bot for Matrix chat, which detects and suggests replacements for non-inclusive or unacceptable language based on configurable terms. Bot can send direct messages to users and notify room admins. Included configuration and default dictionary files.

- Added `.gitignore` for virtual environments, build artifacts, and Python cache
- Config YAML template and default inclusive dictionary terms
- Required dependencies listed in `requirements-dev.txt`
This commit is contained in:
Kumi 2024-08-19 17:08:37 +02:00
commit 629718b79f
Signed by: kumi
GPG key ID: ECBCC9082395383F
5 changed files with 285 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
venv/
dist/
*.pyc
__pycache__/

133
bot.py Normal file
View file

@ -0,0 +1,133 @@
import asyncio
import pathlib
import re

import yaml
from nio import (
    AsyncClient,
    MatrixRoom,
    RoomMessageText,
    RoomCreateResponse,
    RoomGetStateResponse,
)
class InclusiveLanguageBot:
    """Matrix bot that privately flags non-inclusive or unacceptable language.

    Every text message is checked against two term lists loaded from a YAML
    dictionary:

    * ``non_inclusive_terms`` -- mapping of term to ``replacement``/``reason``;
      the sender receives a direct message with suggestions.
    * ``unacceptable_terms`` -- list of ``term``/``reason`` entries; the sender
      and the admins of the room both receive a direct message.

    Configuration keys read from the config file: ``server_url``,
    ``username``, ``password``, and optionally ``dictionary_path`` and
    ``admin_power_level`` (defaults to 100).
    """

    # Optional endings accepted after a term so that simple inflections such
    # as "whitelisted" or "slaves" are still recognised.
    _INFLECTIONS = r"(?:s|es|ed|ing)?"

    def __init__(self, config_path):
        """Load the configuration and dictionary and create the client.

        Args:
            config_path: Path of the YAML configuration file.

        Raises:
            OSError: If the config or dictionary file cannot be opened.
            KeyError: If a required config key or dictionary section is
                missing.
        """
        with open(config_path, "r", encoding="utf-8") as config_file:
            self.config = yaml.safe_load(config_file)

        # Fall back to the dictionary shipped next to this module.
        dictionary_path = self.config.get(
            "dictionary_path",
            pathlib.Path(__file__).parent / "dictionary" / "default.yaml",
        )
        with open(dictionary_path, "r", encoding="utf-8") as dictionary_file:
            dictionary = yaml.safe_load(dictionary_file)

        # A section that is present but empty parses as None; treat it as
        # "no terms" instead of failing later when iterating.
        self.non_inclusive_terms = dictionary["non_inclusive_terms"] or {}
        self.unacceptable_terms = dictionary["unacceptable_terms"] or []

        self.client = AsyncClient(self.config["server_url"], self.config["username"])

    @classmethod
    def _contains_term(cls, term, message):
        """Return True if *term* occurs in *message* as a whole word or phrase.

        Matching is case-insensitive and anchored on word boundaries, so
        "jap" does not fire on "Japan" and "gyp" does not fire on "Egypt".
        A small set of inflection endings is tolerated after the term.
        """
        pattern = rf"(?<!\w){re.escape(term)}{cls._INFLECTIONS}(?!\w)"
        return re.search(pattern, message, re.IGNORECASE) is not None

    def _own_ids(self):
        """Return the identifiers under which the bot itself may appear.

        ``client.user`` is whatever was configured as the login name, while
        ``client.user_id`` is only filled in once the server has answered
        the login request, so both are considered.
        """
        candidates = (
            getattr(self.client, "user", None),
            getattr(self.client, "user_id", None),
        )
        return {candidate for candidate in candidates if candidate}

    def check_inclusive_language(self, message):
        """Return suggestions for every non-inclusive term found in *message*.

        Returns:
            A list of dicts with the keys ``term``, ``replacement`` and
            ``reason``; empty if nothing was found.
        """
        return [
            {
                "term": term,
                "replacement": details["replacement"],
                "reason": details["reason"],
            }
            for term, details in self.non_inclusive_terms.items()
            if self._contains_term(term, message)
        ]

    def check_unacceptable_language(self, message):
        """Return the first unacceptable-term entry found in *message*.

        Returns:
            The matching dictionary entry (with ``term`` and ``reason``), or
            ``None`` if the message contains no unacceptable term.
        """
        for entry in self.unacceptable_terms:
            if self._contains_term(entry["term"], message):
                return entry
        return None

    async def get_dm_room(self, user_id):
        """Return the ID of a direct-message room shared with *user_id*.

        An already known two-member room containing the user is reused;
        otherwise a new direct room is created and the user invited.

        Returns:
            The room ID, or ``None`` if the room could not be created or if
            *user_id* is the bot itself.
        """
        # The bot is a member of every room it knows, so looking up its own
        # ID would match an arbitrary two-member room shared with someone
        # else and deliver the message to the wrong person.
        if user_id in self._own_ids():
            return None

        for room_id, room in self.client.rooms.items():
            if user_id in room.users and len(room.users) == 2:
                return room_id

        response = await self.client.room_create(invite=[user_id], is_direct=True)
        if isinstance(response, RoomCreateResponse):
            return response.room_id
        return None  # Failed to create or invite to a room

    async def send_private_message(self, user_id, message):
        """Send *message* as a plain-text direct message to *user_id*.

        Nothing is sent if no direct-message room is available.
        """
        room_id = await self.get_dm_room(user_id)
        if not room_id:
            return
        await self.client.room_send(
            room_id,
            message_type="m.room.message",
            content={"msgtype": "m.text", "body": message},
        )

    async def get_room_admins(self, room_id):
        """Return the user IDs whose power level marks them as room admins.

        The threshold is taken from the ``admin_power_level`` config key and
        defaults to 100.

        Returns:
            A list of user IDs; empty if the room state could not be fetched
            or contains no power-levels event.
        """
        response = await self.client.room_get_state(room_id)
        if not isinstance(response, RoomGetStateResponse):
            return []

        threshold = self.config.get("admin_power_level", 100)
        for event in response.events:
            # The shape of each state event comes from the nio library; the
            # original code accepted either attribute or mapping access here.
            event_type = (
                event.get("type") if isinstance(event, dict) else event.type
            )
            if event_type != "m.room.power_levels":
                continue
            content = (
                event.get("content", {}) if isinstance(event, dict) else event.content
            )
            return [
                user
                for user, level in content.get("users", {}).items()
                if level >= threshold
            ]
        return []

    async def message_callback(self, room: "MatrixRoom", event: "RoomMessageText"):
        """Check an incoming text message and notify the people concerned.

        Unacceptable terms trigger a direct message to each room admin and
        to the sender; non-inclusive terms trigger a direct message with
        replacement suggestions to the sender only.
        """
        if event.sender in self._own_ids():
            return  # Ignore messages sent by the bot

        unacceptable_entry = self.check_unacceptable_language(event.body)
        if unacceptable_entry:
            admin_response = f"Attention: The term '{unacceptable_entry['term']}' used by {event.sender} in {room.room_id} might be unacceptable ({unacceptable_entry['reason']})."
            own_ids = self._own_ids()
            for admin in await self.get_room_admins(room.room_id):
                if admin in own_ids:
                    continue  # The bot cannot usefully message itself
                await self.send_private_message(admin, admin_response)

            response = f"Your message in {room.room_id} contains the term '{unacceptable_entry['term']}' which might be unacceptable ({unacceptable_entry['reason']}). Please consider revising it."
            await self.send_private_message(event.sender, response)

        suggestions = self.check_inclusive_language(event.body)
        if suggestions:
            lines = [
                f"I've noticed some potentially non-inclusive language in your message in {room.room_id}:\n"
            ]
            lines.extend(
                f"- '{suggestion['term']}' could potentially be replaced with '{suggestion['replacement']}' ({suggestion['reason']}).\n"
                for suggestion in suggestions
            )
            lines.append(
                "Thank you for using inclusive language and making our community a better place! 🌈🦄"
            )
            await self.send_private_message(event.sender, "".join(lines))

    async def run(self):
        """Log in and process messages until cancelled.

        Raises:
            RuntimeError: If the server rejects the login.
        """
        try:
            response = await self.client.login(self.config["password"])
            if not self.client.logged_in:
                raise RuntimeError(f"Login failed: {response}")

            # Sync once before registering the callback: this fills
            # client.rooms (needed to find existing DM rooms) and skips the
            # backlog, so users are not messaged again about old messages
            # every time the bot restarts.
            await self.client.sync(timeout=30000, full_state=True)

            self.client.add_event_callback(self.message_callback, RoomMessageText)
            await self.client.sync_forever(timeout=30000)
        finally:
            # Release the underlying HTTP session on any exit path.
            await self.client.close()
def main():
    """Start the bot using ``config.yaml`` from the current working directory."""
    bot = InclusiveLanguageBot("config.yaml")
    # asyncio.run() creates a fresh event loop and closes it afterwards;
    # asyncio.get_event_loop() is deprecated when no loop is running.
    asyncio.run(bot.run())


if __name__ == "__main__":
    main()

0
config.dist.yaml Normal file
View file

144
dictionary/default.yaml Normal file
View file

@ -0,0 +1,144 @@
non_inclusive_terms:
blacklist:
replacement: "blocklist"
reason: "Avoids racial connotations"
whitelist:
replacement: "allowlist"
reason: "Avoids racial connotations"
master:
replacement: "main"
reason: "Avoids references to slavery"
slave:
replacement: "replica"
reason: "Avoids references to slavery"
sanity_check:
replacement: "quick check"
reason: "Avoids ableist language"
dummy_value:
replacement: "placeholder value"
reason: "Avoids ableist language"
man_hours:
replacement: "person-hours"
reason: "Gender-neutral"
manpower:
replacement: "workforce"
reason: "Gender-neutral"
grandfathered:
replacement: "legacy status"
reason: "Avoids ageist language"
crazy:
replacement: "unreasonable"
reason: "Avoids ableist language"
insane:
replacement: "unreasonable"
reason: "Avoids ableist language"
crippled:
replacement: "restricted"
reason: "Avoids ableist language"
dumb:
replacement: "unwise"
reason: "Avoids ableist language"
lame:
replacement: "unimpressive"
reason: "Avoids ableist language"
powwow:
replacement: "meeting"
reason: "Avoids cultural appropriation"
spirit animal:
replacement: "inspiration"
reason: "Avoids cultural appropriation"
tribe:
replacement: "group"
reason: "Avoids cultural appropriation"
off the reservation:
replacement: "off track"
reason: "Avoids cultural appropriation"
Eskimo:
replacement: "Inuit or Yupik"
reason: "More accurate and respectful term"
gypped:
replacement: "cheated"
reason: "Avoids ethnic slur"
long time no see:
replacement: "it's been a while"
reason: "Avoids mocking non-native English speakers"
no can do:
replacement: "unable to do"
reason: "Avoids mocking non-native English speakers"
black sheep:
replacement: "outcast"
reason: "Avoids racial connotations"
cakewalk:
replacement: "easy task"
reason: "Avoids references to slavery"
peanut gallery:
replacement: "audience"
reason: "Avoids classist language"
sold down the river:
replacement: "betrayed"
reason: "Avoids references to slavery"
uppity:
replacement: "arrogant"
reason: "Avoids racial connotations"
grandfather clause:
replacement: "legacy clause"
reason: "Avoids ageist language"
gyp:
replacement: "cheat"
reason: "Avoids ethnic slur"
# Add more term-improvement pairs here
unacceptable_terms:
- term: "nigga"
reason: "Racial slur"
- term: "nigger"
reason: "Racial slur"
- term: "fag"
reason: "Homophobic slur"
- term: "tranny"
reason: "Transphobic slur"
- term: "retard"
reason: "Ableist slur"
- term: "spic"
reason: "Racial slur"
- term: "chink"
reason: "Racial slur"
- term: "gook"
reason: "Racial slur"
- term: "kike"
reason: "Anti-Semitic slur"
- term: "cunt"
reason: "Sexist slur"
- term: "faggot"
reason: "Homophobic slur"
- term: "dyke"
reason: "Homophobic slur"
- term: "coon"
reason: "Racial slur"
- term: "wetback"
reason: "Racial slur"
- term: "jap"
reason: "Racial slur"
- term: "towelhead"
reason: "Racial slur"
- term: "sandnigger"
reason: "Racial slur"
- term: "whore"
reason: "Sexist slur"
- term: "slut"
reason: "Sexist slur"
- term: "bitch"
reason: "Sexist slur"
- term: "pussy"
reason: "Sexist slur"
- term: "dickhead"
reason: "Sexist slur"
- term: "asshole"
reason: "Offensive language"
- term: "cock"
reason: "Offensive language"
- term: "motherfucker"
reason: "Offensive language"
- term: "cocksucker"
reason: "Offensive language"
# Add more unacceptable terms here

4
requirements-dev.txt Normal file
View file

@ -0,0 +1,4 @@
matrix-nio
pyyaml
black
ruff