feat: add inclusive language Matrix bot
Added an inclusive language bot for Matrix chat, which detects and suggests replacements for non-inclusive or unacceptable language based on configurable terms. Bot can send direct messages to users and notify room admins. Included configuration and default dictionary files. - Added `.gitignore` for virtual environments, build artifacts, and Python cache - Config YAML template and default inclusive dictionary terms - Required dependencies listed in `requirements-dev.txt`
This commit is contained in:
commit
629718b79f
5 changed files with 285 additions and 0 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
venv/
|
||||
dist/
|
||||
*.pyc
|
||||
__pycache__/
|
133
bot.py
Normal file
133
bot.py
Normal file
|
@ -0,0 +1,133 @@
|
|||
import asyncio
|
||||
import pathlib
|
||||
import yaml
|
||||
from nio import (
|
||||
AsyncClient,
|
||||
MatrixRoom,
|
||||
RoomMessageText,
|
||||
RoomCreateResponse,
|
||||
RoomGetStateResponse,
|
||||
)
|
||||
|
||||
|
||||
class InclusiveLanguageBot:
    """Matrix bot that privately flags non-inclusive or unacceptable wording.

    The bot listens for text messages in the rooms it has joined.  When a
    message contains a term from the configured dictionary it sends the author
    a direct message with a suggestion; for terms listed as unacceptable it
    additionally notifies the room admins.

    Configuration keys read from the YAML config file:

    * ``server_url``, ``username``, ``password`` -- required credentials.
    * ``dictionary_path`` -- optional; defaults to ``dictionary/default.yaml``
      next to this file.
    * ``admin_power_level`` -- optional; minimum power level a user needs to
      be treated as a room admin (default ``100``).
    """

    def __init__(self, config_path):
        """Load the configuration and term dictionary, then build the client.

        Args:
            config_path: Path of the YAML configuration file.

        Raises:
            OSError: If the config or dictionary file cannot be opened.
            KeyError: If a required config or dictionary key is missing.
        """
        with open(config_path, "r", encoding="utf-8") as config_file:
            # An empty YAML document parses to None; normalise it so the
            # lookups below fail with a KeyError naming the missing setting.
            self.config = yaml.safe_load(config_file) or {}

        dictionary_path = self.config.get(
            "dictionary_path",
            pathlib.Path(__file__).parent / "dictionary" / "default.yaml",
        )
        with open(dictionary_path, "r", encoding="utf-8") as dictionary_file:
            dictionary = yaml.safe_load(dictionary_file)
        self.non_inclusive_terms = dictionary["non_inclusive_terms"]
        self.unacceptable_terms = dictionary["unacceptable_terms"]

        # user_id -> room_id of DM rooms already found or created.  A freshly
        # created room is not visible in client.rooms until a later sync, so
        # without this cache every message would create another room.
        self._dm_rooms = {}

        self.client = AsyncClient(self.config["server_url"], self.config["username"])

    @staticmethod
    def _matches_term(term, message):
        """Return True if *term* occurs in *message* as a whole word or phrase.

        Matching is case-insensitive.  The words of a multi-word term may be
        separated by whitespace, underscores or hyphens, so the dictionary key
        ``sanity_check`` also matches "sanity check" and "sanity-check".  A
        trailing plural "s" is accepted; other inflections need their own
        dictionary entry.  Requiring word boundaries keeps words such as
        "Japan" or "flame" from being reported for the terms "jap" or "lame".
        """
        import re

        if not message:
            return False
        separator = r"[\s_\-]+"
        words = [re.escape(word) for word in re.split(separator, str(term)) if word]
        if not words:
            return False
        # [^\W_] is "letter or digit": unlike \b this still matches a term
        # embedded in an identifier such as ``ip_blacklist``.
        pattern = r"(?<![^\W_])" + separator.join(words) + r"s?(?![^\W_])"
        return re.search(pattern, message, re.IGNORECASE) is not None

    @staticmethod
    def _event_field(event, name):
        """Read *name* from a state event given as a raw dict or as an object."""
        if isinstance(event, dict):
            return event.get(name)
        return getattr(event, name, None)

    def _is_self(self, user_id):
        """Return True if *user_id* identifies the bot's own account."""
        own_ids = (
            getattr(self.client, "user_id", None),  # full MXID, set after login
            getattr(self.client, "user", None),  # login name from the config
        )
        return bool(user_id) and user_id in own_ids

    def check_inclusive_language(self, message):
        """Return a suggestion for every non-inclusive term found in *message*.

        Returns:
            A list of dicts with the keys ``term``, ``replacement`` and
            ``reason``, in dictionary order; empty if nothing matched.
        """
        return [
            {
                "term": term,
                "replacement": details["replacement"],
                "reason": details["reason"],
            }
            for term, details in self.non_inclusive_terms.items()
            if self._matches_term(term, message)
        ]

    def check_unacceptable_language(self, message):
        """Return the first unacceptable-term entry found in *message*, or None."""
        for entry in self.unacceptable_terms:
            if self._matches_term(entry["term"], message):
                return entry
        return None

    async def get_dm_room(self, user_id):
        """Return the id of a direct-message room shared with *user_id*.

        An existing two-member room is reused; otherwise a new direct room is
        created and the user invited.

        Returns:
            The room id, or None if *user_id* is the bot itself or the room
            could not be created.
        """
        if self._is_self(user_id):
            # With the bot's own id the two-member lookup below would match
            # an arbitrary DM room and deliver the message to the wrong user.
            return None

        cached_room_id = self._dm_rooms.get(user_id)
        if cached_room_id:
            room = self.client.rooms.get(cached_room_id)
            # room is None while the newly created room has not been synced.
            if (
                room is None
                or user_id in room.users
                or user_id in getattr(room, "invited_users", {})
            ):
                return cached_room_id
            # The user left the cached room; fall through and find a new one.
            del self._dm_rooms[user_id]

        for room_id, room in self.client.rooms.items():
            if user_id in room.users and len(room.users) == 2:
                self._dm_rooms[user_id] = room_id
                return room_id

        response = await self.client.room_create(invite=[user_id], is_direct=True)
        if isinstance(response, RoomCreateResponse):
            self._dm_rooms[user_id] = response.room_id
            return response.room_id
        return None  # Failed to create or invite to a room

    async def send_private_message(self, user_id, message):
        """Send *message* as plain text to *user_id* in a direct-message room.

        Does nothing if no DM room is available for the user.
        """
        room_id = await self.get_dm_room(user_id)
        if room_id:
            await self.client.room_send(
                room_id,
                message_type="m.room.message",
                content={"msgtype": "m.text", "body": message},
            )

    async def get_room_admins(self, room_id):
        """Return the user ids whose power level marks them as room admins.

        The threshold is the ``admin_power_level`` config value (default 100).

        Returns:
            A list of user ids; empty if the room state could not be fetched.
        """
        response = await self.client.room_get_state(room_id)
        if not isinstance(response, RoomGetStateResponse):
            return []

        threshold = self.config.get("admin_power_level", 100)
        admins = []
        for event in response.events:
            # State events are read through _event_field because they may be
            # raw dicts, on which attribute access would fail.
            if self._event_field(event, "type") != "m.room.power_levels":
                continue
            power_levels = self._event_field(event, "content") or {}
            admins.extend(
                user
                for user, level in power_levels.get("users", {}).items()
                if isinstance(level, int) and level >= threshold
            )
        return admins

    async def message_callback(self, room: "MatrixRoom", event: "RoomMessageText"):
        """Check an incoming text message and notify its author and the admins.

        Args:
            room: The room the message was posted in.
            event: The text message event.
        """
        if self._is_self(event.sender):
            # Ignore messages sent by the bot.  Its notices quote the flagged
            # term, so reacting to them would loop.
            return

        # Check for unacceptable language
        unacceptable_entry = self.check_unacceptable_language(event.body)
        if unacceptable_entry:
            admin_response = f"Attention: The term '{unacceptable_entry['term']}' used by {event.sender} in {room.room_id} might be unacceptable ({unacceptable_entry['reason']})."

            # Get room admins and send them a message
            for admin in await self.get_room_admins(room.room_id):
                await self.send_private_message(admin, admin_response)

            response = f"Your message in {room.room_id} contains the term '{unacceptable_entry['term']}' which might be unacceptable ({unacceptable_entry['reason']}). Please consider revising it."

            # Send private message to the user
            await self.send_private_message(event.sender, response)

        # Check for non-inclusive language
        suggestions = self.check_inclusive_language(event.body)
        if suggestions:
            lines = [
                f"I've noticed some potentially non-inclusive language in your message in {room.room_id}:\n"
            ]
            lines.extend(
                f"- '{suggestion['term']}' could potentially be replaced with '{suggestion['replacement']}' ({suggestion['reason']}).\n"
                for suggestion in suggestions
            )
            lines.append(
                "Thank you for using inclusive language and making our community a better place! 🌈🦄"
            )

            # Send private message to the user
            await self.send_private_message(event.sender, "".join(lines))

    async def run(self):
        """Log in and process messages until cancelled.

        Raises:
            RuntimeError: If the login is rejected by the homeserver.
        """
        try:
            response = await self.client.login(self.config["password"])
            if not self.client.logged_in:
                raise RuntimeError(f"Matrix login failed: {response}")

            # Run one sync before registering the callback so that messages
            # already in the room history are not reported again on restart.
            await self.client.sync(timeout=30000)

            self.client.add_event_callback(self.message_callback, RoomMessageText)
            await self.client.sync_forever(timeout=30000)
        finally:
            # Release the HTTP session on every exit path.
            await self.client.close()
|
||||
|
||||
|
||||
def main():
    """Create the bot from ``config.yaml`` in the working directory and run it."""
    bot = InclusiveLanguageBot("config.yaml")
    # asyncio.run() creates the event loop and closes it on exit; obtaining a
    # loop with get_event_loop() outside a running loop is deprecated.
    asyncio.run(bot.run())


if __name__ == "__main__":
    main()
|
0
config.dist.yaml
Normal file
0
config.dist.yaml
Normal file
144
dictionary/default.yaml
Normal file
144
dictionary/default.yaml
Normal file
|
@ -0,0 +1,144 @@
|
|||
non_inclusive_terms:
|
||||
blacklist:
|
||||
replacement: "blocklist"
|
||||
reason: "Avoids racial connotations"
|
||||
whitelist:
|
||||
replacement: "allowlist"
|
||||
reason: "Avoids racial connotations"
|
||||
master:
|
||||
replacement: "main"
|
||||
reason: "Avoids references to slavery"
|
||||
slave:
|
||||
replacement: "replica"
|
||||
reason: "Avoids references to slavery"
|
||||
sanity_check:
|
||||
replacement: "quick check"
|
||||
reason: "Avoids ableist language"
|
||||
dummy_value:
|
||||
replacement: "placeholder value"
|
||||
reason: "Avoids ableist language"
|
||||
man_hours:
|
||||
replacement: "person-hours"
|
||||
reason: "Gender-neutral"
|
||||
manpower:
|
||||
replacement: "workforce"
|
||||
reason: "Gender-neutral"
|
||||
grandfathered:
|
||||
replacement: "legacy status"
|
||||
reason: "Avoids ageist language"
|
||||
crazy:
|
||||
replacement: "unreasonable"
|
||||
reason: "Avoids ableist language"
|
||||
insane:
|
||||
replacement: "unreasonable"
|
||||
reason: "Avoids ableist language"
|
||||
crippled:
|
||||
replacement: "restricted"
|
||||
reason: "Avoids ableist language"
|
||||
dumb:
|
||||
replacement: "unwise"
|
||||
reason: "Avoids ableist language"
|
||||
lame:
|
||||
replacement: "unimpressive"
|
||||
reason: "Avoids ableist language"
|
||||
powwow:
|
||||
replacement: "meeting"
|
||||
reason: "Avoids cultural appropriation"
|
||||
spirit animal:
|
||||
replacement: "inspiration"
|
||||
reason: "Avoids cultural appropriation"
|
||||
tribe:
|
||||
replacement: "group"
|
||||
reason: "Avoids cultural appropriation"
|
||||
off the reservation:
|
||||
replacement: "off track"
|
||||
reason: "Avoids cultural appropriation"
|
||||
Eskimo:
|
||||
replacement: "Inuit or Yupik"
|
||||
reason: "More accurate and respectful term"
|
||||
gypped:
|
||||
replacement: "cheated"
|
||||
reason: "Avoids ethnic slur"
|
||||
long time no see:
|
||||
replacement: "it's been a while"
|
||||
reason: "Avoids mocking non-native English speakers"
|
||||
no can do:
|
||||
replacement: "unable to do"
|
||||
reason: "Avoids mocking non-native English speakers"
|
||||
black sheep:
|
||||
replacement: "outcast"
|
||||
reason: "Avoids racial connotations"
|
||||
cakewalk:
|
||||
replacement: "easy task"
|
||||
reason: "Avoids references to slavery"
|
||||
peanut gallery:
|
||||
replacement: "audience"
|
||||
reason: "Avoids classist language"
|
||||
sold down the river:
|
||||
replacement: "betrayed"
|
||||
reason: "Avoids references to slavery"
|
||||
uppity:
|
||||
replacement: "arrogant"
|
||||
reason: "Avoids racial connotations"
|
||||
grandfather clause:
|
||||
replacement: "legacy clause"
|
||||
reason: "Avoids ageist language"
|
||||
gyp:
|
||||
replacement: "cheat"
|
||||
reason: "Avoids ethnic slur"
|
||||
# Add more term-improvement pairs here
|
||||
|
||||
unacceptable_terms:
|
||||
- term: "nigga"
|
||||
reason: "Racial slur"
|
||||
- term: "nigger"
|
||||
reason: "Racial slur"
|
||||
- term: "fag"
|
||||
reason: "Homophobic slur"
|
||||
- term: "tranny"
|
||||
reason: "Transphobic slur"
|
||||
- term: "retard"
|
||||
reason: "Ableist slur"
|
||||
- term: "spic"
|
||||
reason: "Racial slur"
|
||||
- term: "chink"
|
||||
reason: "Racial slur"
|
||||
- term: "gook"
|
||||
reason: "Racial slur"
|
||||
- term: "kike"
|
||||
reason: "Anti-Semitic slur"
|
||||
- term: "cunt"
|
||||
reason: "Sexist slur"
|
||||
- term: "faggot"
|
||||
reason: "Homophobic slur"
|
||||
- term: "dyke"
|
||||
reason: "Homophobic slur"
|
||||
- term: "coon"
|
||||
reason: "Racial slur"
|
||||
- term: "wetback"
|
||||
reason: "Racial slur"
|
||||
- term: "jap"
|
||||
reason: "Racial slur"
|
||||
- term: "towelhead"
|
||||
reason: "Racial slur"
|
||||
- term: "sandnigger"
|
||||
reason: "Racial slur"
|
||||
- term: "whore"
|
||||
reason: "Sexist slur"
|
||||
- term: "slut"
|
||||
reason: "Sexist slur"
|
||||
- term: "bitch"
|
||||
reason: "Sexist slur"
|
||||
- term: "pussy"
|
||||
reason: "Sexist slur"
|
||||
- term: "dickhead"
|
||||
reason: "Sexist slur"
|
||||
- term: "asshole"
|
||||
reason: "Offensive language"
|
||||
- term: "cock"
|
||||
reason: "Offensive language"
|
||||
- term: "motherfucker"
|
||||
reason: "Offensive language"
|
||||
- term: "cocksucker"
|
||||
reason: "Offensive language"
|
||||
# Add more unacceptable terms here
|
4
requirements-dev.txt
Normal file
4
requirements-dev.txt
Normal file
|
@ -0,0 +1,4 @@
|
|||
matrix-nio
|
||||
pyyaml
|
||||
black
|
||||
ruff
|
Loading…
Reference in a new issue