From 31f001057a78b6d5fec0a5fa8a4398beb9993a4d Mon Sep 17 00:00:00 2001 From: Kumi Date: Mon, 4 Dec 2023 18:07:57 +0100 Subject: [PATCH] Enhance Wikipedia tool flexibility Added options to extract specific info and summarize content from Wikipedia pages within the gptbot's Wikipedia tool. The 'extract' option enables partial retrieval of page data based on a user-defined string, leveraging the bot's existing chat API for extraction. The 'summarize' option allows users to get concise versions of articles, again utilizing the bot's chat capabilities. These additions provide users with more granular control over the information they receive, potentially reducing response clutter and focusing on user-specified interests. --- src/gptbot/tools/wikipedia.py | 47 ++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/gptbot/tools/wikipedia.py b/src/gptbot/tools/wikipedia.py index 0d2aef7..4b42491 100644 --- a/src/gptbot/tools/wikipedia.py +++ b/src/gptbot/tools/wikipedia.py @@ -18,6 +18,15 @@ class Wikipedia(BaseTool): "description": "The language to search in.", "default": "en", }, + "extract": { + "type": "string", + "description": "What information to extract from the page. If not provided, the full page will be returned." + }, + "summarize": { + "type": "boolean", + "description": "Whether to summarize the page or not.", + "default": False, + } }, "required": ["query"], } @@ -28,7 +37,8 @@ class Wikipedia(BaseTool): raise Exception('No query provided.') language = self.kwargs.get("language", "en") - extract = self.kwargs.get("extract", False) + extract = self.kwargs.get("extract") + summarize = self.kwargs.get("summarize", False) args = { "action": "query", @@ -36,13 +46,8 @@ class Wikipedia(BaseTool): "titles": query, } - if extract: - args["prop"] = "extracts" - args["exintro"] = "" - - else: - args["prop"] = "revisions" - args["rvprop"] = "content" + args["prop"] = "revisions" + args["rvprop"] = "content" url = f'https://{language}.wikipedia.org/w/api.php?{urlencode(args)}' @@ -50,13 +55,25 @@ class Wikipedia(BaseTool): async with session.get(url) as response: if response.status == 200: data = await response.json() - pages = data['query']['pages'] - page = list(pages.values())[0] - if 'extract' in page: - return f"**{page['title']} (Extract)**\n{page['extract']}" - elif 'revisions' in page: - return f"**{page['title']}**\n{page['revisions'][0]['*']}" - else: + + try: + pages = data['query']['pages'] + page = list(pages.values())[0] + content = page['revisions'][0]['*'] + except KeyError: raise Exception(f'No results for {query} found in Wikipedia.') + + if extract: + chat_messages = [{"role": "system", "content": f"Extract the following from the provided content: {extract}"}] + chat_messages.append({"role": "user", "content": content}) + content, _ = await self.bot.chat_api.generate_chat_response(chat_messages, room=self.room, user=self.user, allow_override=False, use_tools=False) + + if summarize: + chat_messages = [{"role": "system", "content": "Summarize the following content:"}] + chat_messages.append({"role": "user", "content": content}) + content, _ = await self.bot.chat_api.generate_chat_response(chat_messages, room=self.room, user=self.user, allow_override=False, use_tools=False) + + return f"**Wikipedia: {page['title']}**\n{content}" + else: raise Exception(f'Could not connect to Wikipedia API: {response.status} {response.reason}') \ No newline at end of file