Spaces:
Sleeping
Sleeping
File size: 15,180 Bytes
ce8469e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 |
import logging
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.agent.react import ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import PydanticMultiSelector
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import FunctionTool
from openai import AuthenticationError
from knowledgeBase.hybrid_query_engine import load_hybrid_query_engine
from utils import sort_dict_by_values, internet_search
from prompts import default_prompt
class UserAgent:
    """
    Manage OpenAI language/embedding models and the query agent built on top of them.

    Attributes:
        llm_name (str): The name of the language model to use.
        embedding_name (str): The name of the embedding model to use.
        openAI_api (str): The API key for accessing OpenAI services.
        mode (str): Agent mode. One of "ReAct: Query Engines & Internet",
            "Router-Based Query Engines" or "SubQuestion-Based Query Engines".
        query_engines_details (list): Details (name/description dicts) of query engines to use.
        temperature (float): The temperature setting for the language model.
        system_message (str): System prompt passed to the language model.
        model_llm (object): The language model instance (None until set_llm runs).
        model_embd (object): The embedding model instance (None until set_embd runs).
        agent (object): The agent / query-engine instance used for answering.
        memory (object): Chat memory buffer (only used in ReAct mode).

    Methods:
        interact_with_agent(message, chat_history):
            Sends a message to the agent and appends the exchange to the history.
        set_llm(llm_name):
            Sets the language model based on the provided name.
        set_embd(embedding_name):
            Sets the embedding model based on the provided name.
        set_agent(query_engines_details):
            Sets up the agent with the provided query engines details.
        set_api(openAI_api):
            Sets the OpenAI API key and reinitializes the models and agent.
        set_mode(mode):
            Switches the agent mode and rebuilds the agent.
        reset_memory():
            Clears the ReAct chat memory buffer, if any.
    """

    # Display name -> OpenAI model identifier.
    _LLM_MODELS = {
        'OpenAI GPT-4o mini': 'gpt-4o-mini',
        'OpenAI GPT-4o': 'gpt-4o',
    }

    def __init__(self, llm_name, embedding_name, openAI_api, mode,
                 query_engines_details=None, temperature=0, system_message=None):
        self.llm_name = llm_name
        self.embedding_name = embedding_name
        self.openAI_api = openAI_api
        self.mode = mode
        self.temperature = temperature
        self.model_llm = None
        self.model_embd = None
        self.agent = None
        self.memory = None
        # None sentinel instead of a mutable default: each instance gets its own list.
        self.query_engines_details = [] if query_engines_details is None else query_engines_details
        # The system message must be assigned BEFORE set_llm() runs, because
        # set_llm() reads self.system_message when constructing the model.
        if system_message is None:
            self.system_message = default_prompt()
        else:
            self.system_message = system_message
        if self.openAI_api != "":
            self.set_llm(llm_name)
            self.set_embd(embedding_name)

    def _record_error(self, message, chat_history, bot_message, log_message):
        """Append a failed exchange to the chat history, log it, and return the UI tuple."""
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})
        logging.error(log_message)
        return "", chat_history

    @staticmethod
    def _add_reference(references, name, link, score):
        """
        Record a (name, link) reference in `references`, truncating names longer
        than 80 characters and keeping the best score seen for a duplicate entry.

        Parameters:
            references (dict): Maps (name, link) tuples to the best score so far.
            name (str): Article name (truthy).
            link (str): Article link (truthy).
            score (float | None): Relevance score; None is treated as 0.
        """
        current_score = score if score is not None else 0
        if len(name) > 80:
            name = name[:80] + "..."
        key = (name, link)
        if key in references:
            # Keep the highest score when the same reference appears again.
            references[key] = max(references[key], current_score)
        else:
            references[key] = current_score

    def interact_with_agent(self, message, chat_history):
        """
        Interacts with the AI agent based on the selected mode and updates the chat history.

        Parameters:
            message (str): The user's message to be sent to the AI agent.
            chat_history (list): The current chat history (role/content dicts); updated in place.

        Returns:
            tuple: An empty string (clears the input box) and the updated chat history.

        Raises:
            ValueError: If the selected mode is not supported.

        In "ReAct: Query Engines & Internet" mode the message goes through the
        ReAct agent and references are collected from each tool call's source
        nodes. In the Router/SubQuestion modes the message goes through the
        query engine and references come from the response's source nodes. The
        collected references are formatted and appended to the bot's message.
        """
        references = {}
        if self.mode == "ReAct: Query Engines & Internet":
            # Send the user's message to the AI agent
            try:
                ai_answer = self.agent.chat(message)
            except AuthenticationError:
                return self._record_error(
                    message, chat_history,
                    "An error occurred: Authentication Error. Please check your OpenAI API key.",
                    "Authentication error: Incorrect API key provided.")
            except Exception as e:
                return self._record_error(
                    message, chat_history,
                    f"An error occurred: {e}",
                    f"An unexpected error occurred: {e}")
            bot_message = ai_answer.response
            # Collect article names and links from every tool the agent invoked
            for tool_output in ai_answer.sources:
                raw_output = tool_output.raw_output
                # The agent may answer without consulting any query engine, in
                # which case raw_output carries no 'source_nodes' attribute.
                if hasattr(raw_output, 'source_nodes'):
                    for node in raw_output.source_nodes:
                        name = node.node.metadata.get('Name')
                        link = node.node.metadata.get('Link')
                        if name and link:
                            self._add_reference(references, name, link, node.score)
                else:
                    logging.info("Warning: 'source_nodes' attribute not found in raw_output.")
        elif self.mode in ["Router-Based Query Engines", "SubQuestion-Based Query Engines"]:
            # Send the user's message to the Router / SubQuestion query engine
            try:
                response = self.agent.query(message)
            except AuthenticationError:
                return self._record_error(
                    message, chat_history,
                    "An error occurred: Authentication Error. Please check your OpenAI API key.",
                    "Authentication error: Incorrect API key provided.")
            except Exception as e:
                return self._record_error(
                    message, chat_history,
                    f"An error occurred: {e}",
                    f"An unexpected error occurred: {e}")
            bot_message = response.response
            for source in response.source_nodes:
                # source is a NodeWithScore; the metadata lives on the inner node
                metadata = source.node.metadata
                name = metadata.get('Name')
                link = metadata.get('Link')
                if name and link:
                    self._add_reference(references, name, link, source.score)
        else:
            raise ValueError('Selected mode is not supported.')
        # Format the references
        if references:
            # Sort the references by LLM Judge score; yields ((name, link), score) pairs
            sorted_references = sort_dict_by_values(references)
            formatted_references = [
                f"🔗 [{name}]({link}) ⭐ {score:.2f}/1 | " if score != 0
                else f"🔗 [{name}]({link}) ⭐ -/1 | "
                for (name, link), score in sorted_references
            ]
            references_text = (
                "Some helpful articles, sorted by relevance according to LLM Judge,"
                " along with semantic scores:\n" + " ".join(formatted_references)
            )
            bot_message += "\n\n" + references_text
        # Update the chat history
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})
        return "", chat_history

    def set_llm(self, llm_name):
        """
        Set the language model (LLM) based on the provided LLM name.

        Parameters:
            llm_name (str): The name of the language model to set. Supported
                values are 'OpenAI GPT-4o mini' and 'OpenAI GPT-4o'.

        Raises:
            ValueError: If the provided LLM name is not supported.
        """
        self.llm_name = llm_name
        model_id = self._LLM_MODELS.get(llm_name)
        if model_id is None:
            raise ValueError('Selected LLM name is not supported.')
        self.model_llm = OpenAI(
            model=model_id,
            temperature=self.temperature,
            api_key=self.openAI_api,
            system_prompt=self.system_message,
        )

    def set_embd(self, embedding_name):
        """
        Sets the embedding model based on the provided embedding name.

        Parameters:
            embedding_name (str): The name of the embedding model to be set.
                Currently, only 'OpenAI text-embedding-3-small' is supported.

        Raises:
            ValueError: If the provided embedding name is not supported.
        """
        self.embedding_name = embedding_name
        if self.embedding_name == 'OpenAI text-embedding-3-small':
            self.model_embd = OpenAIEmbedding(model="text-embedding-3-small", api_key=self.openAI_api)
        else:
            raise ValueError('Selected Embedding name is not supported.')

    def set_agent(self, query_engines_details):
        """
        Set up the agent with the provided query engines details.

        Initializes and configures the agent for the current mode. Query
        engines that fail to load are logged and skipped rather than wrapped
        into a tool.

        Args:
            query_engines_details (list): A list of dictionaries, each containing
                details of a query engine, with keys:
                - 'name': The name of the query engine.
                - 'description': A description of the query engine.

        Raises:
            ValueError: If the selected mode is not supported.
        """
        self.query_engines_details = query_engines_details
        # Load and initialize query engines based on provided set of query engines
        qs_list = []
        for qs_detail_i in query_engines_details:
            logging.debug(qs_detail_i)
            # Load hybrid query engine: Semantic + Keyword-based
            qs_i = load_hybrid_query_engine(
                model_llm=self.model_llm,
                model_embd=self.model_embd,
                query_engine_name=qs_detail_i['name'],
                query_engine_description=qs_detail_i['description']
            )
            if qs_i is None:
                # Skip failed engines: a tool wrapping None would break querying later.
                logging.warning('> Query engine %s could not be loaded.', qs_detail_i['name'])
                continue
            logging.info('> Query engine %s was loaded.', qs_detail_i['name'])
            # Create a QueryEngine tool instance from the loaded query engine
            qs_i_tool = QueryEngineTool.from_defaults(
                query_engine=qs_i,
                description=qs_detail_i['description'],
            )
            qs_list.append(qs_i_tool)
        if self.mode == "ReAct: Query Engines & Internet":
            # Initialize a ChatMemoryBuffer with a token limit
            self.memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
            search_tool = FunctionTool.from_defaults(internet_search)
            # Create a ReActAgent using the list of tools, the language model, and the memory buffer
            self.agent = ReActAgent.from_tools(
                tools=qs_list + [search_tool],
                llm=self.model_llm,
                memory=self.memory,
                verbose=True
            )
        elif self.mode == "Router-Based Query Engines":
            # Create a RouterQueryEngine using the list of tools
            self.agent = RouterQueryEngine(
                selector=PydanticMultiSelector.from_defaults(llm=self.model_llm),
                query_engine_tools=qs_list,
                llm=self.model_llm,
                verbose=True
            )
        elif self.mode == "SubQuestion-Based Query Engines":
            self.agent = SubQuestionQueryEngine.from_defaults(
                query_engine_tools=qs_list,
                llm=self.model_llm,
                verbose=True
            )
        else:
            raise ValueError('Selected mode is not supported.')

    def set_api(self, openAI_api):
        """
        Sets the OpenAI API key and reinitializes the models and agent.

        Args:
            openAI_api (str): The API key for accessing OpenAI services.
        """
        self.openAI_api = openAI_api
        self.set_llm(llm_name=self.llm_name)
        self.set_embd(embedding_name=self.embedding_name)
        self.set_agent(query_engines_details=self.query_engines_details)

    def set_mode(self, mode):
        """
        Sets the mode of the agent and rebuilds the agent for that mode.

        Args:
            mode (str): The mode of the agent. Supported values are
                'ReAct: Query Engines & Internet', 'Router-Based Query Engines'
                and 'SubQuestion-Based Query Engines'.
        """
        self.mode = mode
        self.set_agent(query_engines_details=self.query_engines_details)

    def reset_memory(self):
        """
        Resets the memory buffer of the agent, if one has been created.
        """
        if self.memory is not None:
            self.memory.reset()