import logging
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding 
from llama_index.core.agent.react import ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import PydanticMultiSelector
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import FunctionTool
from openai import AuthenticationError

from knowledgeBase.hybrid_query_engine import load_hybrid_query_engine
from utils import sort_dict_by_values, internet_search
from prompts import default_prompt

class UserAgent:
    """
    A class to manage and interact with OpenAI language and embedding models,
    and to set up query engines and agents for querying.
    Attributes:
        llm_name (str): The name of the language model to use.
        embedding_name (str): The name of the embedding model to use.
        openAI_api (str): The API key for accessing OpenAI services.
        mode (str): The agent mode: 'ReAct: Query Engines & Internet',
            'Router-Based Query Engines', or 'SubQuestion-Based Query Engines'.
        query_engines_details (list): A list of details for query engines to be used.
        temperature (float): The temperature setting for the language model.
        system_message (str): The system prompt passed to the language model.
        model_llm (object): The language model instance.
        model_embd (object): The embedding model instance.
        agent (object): The agent or query-engine instance used for querying.
        memory (object): The chat memory buffer (ReAct mode only).
    Methods:
        __init__(llm_name, embedding_name, openAI_api, mode, query_engines_details, temperature, system_message):
            Initializes the UserAgent with the specified parameters.
        interact_with_agent(message, chat_history):
            Sends a message to the agent and appends the exchange to the chat history.
        set_llm(llm_name):
            Sets the language model based on the provided name.
        set_embd(embedding_name):
            Sets the embedding model based on the provided name.
        set_agent(query_engines_details):
            Sets up the agent with the provided query engines details.
        set_api(openAI_api):
            Sets the OpenAI API key and reinitializes the models and agent.
        set_mode(mode):
            Sets the agent mode and rebuilds the agent.
        reset_memory():
            Resets the agent's chat memory buffer.
    """
    def __init__(self, llm_name, embedding_name, openAI_api, mode, query_engines_details=None, temperature=0, system_message=None):

        self.llm_name = llm_name
        self.embedding_name = embedding_name
        self.openAI_api = openAI_api
        self.mode = mode
        self.temperature = temperature

        self.model_llm = None
        self.model_embd = None
        self.agent = None

        self.memory = None

        # Avoid a shared mutable default argument
        self.query_engines_details = query_engines_details if query_engines_details is not None else []

        # Set the system message before initializing the models, which read it in set_llm
        if system_message is None:
            self.system_message = default_prompt()
        else:
            self.system_message = system_message

        if self.openAI_api != "":
            self.set_llm(llm_name)
            self.set_embd(embedding_name)

    def interact_with_agent(self, message, chat_history):
        """
        Interacts with the AI agent based on the selected mode and updates the chat history.
        Parameters:
        message (str): The user's message to be sent to the AI agent.
        chat_history (list): The current chat history, which will be updated with the new interaction.
        Returns:
        tuple: An empty string and the updated chat history.
        Raises:
        ValueError: If the selected mode is not supported.
        The function operates in three modes:
        1. "ReAct: Query Engines & Internet": Sends the user's message to the ReAct agent and collects article names and links from the tool outputs' sources.
        2. "Router-Based Query Engines" and "SubQuestion-Based Query Engines": Sends the user's message to the corresponding query engine and collects article names and links from the response's source nodes.
        The collected references are formatted and appended to the bot's message, which is then added to the chat history.
        """
        references = {}
        if self.mode == "ReAct: Query Engines & Internet":
            # Send the user's message to the AI agent 
            try:
                ai_answer = self.agent.chat(message)
            except AuthenticationError:
                bot_message = "An error occurred: Authentication Error. Please check your OpenAI API key."
                chat_history.append({"role": "user", "content": message})
                chat_history.append({"role": "assistant", "content": bot_message})
                logging.error("Authentication error: Incorrect API key provided.")
                return "", chat_history
            except Exception as e:
                bot_message = f"An error occurred: {e}"
                chat_history.append({"role": "user", "content": message})
                chat_history.append({"role": "assistant", "content": bot_message})
                logging.error(f"An unexpected error occurred: {e}")
                return "", chat_history
            
            bot_message = ai_answer.response

            # Collect article names and links
            for tool_output in ai_answer.sources:
                raw_output = tool_output.raw_output
                # Check if raw_output has the attribute 'source_nodes', to avoid situations when 
                # the agent has not decided to retrieve any information from the query engines
                if hasattr(raw_output, 'source_nodes'):
                    for node in raw_output.source_nodes:
                        name = node.node.metadata.get('Name')
                        link = node.node.metadata.get('Link')
                        if name and link:
                            current_score = node.score if node.score is not None else 0
                            if len(name) > 80:
                                name = name[:80] + "..."
                            # Keep the highest score seen for a repeated reference
                            references[(name, link)] = max(references.get((name, link), 0), current_score)
                else:
                    # Handle the case where source_nodes isn't available
                    logging.warning("'source_nodes' attribute not found in raw_output.")
        
        elif self.mode in ["Router-Based Query Engines", "SubQuestion-Based Query Engines"]:    
            # Send the user's message to the Router Query Engine
            try:
                response = self.agent.query(message)
            except AuthenticationError:
                bot_message = "An error occurred: Authentication Error. Please check your OpenAI API key."
                chat_history.append({"role": "user", "content": message})
                chat_history.append({"role": "assistant", "content": bot_message})
                logging.error("Authentication error: Incorrect API key provided.")
                return "", chat_history
            except Exception as e:
                bot_message = f"An error occurred: {e}"
                chat_history.append({"role": "user", "content": message})
                chat_history.append({"role": "assistant", "content": bot_message})
                logging.error(f"An unexpected error occurred: {e}")
                return "", chat_history

            bot_message = response.response
            for source in response.source_nodes:
                # Access the underlying node from the NodeWithScore object
                node = source.node  
                # Assuming metadata is stored as a dict in the node's metadata attribute:
                metadata = node.metadata  
                name = metadata.get('Name')
                link = metadata.get('Link')
                if name and link:
                    current_score = source.score if source.score is not None else 0
                    if len(name) > 80:
                        name = name[:80] + "..."
                    # Keep the highest score seen for a repeated reference
                    references[(name, link)] = max(references.get((name, link), 0), current_score)
        else:
            raise ValueError('Selected mode is not supported.')
        
        # Format the references
        if references:
            # Sort the references by LLM Judge score 
            references = sort_dict_by_values(references)
            formatted_references = []
            # Format each reference with its LLM Judge score
            for (name, link), score in references:
                formatted_references.append(f"🔗 [{name}]({link}) ⭐ {score:.2f}/1  | " if score != 0 else f"🔗 [{name}]({link}) ⭐ -/1  | ")

            references_text = "Some helpful articles, sorted by relevance according to LLM Judge, along with semantic scores:\n" + " ".join(formatted_references)
            bot_message += "\n\n" + references_text
        
        # Update the chat history
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})
        return "", chat_history


    def set_llm(self, llm_name):
        """
        Set the language model (LLM) based on the provided LLM name.

        Parameters:
        llm_name (str): The name of the language model to set. Supported values are 'OpenAI GPT-4o mini' and 'OpenAI GPT-4o'.

        Raises:
        ValueError: If the provided LLM name is not supported.
        """

        self.llm_name = llm_name
        if self.llm_name == 'OpenAI GPT-4o mini':
            self.model_llm = OpenAI(model="gpt-4o-mini", temperature=self.temperature, api_key=self.openAI_api, system_prompt=self.system_message)
        elif self.llm_name == 'OpenAI GPT-4o':
            self.model_llm = OpenAI(model="gpt-4o", temperature=self.temperature, api_key=self.openAI_api, system_prompt=self.system_message)
        else:
            raise ValueError('Selected LLM name is not supported.')


    def set_embd(self, embedding_name):
        """
        Sets the embedding model based on the provided embedding name.

        Parameters:
        embedding_name (str): The name of the embedding model to be set. Currently, only 'OpenAI text-embedding-3-small' is supported.

        Raises:
        ValueError: If the provided embedding name is not supported.
        """
        self.embedding_name = embedding_name
        if self.embedding_name == 'OpenAI text-embedding-3-small':
            self.model_embd = OpenAIEmbedding(model="text-embedding-3-small", api_key=self.openAI_api)
        else:
            raise ValueError('Selected Embedding name is not supported.')
    

    def set_agent(self, query_engines_details):
        """
        Set up the agent with the provided query engines details.
        This method initializes and configures the agent based on the provided query engines details.
        It supports three modes: "ReAct: Query Engines & Internet", "Router-Based Query Engines", and "SubQuestion-Based Query Engines".
        Args:
            query_engines_details (list): A list of dictionaries, each containing details of a query engine.
                Each dictionary should have the following keys:
                - 'name': The name of the query engine.
                - 'description': A description of the query engine.
        Raises:
            ValueError: If the selected mode is not supported.
        """
        self.query_engines_details = query_engines_details

        # Load and initialize query engines based on provided set of query engines
        qs_list = []
        for qs_detail_i in query_engines_details:
            logging.debug(qs_detail_i)
            # Load hybrid query engine: Semantic + Keyword-based
            qs_i = load_hybrid_query_engine(
                            model_llm=self.model_llm, 
                            model_embd=self.model_embd, 
                            query_engine_name=qs_detail_i['name'], 
                            query_engine_description=qs_detail_i['description']
                        )

            if qs_i is None:
                logging.warning('>    Query engine {} could not be loaded.'.format(qs_detail_i['name']))
            else:
                logging.info('>    Query engine {} was loaded.'.format(qs_detail_i['name']))
                # Create a QueryEngine tool instance from the loaded query engine
                qs_i_tool = QueryEngineTool.from_defaults(
                   query_engine=qs_i,
                   description=qs_detail_i['description'],
                )
                qs_list.append(qs_i_tool)

        if self.mode == "ReAct: Query Engines & Internet":
            # Initialize a ChatMemoryBuffer with a token limit
            self.memory = ChatMemoryBuffer.from_defaults(token_limit=1500)

            search_tool = FunctionTool.from_defaults(internet_search)

            # Create a ReActAgent using the list of tools, the language model, and the memory buffer
            self.agent = ReActAgent.from_tools(
                tools=qs_list+[search_tool],
                llm=self.model_llm,
                memory=self.memory,
                verbose=True
            )
        elif self.mode == "Router-Based Query Engines":
            # Create a RouterQueryEngine using the list of tools
            self.agent = RouterQueryEngine(
                            selector=PydanticMultiSelector.from_defaults(llm=self.model_llm),
                            query_engine_tools=qs_list,
                            llm=self.model_llm,
                            verbose=True
                        )
        elif self.mode == "SubQuestion-Based Query Engines":
            self.agent = SubQuestionQueryEngine.from_defaults(
                query_engine_tools=qs_list,
                llm=self.model_llm,
                verbose=True
            )
        else:
            raise ValueError('Selected mode is not supported.')


    def set_api(self, openAI_api):
        """
        Sets the OpenAI API key and initializes the language model, embedding, and agent with the provided details.
        Args:
            openAI_api (str): The API key for accessing OpenAI services.
        """

        self.openAI_api = openAI_api
        self.set_llm(llm_name=self.llm_name)
        self.set_embd(embedding_name=self.embedding_name)
        self.set_agent(query_engines_details=self.query_engines_details)
    

    def set_mode(self, mode):
        """
        Sets the mode of the agent.
        Args:
            mode (str): The mode of the agent. Supported values are 'ReAct: Query Engines & Internet', 'Router-Based Query Engines', and 'SubQuestion-Based Query Engines'.
        """
        self.mode = mode
        self.set_agent(query_engines_details=self.query_engines_details)

    def reset_memory(self):
        """
        Resets the memory buffer of the agent.
        """
        if self.memory is not None:
            self.memory.reset()
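

# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical helpers, not wired into UserAgent): the
# reference bookkeeping and formatting performed inside interact_with_agent,
# shown as small standalone functions. They assume scores fall in [0, 1] and
# that the sorted input is a list of ((name, link), score) pairs, as produced
# by sort_dict_by_values.
def collect_reference_sketch(references, name, link, score):
    """Record one (name, link) reference, truncating long names and keeping the max score."""
    if not (name and link):
        return references
    if len(name) > 80:
        name = name[:80] + "..."
    current = score if score is not None else 0
    key = (name, link)
    references[key] = max(references.get(key, 0), current)
    return references


def format_references_sketch(sorted_references):
    """Render ((name, link), score) pairs as the markdown reference line appended to the bot message."""
    parts = []
    for (name, link), score in sorted_references:
        if score != 0:
            parts.append(f"🔗 [{name}]({link}) ⭐ {score:.2f}/1  | ")
        else:
            parts.append(f"🔗 [{name}]({link}) ⭐ -/1  | ")
    return " ".join(parts)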