File size: 11,855 Bytes

5374a2d

import json
from pydantic import model_validator 
from pydantic_core import PydanticUndefined
from typing import Optional, Type, Tuple, Union, List, Any

from ..core.module import BaseModule
from ..core.module_utils import get_type_name
from ..core.registry import MODULE_REGISTRY
# from ..core.base_config import Parameter
from ..core.parser import Parser
from ..core.message import Message
from ..models.base_model import BaseLLM, LLMOutputParser
from ..tools.tool import Toolkit 
from ..prompts.context_extraction import CONTEXT_EXTRACTION
from ..prompts.template import PromptTemplate  


class ActionInput(LLMOutputParser):
    """Input specification and parsing for actions.
    
    This class defines the input requirements for actions and provides methods
    to generate structured input specifications. It inherits from LLMOutputParser 
    to allow parsing of LLM outputs into structured inputs for actions.
    
    Notes:
        Parameters in ActionInput should be defined in Pydantic Field format.
        For optional variables, use format: 
        var: Optional[int] = Field(default=None, description="xxx")
        Remember to add `default=None` for optional parameters.
    """

    @classmethod
    def get_input_specification(cls, ignore_fields: Optional[List[str]] = None) -> str:
        """Generate a JSON specification of the input requirements.
        
        Walks over the Pydantic model fields of the class and builds, for each
        one, a single description string of the form
        ``"<type>, <description>, required|optional[, Default value: <default>]"``.
        Fields that are listed in `ignore_fields`, or that are not reported by
        `cls.get_attrs()`, are skipped.
        
        Args:
            ignore_fields (Optional[List[str]]): Field names to exclude from the
                specification. `None` (the default) excludes nothing.
            
        Returns:
            A JSON string (indent=4) mapping field names to their description,
            or an empty string if no fields are defined or all are ignored.
        """
        # `None` sentinel instead of a shared mutable `[]` default.
        ignored = ignore_fields if ignore_fields is not None else []
        attrs = cls.get_attrs()

        fields_info = {}
        for field_name, field_info in cls.model_fields.items():
            if field_name in ignored or field_name not in attrs:
                continue

            # A field counts as optional as soon as an explicit default is set,
            # including `default=None`.
            # NOTE(review): a field declared only with `default_factory` keeps
            # `default` as PydanticUndefined and is therefore reported as
            # "required" here -- confirm this is intended.
            has_default = field_info.default is not PydanticUndefined

            parts = [get_type_name(field_info.annotation)]
            if field_info.description is not None:
                parts.append(field_info.description.strip())
            parts.append("optional" if has_default else "required")
            if has_default:
                parts.append("Default value: " + str(field_info.default))
            fields_info[field_name] = ", ".join(parts)

        if not fields_info:
            return ""
        return json.dumps(fields_info, indent=4)

    @classmethod
    def get_required_input_names(cls) -> List[str]:
        """Get a list of all required input parameter names.
        
        A field is considered required when its `default` is PydanticUndefined,
        i.e. no explicit default value was declared. Fields not reported by
        `cls.get_attrs()` are skipped.
        
        Returns:
            List[str]: Names of all parameters that are required (don't have default values),
            in model field declaration order.
        """
        attrs = cls.get_attrs()
        return [
            field_name
            for field_name, field_info in cls.model_fields.items()
            if field_name in attrs and field_info.default is PydanticUndefined
        ]


class ActionOutput(LLMOutputParser):
    """Structured result produced by an action.
    
    Inherits from LLMOutputParser so that raw LLM output can be parsed into
    a structured action result, and adds a helper to render that result as
    a JSON string.
    """

    def to_str(self) -> str:
        """Render the structured data of this output as JSON text.
        
        Returns:
            The value of `get_structured_data()` serialized with
            `json.dumps` using an indentation of 4 spaces.
        """
        structured = self.get_structured_data()
        return json.dumps(structured, indent=4)
    

class Action(BaseModule):
    """Base class for all actions in the EvoAgentX framework.
    
    Actions represent discrete operations that can be performed by agents.
    They define inputs, outputs, and execution behavior, and can optionally
    use tools to accomplish their tasks.
    
    Attributes:
        name (str): Unique identifier for the action.
        description (str): Human-readable description of what the action does.
        prompt (Optional[str]): Optional prompt template for this action.
        prompt_template (Optional[PromptTemplate]): Optional `PromptTemplate` object for this action.
        tools (Optional[List[Toolkit]]): Optional list of tools that can be used by this action.
        inputs_format (Optional[Type[ActionInput]]): Optional class defining the expected input structure.
        outputs_format (Optional[Type[Parser]]): Optional class defining the expected output structure.
    """

    name: str
    description: str
    prompt: Optional[str] = None
    prompt_template: Optional[PromptTemplate] = None 
    tools: Optional[List[Toolkit]] = None # specify the possible tool for the action
    inputs_format: Optional[Type[ActionInput]] = None # specify the input format of the action
    outputs_format: Optional[Type[Parser]] = None  # specify the possible structured output format

    def init_module(self):
        """Initialize the action module.
        
        This method is called after the action is instantiated.
        Subclasses can override this to perform custom initialization.
        """
        pass 

    def to_dict(self, exclude_none: bool = True, ignore: Optional[List[str]] = None, **kwargs) -> dict:
        """Convert the action to a dictionary for saving.

        Delegates to the parent `to_dict` and then replaces the
        `inputs_format` / `outputs_format` classes (when set) with their class
        names, so they can be stored as plain strings.

        Args:
            exclude_none (bool): Forwarded to the parent `to_dict`.
            ignore (Optional[List[str]]): Forwarded to the parent `to_dict`.
                `None` (the default) is passed on as an empty list.
            **kwargs (Any): Additional keyword arguments forwarded to the parent `to_dict`.

        Returns:
            dict: The dictionary representation of the action.
        """
        # `None` sentinel instead of a shared mutable `[]` default.
        ignore = [] if ignore is None else ignore
        data = super().to_dict(exclude_none=exclude_none, ignore=ignore, **kwargs)
        if self.inputs_format:
            data["inputs_format"] = self.inputs_format.__name__ 
        if self.outputs_format:
            data["outputs_format"] = self.outputs_format.__name__ 
        # TODO: customize serialization for the tools 
        return data 
    
    @model_validator(mode="before")
    @classmethod
    def validate_data(cls, data: Any) -> Any:
        """Resolve string-valued format fields before model validation.

        When `inputs_format` or `outputs_format` is given as a non-empty
        string (the form written by `to_dict`, i.e. when loading from a file),
        it is replaced by the result of `MODULE_REGISTRY.get_module(<name>)`.

        Args:
            data (Any): The raw input passed to the model. Only `dict` inputs
                are inspected; anything else is returned untouched.

        Returns:
            Any: The (possibly updated) input data.
        """
        # A "before" validator may receive arbitrary input, not only a dict.
        if not isinstance(data, dict):
            return data
        for key in ("inputs_format", "outputs_format"):
            value = data.get(key)
            if value and isinstance(value, str):
                # only used when loading from a file
                data[key] = MODULE_REGISTRY.get_module(value)
        # TODO: customize loading for the tools
        return data 
    
    def execute(self, llm: Optional[BaseLLM] = None, inputs: Optional[dict] = None, sys_msg: Optional[str]=None, return_prompt: bool = False, **kwargs) -> Optional[Union[Parser, Tuple[Parser, str]]]:
        """Execute the action to produce a result.
        
        This is the main entry point for executing an action. Subclasses must
        implement this method to define the action's behavior.

        Args:
            llm (Optional[BaseLLM]): The LLM used to execute the action.
            inputs (Optional[dict]): Input data for the action execution. The input data should be a dictionary that matches the input format of the provided prompt. 
                For example, if the prompt contains a variable `{input_var}`, the `inputs` dictionary should have a key `input_var`, otherwise the variable will be set to empty string. 
            sys_msg (Optional[str]): Optional system message for the LLM.
            return_prompt (bool): Whether to return the complete prompt passed to the LLM.
            **kwargs (Any): Additional keyword arguments for the execution.
        
        Returns:
            If `return_prompt` is False, the method returns a Parser object containing the structured result of the action.
            If `return_prompt` is True, the method returns a tuple containing the Parser object and the complete prompt passed to the LLM.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(f"`execute` function of {type(self).__name__} is not implemented!")

    async def async_execute(self, llm: Optional[BaseLLM] = None, inputs: Optional[dict] = None, sys_msg: Optional[str]=None, return_prompt: bool = False, **kwargs) -> Optional[Union[Parser, Tuple[Parser, str]]]:
        """
        Asynchronous execution of the action.
        
        This method is the asynchronous counterpart of the `execute` method
        and takes the same arguments. Subclasses must implement it.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(f"`async_execute` function of {type(self).__name__} is not implemented!")

class ContextExtraction(Action):
    """Action for extracting structured inputs from context.
    
    This action analyzes a conversation context to extract relevant information
    that can be used as inputs for other actions. It uses the LLM to interpret
    unstructured contextual information and format it according to the target
    action's input requirements.
    """

    def __init__(self, **kwargs):
        """Create the action, defaulting `name` and `description`.

        `name` and `description` fall back to the corresponding entries of
        `CONTEXT_EXTRACTION` when they are not supplied; all remaining keyword
        arguments are forwarded to the parent constructor.
        """
        name = kwargs.pop("name", CONTEXT_EXTRACTION["name"])
        description = kwargs.pop("description", CONTEXT_EXTRACTION["description"])
        super().__init__(name=name, description=description, **kwargs)

    def get_context_from_messages(self, messages: List[Message]) -> str:
        """Join the string form of each message, separated by blank lines.

        Args:
            messages (List[Message]): Messages making up the context.

        Returns:
            str: `str(msg)` of every message joined with two newlines; an
            empty string when `messages` is empty.
        """
        return "\n\n".join(str(msg) for msg in messages)
    
    def execute(self, llm: Optional[BaseLLM] = None, action: Optional[Action] = None, context: Optional[List[Message]] = None, **kwargs) -> Union[dict, None]:
        """Extract structured inputs for an action from conversation context.
        
        This method uses the LLM to analyze the conversation context and extract
        information that matches the input requirements of the target action.
        
        Args:
            llm: The language model to use for extraction.
            action: The target action whose input requirements (`inputs_format`) define what to extract.
            context: List of messages providing the conversation context.
            **kwargs: Additional keyword arguments.
            
        Returns:
            A dictionary containing the extracted inputs for the target action,
            or None if extraction is not possible (e.g., if the action doesn't
            require inputs or if context is missing).

        Raises:
            ValueError: If an extraction is actually required but `llm` is None.
        """
        if action is None or context is None:
            return None
        
        action_inputs_cls: Optional[Type[ActionInput]] = action.inputs_format
        if action_inputs_cls is None:
            # the action does not require inputs
            return None
        
        action_inputs_desc = action_inputs_cls.get_input_specification()
        str_context = self.get_context_from_messages(messages=context)

        # Nothing to extract: no input fields to fill, or an empty context.
        if not action_inputs_desc or not str_context:
            return None

        # Checked only here so that every "nothing to do" case above still
        # returns None without needing an LLM.
        if llm is None:
            raise ValueError(
                f"`llm` must be provided to {type(self).__name__}.execute to extract inputs from the context."
            )
        
        prompt = CONTEXT_EXTRACTION["prompt"].format(
            context=str_context,
            action_name=action.name, 
            action_description=action.description,
            action_inputs=action_inputs_desc
        )

        # The target action's input class is used as the parser for the LLM output.
        action_inputs = llm.generate(
            prompt=prompt, 
            system_message=CONTEXT_EXTRACTION["system_prompt"],
            parser=action_inputs_cls
        )
        return action_inputs.get_structured_data()