import regex
from copy import deepcopy
from pydantic import Field
from pydantic_core import PydanticUndefined
from typing import Union, Optional, List, Any, Tuple, Type

from ..core.logging import logger
from ..core.module import BaseModule
from ..models.base_model import LLMOutputParser, PARSER_VALID_MODE
from ..tools import Toolkit
from ..prompts.tool_calling import TOOL_CALLING_TEMPLATE


class PromptTemplate(BaseModule):
    """Base class holding the pieces of a prompt and the shared section renderers.

    Subclasses implement `format` to assemble the rendered sections into the
    final prompt (a single string or a list of chat messages).
    """

    instruction: str = Field(description="The instruction that the LLM will follow.")
    context: Optional[str] = Field(default=None, description="Additional context that can help the LLM understand the instruction.")
    constraints: Optional[Union[List[str], str]] = Field(default=None, description="Constraints that the LLM must follow.")
    tools: Optional[List[Toolkit]] = Field(default=None, description="Tools that the LLM can use.")
    demonstrations: Optional[List[dict]] = Field(default=None, description="Examples of how to use the instruction.")
    history: Optional[List[Any]] = Field(default=None, description="History of the conversation between the user and the LLM.")

    # ------------------------------------------------------------------ #
    # Generic field access
    # ------------------------------------------------------------------ #
    def get_field_names(self) -> List[str]:
        """Return the names of all model fields except `class_name`."""
        return [name for name in type(self).model_fields if name != "class_name"]

    def get(self, key: str) -> Any:
        """Return the value of field `key`.

        Raises:
            ValueError: If `key` is not one of `get_field_names()`.
        """
        fields = self.get_field_names()
        if key not in fields:
            raise ValueError(f"Invalid key `{key}` for `{self.__class__.__name__}`. Valid keys are: {fields}")
        return getattr(self, key)

    def set(self, key: str, value: Any):
        """Assign `value` to field `key`.

        Raises:
            ValueError: If `key` is not one of `get_field_names()`.
        """
        fields = self.get_field_names()
        if key not in fields:
            raise ValueError(f"Invalid key `{key}` for `{self.__class__.__name__}`. Valid keys are: {fields}")
        setattr(self, key, value)

    def get_instruction(self) -> str:
        """Return the instruction text."""
        return self.instruction

    def get_demonstrations(self) -> Optional[List[Any]]:
        """Return the demonstrations, or None when none are set."""
        return self.demonstrations

    def get_context(self) -> Optional[str]:
        """Return the context text, or None when none is set."""
        return self.context

    def get_history(self) -> Optional[List[Any]]:
        """Return the conversation history, or None when none is set."""
        return self.history

    def get_constraints(self) -> Optional[Union[List[str], str]]:
        """Return the constraints (a list or a single string), or None."""
        return self.constraints

    def get_tools(self) -> Optional[List[Toolkit]]:
        """Return the configured toolkits, or None when none are set."""
        return self.tools

    def set_instruction(self, instruction: str):
        """Replace the instruction text."""
        self.set("instruction", instruction)

    def set_demonstrations(self, demonstrations: List[Any]):
        """Replace the demonstrations."""
        self.set("demonstrations", demonstrations)

    def set_context(self, context: str):
        """Replace the context text."""
        self.set("context", context)

    def set_history(self, history: List[Any]):
        """Replace the conversation history."""
        self.set("history", history)

    def set_constraints(self, constraints: Union[List[str], str]):
        """Replace the constraints."""
        self.set("constraints", constraints)

    def set_tools(self, tools: List[Toolkit]):
        """Replace the toolkits."""
        self.set("tools", tools)

    # ------------------------------------------------------------------ #
    # Format introspection
    # ------------------------------------------------------------------ #
    def get_required_inputs_or_outputs(self, format: Type[LLMOutputParser]) -> List[str]:
        """Return the attributes of `format` that have no default value.

        Only fields listed by `format.get_attrs()` are considered; a field is
        required when its pydantic default is `PydanticUndefined`.
        """
        attrs = format.get_attrs()
        return [
            field_name
            for field_name, field_info in format.model_fields.items()
            if field_name in attrs and field_info.default is PydanticUndefined
        ]

    def clear_placeholders(self, text: str) -> str:
        """Replace every single-brace `{xx}` placeholder in `text` with `xx` in backticks.

        A backtick is only added on a side where the neighbouring character is
        not already a backtick.
        """
        # NOTE(review): the look-around parts of these two patterns were damaged in
        # the reviewed copy of this file and have been restored as negative
        # look-behind/look-ahead guards against doubled braces -- please confirm
        # against version control.
        placeholder_names = set(regex.findall(r"(?<!\{)\{([^\{\}\s]+)\}(?!\})", text))

        def replacer(match):
            # `match.string` is the text handed to `regex.sub` for this pass, so the
            # neighbouring characters are read from the same string the offsets index.
            source = match.string
            start, end = match.start(), match.end()
            before = source[start - 1] if start > 0 else ""
            after = source[end] if end < len(source) else ""
            replacement = match.group(1)
            if before != "`":
                replacement = "`" + replacement
            if after != "`":
                replacement = replacement + "`"
            return replacement

        # One substitution pass per distinct name (as before); sorted only to make
        # the traversal order reproducible.
        for field in sorted(placeholder_names):
            pattern = r"(?<!\{)\{(" + regex.escape(field) + r")\}(?!\})"
            text = regex.sub(pattern, replacer, text)
        return text

    def check_required_inputs(self, inputs_format: Type[LLMOutputParser], values: dict):
        """Log a warning listing required inputs of `inputs_format` missing from `values`.

        Does nothing when `inputs_format` is None. `values=None` is treated as
        "no values supplied" instead of raising.
        """
        if inputs_format is None:
            return
        provided = values if values is not None else {}
        required_inputs = self.get_required_inputs_or_outputs(inputs_format)
        missing_required_inputs = [field for field in required_inputs if field not in provided]
        if missing_required_inputs:
            logger.warning(f"Missing required inputs (without default values) for `{inputs_format.__name__}`: {missing_required_inputs}, will set them to empty strings.")

    # ------------------------------------------------------------------ #
    # Section renderers
    # ------------------------------------------------------------------ #
    def render_input_example(self, inputs_format: Type[LLMOutputParser], values: dict, missing_field_value: str = "") -> str:
        """Render input values as `[[ **name** ]]:` blocks, one per field.

        Args:
            inputs_format: If given, only its attributes are rendered, in its order.
            values: Mapping of field name to value. May be None.
            missing_field_value: Text used for attributes of `inputs_format`
                that are absent from `values`.

        Returns:
            The rendered blocks joined by newlines, or "" when both
            `inputs_format` and `values` are None.
        """
        if inputs_format is None and values is None:
            return ""
        # With a format but no values, every field falls back to `missing_field_value`.
        provided = values if values is not None else {}
        if inputs_format is not None:
            field_values = {field: provided.get(field, missing_field_value) for field in inputs_format.get_attrs()}
        else:
            field_values = provided
        return "\n".join(f"[[ **{field}** ]]:\n{value}" for field, value in field_values.items())

    def get_output_template(self, outputs_format: Type[LLMOutputParser], parse_mode: str = "title", title_format: str = "## {title}") -> Tuple[str, List[str]]:
        """Build a `str.format` template describing the expected output layout.

        Args:
            outputs_format: Parser class whose attributes become the output fields.
            parse_mode: One of "json", "xml" or "title".
            title_format: Format string with a `{title}` slot, used in "title"
                mode. None falls back to "## {title}".

        Returns:
            `(template, keys)`: the template contains one `{field}` slot per
            attribute (non-required ones are tagged "(Optional)"), and `keys`
            is the list of attribute names.

        Raises:
            ValueError: If `outputs_format` is None or `parse_mode` is unsupported.
        """
        if outputs_format is None:
            raise ValueError("`outputs_format` is required in `get_output_template`.")
        valid_modes = ["json", "xml", "title"]
        if parse_mode not in valid_modes:
            raise ValueError(f"Invalid parse mode `{parse_mode}` for `{self.__class__.__name__}.get_output_template`. Valid modes are: {valid_modes}.")
        if title_format is None:
            title_format = "## {title}"

        fields = outputs_format.get_attrs()
        required_fields = self.get_required_inputs_or_outputs(outputs_format)

        if parse_mode == "json":
            # Doubled braces survive the later `.format(...)` call as literal braces.
            json_template = "{{\n"
            for field in fields:
                json_template += f" \"{field}\""
                json_template += f": \"{{{field}}}\",\n" if field in required_fields else f" (Optional): \"{{{field}}}\",\n"
            json_template = json_template.rstrip(",\n") + "\n}}"
            output_template = json_template
        elif parse_mode == "xml":
            xml_template = ""
            for field in fields:
                xml_template += f"<{field}>\n" if field in required_fields else f"<{field}> (Optional)\n"
                # Close the element so the example shown to the model is well-formed.
                xml_template += f"{{{field}}}\n</{field}>\n\n"
            output_template = xml_template.rstrip("\n")
        else:  # parse_mode == "title"
            title_template = ""
            for field in fields:
                title_section = title_format.format(title=field)
                title_section += "\n" if field in required_fields else " (Optional)\n"
                title_section += f"{{{field}}}\n\n"
                title_template += title_section
            output_template = title_template.rstrip("\n")

        return output_template, fields

    def render_instruction(self) -> str:
        """Render the "### Instruction" section."""
        # Clear potential placeholders in the instruction; the inputs section
        # is what carries the actual input values.
        instruction_str = self.clear_placeholders(self.instruction)
        return f"### Instruction\nThis is the main task instruction you must follow:\n{instruction_str}\n"

    def render_context(self) -> str:
        """Render the "### Context" section, or "" when no context is set."""
        if not self.context:
            return ""
        return f"### Context\nHere is some additional background information to help you understand the task:\n{self.context}\n"

    def render_tools(self) -> str:
        """Render the tool-calling section from every toolkit's schemas, or "" without tools."""
        if not self.tools:
            return ""
        # Flatten the per-toolkit schema lists into a single list.
        tools_schemas = [schema for tool in self.tools for schema in tool.get_tool_schemas()]
        return TOOL_CALLING_TEMPLATE.format(tools_description=tools_schemas)

    def render_constraints(self) -> str:
        """Render the "### Constraints" section, or "" when no constraints are set.

        A list of constraints is rendered as a bullet list; a string is used as is.
        """
        if not self.constraints:
            return ""
        if isinstance(self.constraints, list):
            constraints_str = "\n".join(f"- {c}" for c in self.constraints)
        else:
            constraints_str = self.constraints
        return f"### Constraints\nYou must follow these rules or constraints when generating your output:\n{constraints_str}\n"

    def _render_system_message(self, system_prompt: Optional[str] = None) -> str:
        """Combine system prompt, instruction, context, tools and constraints.

        Sections other than the instruction are included only when set.
        """
        prompt_pieces = []
        if system_prompt:
            prompt_pieces.append(system_prompt + "\n")
        prompt_pieces.append(self.render_instruction())
        if self.context:
            prompt_pieces.append(self.render_context())
        if self.tools:
            prompt_pieces.append(self.render_tools())
        if self.constraints:
            prompt_pieces.append(self.render_constraints())
        return "\n".join(prompt_pieces)

    def render_outputs(self, outputs_format: Type[LLMOutputParser], parse_mode: str = "title", title_format: str = "## {title}") -> str:
        """Render the "### Outputs Format" section.

        Falls back to a generic instruction when there is no `outputs_format`,
        when it has no attributes, or when `parse_mode` is None, "str" or
        "custom". Otherwise the output template is filled with each field's
        description (or a generic placeholder) as an example.
        """
        if outputs_format is None or parse_mode in [None, "str", "custom"] or len(outputs_format.get_attrs()) == 0:
            return "### Outputs Format\nPlease generate a response that best fits the task instruction.\n"

        output_template, output_keys = self.get_output_template(outputs_format, parse_mode=parse_mode, title_format=title_format)

        output_str = "### Outputs Format\nYou MUST strictly follow the following format when generating your output:\n\n"
        if parse_mode == "json":
            output_str += "Format your output in json format, such as:\n"
        elif parse_mode == "xml":
            output_str += "Format your output in xml format, such as:\n"
        elif parse_mode == "title":
            output_str += "Format your output in sectioned title format, such as:\n"

        example_values = {}
        for key in output_keys:
            field_info = outputs_format.model_fields.get(key)
            if field_info and field_info.description:
                example_values[key] = "[" + field_info.description + "]"
            else:
                example_values[key] = "[Your output here]"

        output_str += output_template.format(**example_values)
        if "(Optional)" in output_template:
            output_str += "\n\nNote: For optional fields, you can omit them in your output if they are not necessary."
        output_str += "\n"
        return output_str

    def format(
        self,
        inputs_format: Optional[Type[LLMOutputParser]] = None,
        outputs_format: Optional[Type[LLMOutputParser]] = None,
        values: Optional[dict] = None,
        parse_mode: Optional[str] = "title",
        title_format: Optional[str] = "## {title}",
        output_format: Optional[str] = None,
        **kwargs
    ) -> str:
        """Assemble the final prompt. Must be implemented by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(f"`format` method is not implemented for `{self.__class__.__name__}`.")

    def get_config(self) -> dict:
        """Return the template's configuration as produced by `to_dict()`."""
        return self.to_dict()

    def copy(self, **kwargs) -> "PromptTemplate":
        """Create a deep-copied new template, optionally overriding fields via kwargs.

        Only keys already present in the configuration are overridden; other
        keyword arguments are ignored.
        """
        new_config = deepcopy(self.get_config())
        new_config = {k: kwargs.get(k, v) for k, v in new_config.items()}
        return self.__class__.from_dict(new_config)


class StringTemplate(PromptTemplate):
    """Prompt template that renders everything into one prompt string."""

    def _render_demonstration_output(
        self,
        outputs_format: Type[LLMOutputParser],
        output_values: dict,
        parse_mode: Optional[str],
        title_format: Optional[str],
        custom_output_format: Optional[str]
    ) -> str:
        """Render the output side of a single demonstration.

        With a custom output format, or a `parse_mode` of None, "str" or
        "custom", the values are listed as plain `field:` blocks. Otherwise
        they are substituted into the template from `get_output_template`.
        """
        if custom_output_format is not None or parse_mode in [None, "str", "custom"]:
            return "\n".join(f"{field}:\n{value}" for field, value in output_values.items())
        output_template, _ = self.get_output_template(outputs_format, parse_mode=parse_mode, title_format=title_format)
        # Drop the optional marker from the template *before* substituting, so that
        # demonstration values containing "(Optional)" are left untouched and no
        # stray space is left behind where the marker used to be.
        output_template = output_template.replace(" (Optional)", "")
        return output_template.format(**output_values)

    def render_demonstrations(
        self,
        inputs_format: Type[LLMOutputParser],
        outputs_format: Type[LLMOutputParser],
        parse_mode: str,
        title_format: str = None,
        custom_output_format: str = None,
        **kwargs
    ) -> str:
        """Render the demonstrations as a numbered "### Examples" section.

        Fields missing from a demonstration are rendered as "Not provided".

        Returns:
            The examples section, or "" when there are no demonstrations.

        Raises:
            ValueError: If either format is None or has no attributes.
        """
        if not self.demonstrations:
            return ""
        if inputs_format is None or outputs_format is None:
            raise ValueError("`inputs_format` and `outputs_format` are required in `render_demonstrations`.")
        if len(inputs_format.get_attrs()) == 0 or len(outputs_format.get_attrs()) == 0:
            raise ValueError("`inputs_format` and `outputs_format` must have at least one attribute.")

        input_fields = inputs_format.get_attrs()
        output_fields = outputs_format.get_attrs()

        demo_str_list = []
        for i, demo in enumerate(self.demonstrations):
            input_values = {field: demo.get(field, "Not provided") for field in input_fields}
            output_values = {field: demo.get(field, "Not provided") for field in output_fields}

            demo_str = f"Example {i+1}:\n"
            demo_str += "### Inputs\n"
            demo_str += self.render_input_example(inputs_format, input_values, missing_field_value="Not provided")
            demo_str += "\n\n"
            demo_str += "### Outputs\n"
            demo_str += self._render_demonstration_output(
                outputs_format, output_values, parse_mode, title_format, custom_output_format
            )
            demo_str_list.append(demo_str)

        return "### Examples\n" + "\n\n".join(demo_str_list) + "\n\n=== End of Examples ===\n"

    def render_history(self) -> str:
        """Render the "### History" section from the string form of `self.history`."""
        return "### History\n{history}".format(history=self.history)

    def render_inputs(self, inputs_format: Type[LLMOutputParser], values: dict) -> str:
        """Render the "### Inputs" section with an explanatory lead-in.

        Returns "" when neither a format nor values are given, or when the
        format has no attributes. Missing fields are shown as "Not provided".
        """
        if (inputs_format is None and values is None) or (inputs_format is not None and len(inputs_format.get_attrs()) == 0):
            return ""
        # Warn about required fields that were not provided.
        self.check_required_inputs(inputs_format, values)
        input_str = "### Inputs\nThese are the input values provided by the user (with input names emphasized):\n"
        input_str += self.render_input_example(inputs_format, values, missing_field_value="Not provided")
        input_str += "\n"
        return input_str

    def format(
        self,
        system_prompt: Optional[str] = None,
        values: Optional[dict] = None,
        inputs_format: Optional[Type[LLMOutputParser]] = None,
        outputs_format: Optional[Type[LLMOutputParser]] = None,
        parse_mode: Optional[str] = "title",
        title_format: Optional[str] = "## {title}",
        custom_output_format: Optional[str] = None,
        **kwargs
    ) -> str:
        """Format the prompt template into a single prompt string.

        Sequentially concatenates the following sections (if provided):
        instruction, context, tools, constraints, demonstrations, history,
        inputs and outputs.

        Args:
            system_prompt (Optional[str]): Additional system prompt to prepend.
            values (Optional[dict]): The values used to render the inputs.
            inputs_format (Optional[Type[LLMOutputParser]]): Defines the input
                variables. If provided, the inputs it specifies are extracted
                from `values` to render the inputs section. Otherwise all
                fields in `values` (if provided) are rendered directly.
            outputs_format (Optional[Type[LLMOutputParser]]): Defines the output
                variables. If provided, the output format is constructed from
                it based on `parse_mode`. Otherwise a default output format is
                used.
            parse_mode (Optional[str]): The mode to parse the outputs, chosen
                from ["json", "xml", "title", "str", "custom"].
            title_format (Optional[str]): Title format for "title" mode.
                Default is "## {title}".
            custom_output_format (Optional[str]): User-specified output format.
                If provided, it is used directly in the `Outputs Format`
                section instead of one built from `outputs_format`.
            **kwargs: Additional keyword arguments (unused).

        Returns:
            str: The formatted prompt string.

        Raises:
            ValueError: If `parse_mode` is not in `PARSER_VALID_MODE`.
        """
        if parse_mode not in PARSER_VALID_MODE:
            raise ValueError(f"Invalid parse mode `{parse_mode}` for `{self.__class__.__name__}.format`. Valid modes are: {PARSER_VALID_MODE}.")

        prompt_pieces = [self._render_system_message(system_prompt)]
        if self.demonstrations:
            prompt_pieces.append(
                self.render_demonstrations(
                    inputs_format=inputs_format,
                    outputs_format=outputs_format,
                    parse_mode=parse_mode,
                    title_format=title_format,
                    custom_output_format=custom_output_format
                )
            )
        if self.history:
            prompt_pieces.append(self.render_history())
        if inputs_format or values:
            prompt_pieces.append("-"*20)
            prompt_pieces.append(self.render_inputs(inputs_format, values))

        # Define the output format.
        if custom_output_format:
            prompt_pieces.append(f"### Outputs Format\n{custom_output_format}")
        else:
            prompt_pieces.append(self.render_outputs(outputs_format, parse_mode, title_format))

        prompt = "\n".join(piece for piece in prompt_pieces if piece)
        return prompt.strip()


class ChatTemplate(StringTemplate):
    """Prompt template that renders into a list of chat messages."""

    def _create_message(self, role: str, content: str) -> dict:
        """Create a message dictionary with role and content."""
        return {"role": role, "content": content}

    def render_demonstrations(
        self,
        inputs_format: Type[LLMOutputParser],
        outputs_format: Type[LLMOutputParser],
        parse_mode: str,
        title_format: str = None,
        custom_output_format: str = None
    ) -> List[dict]:
        """Render demonstrations as alternating user and assistant messages.

        Fields missing from a demonstration are rendered as "Not provided".

        Returns:
            One user and one assistant message per demonstration, or an empty
            list when there are no demonstrations.

        Raises:
            ValueError: If either format is None or has no attributes.
        """
        if not self.demonstrations:
            return []
        if inputs_format is None or outputs_format is None:
            raise ValueError("`inputs_format` and `outputs_format` are required in `render_demonstrations`.")
        if len(inputs_format.get_attrs()) == 0 or len(outputs_format.get_attrs()) == 0:
            raise ValueError("`inputs_format` and `outputs_format` must have at least one attribute.")

        input_fields = inputs_format.get_attrs()
        output_fields = outputs_format.get_attrs()

        messages = []
        for demo in self.demonstrations:
            # User message (input side of the example).
            input_values = {field: demo.get(field, "Not provided") for field in input_fields}
            user_content = self.render_input_example(inputs_format, input_values, missing_field_value="Not provided")
            messages.append(self._create_message("user", user_content))

            # Assistant message (output side of the example).
            output_values = {field: demo.get(field, "Not provided") for field in output_fields}
            assistant_content = self._render_demonstration_output(
                outputs_format, output_values, parse_mode, title_format, custom_output_format
            )
            messages.append(self._create_message("assistant", assistant_content))

        return messages

    # def render_history(self) -> List[dict]:
    #     """Render conversation history as alternating user and assistant messages."""
    #     raise NotImplementedError("`render_history` method is not supported for `{self.__class__.__name__}`. Returning empty list.")

    def render_inputs(self, inputs_format: Optional[Type[LLMOutputParser]], values: Optional[dict]) -> str:
        """Render the "### Inputs" section for the current user message.

        Returns "" when neither a format nor values are given, or when the
        format has no attributes. Missing fields are shown as "Not provided".
        """
        if (inputs_format is None and values is None) or (inputs_format is not None and len(inputs_format.get_attrs()) == 0):
            return ""
        # Warn about required fields that were not provided.
        self.check_required_inputs(inputs_format, values)
        input_str = "### Inputs\n"
        input_str += self.render_input_example(inputs_format, values, missing_field_value="Not provided")
        input_str += "\n"
        return input_str

    def render_current_user_message(
        self,
        values: Optional[dict],
        inputs_format: Optional[Type[LLMOutputParser]],
        outputs_format: Optional[Type[LLMOutputParser]],
        parse_mode: str,
        title_format: str,
        custom_output_format: Optional[str] = None
    ) -> str:
        """Render the current user message: the inputs followed by the output format."""
        input_pieces = []
        if inputs_format or values:
            input_pieces.append(self.render_inputs(inputs_format, values))
        if custom_output_format:
            input_pieces.append(f"### Outputs Format\n{custom_output_format}")
        else:
            input_pieces.append(self.render_outputs(outputs_format, parse_mode, title_format))
        user_message = "\n".join(piece for piece in input_pieces if piece)
        return user_message.strip()

    def format(
        self,
        system_prompt: Optional[str] = None,
        values: Optional[dict] = None,
        inputs_format: Optional[Type[LLMOutputParser]] = None,
        outputs_format: Optional[Type[LLMOutputParser]] = None,
        parse_mode: Optional[str] = "title",
        title_format: Optional[str] = "## {title}",
        custom_output_format: Optional[str] = None,
        **kwargs
    ) -> List[dict]:
        """Format the prompt template into a list of chat messages.

        The messages are produced in the following order:
        1. System message (system prompt, instruction, context, tools and constraints)
        2. Few-shot examples (if demonstrations are set)
        3. Current user input (input values and output format requirements)

        Note: `self.history` is not rendered into the messages by this method.

        Args:
            system_prompt (Optional[str]): Additional system prompt to prepend to the template.
            values (Optional[dict]): The values used to render the inputs.
            inputs_format (Optional[Type[LLMOutputParser]]): Defines the input variables.
            outputs_format (Optional[Type[LLMOutputParser]]): Defines the output variables.
            parse_mode (Optional[str]): The mode to parse the outputs.
            title_format (Optional[str]): Title format for "title" mode.
            custom_output_format (Optional[str]): User-specified output format.
            **kwargs: Additional keyword arguments (unused).

        Returns:
            List[dict]: A list of chat messages in the format:
                [
                    {"role": "system", "content": system_message},
                    # Begin few-shot examples
                    {"role": "user", "content": few_shot_example_1_input},
                    {"role": "assistant", "content": few_shot_example_1_output},
                    ...
                    # End few-shot examples
                    {"role": "user", "content": current_input},
                ]

        Raises:
            ValueError: If `parse_mode` is not in `PARSER_VALID_MODE`.
        """
        if parse_mode not in PARSER_VALID_MODE:
            raise ValueError(f"Invalid parse mode `{parse_mode}` for `{self.__class__.__name__}.format`. Valid modes are: {PARSER_VALID_MODE}.")

        messages = []

        # System message.
        system_content = self._render_system_message(system_prompt)
        messages.append(self._create_message("system", system_content))

        # Few-shot examples.
        if self.demonstrations:
            messages.extend(
                self.render_demonstrations(
                    inputs_format=inputs_format,
                    outputs_format=outputs_format,
                    parse_mode=parse_mode,
                    title_format=title_format,
                    custom_output_format=custom_output_format
                )
            )

        # Current user input and output format requirements.
        current_input = self.render_current_user_message(
            values=values,
            inputs_format=inputs_format,
            outputs_format=outputs_format,
            parse_mode=parse_mode,
            title_format=title_format,
            custom_output_format=custom_output_format
        )
        messages.append(self._create_message("user", current_input))

        return messages


class MiproPromptTemplate(ChatTemplate):
    """Chat template that accepts `dspy.Example` objects as demonstrations."""

    def render_demonstrations(
        self,
        inputs_format: Type[LLMOutputParser],
        outputs_format: Type[LLMOutputParser],
        parse_mode: str,
        title_format: str = None,
        custom_output_format: str = None
    ) -> List[dict]:
        """Convert `dspy.Example` demonstrations to dicts, then render them as chat messages.

        When the first demonstration is a `dspy.Example`, `self.demonstrations`
        is replaced in place by the `toDict()` form of every entry.
        """
        # Imported here so that dspy is only needed when this template is used.
        import dspy

        if self.demonstrations and isinstance(self.demonstrations[0], dspy.Example):
            self.demonstrations = [example.toDict() for example in self.demonstrations]
        return super().render_demonstrations(inputs_format, outputs_format, parse_mode, title_format, custom_output_format)