""" TestTime RLVR 프롬프트 중앙 관리 시스템 모든 프롬프트를 한 곳에서 관리하여 일관성과 유지보수성을 향상시킵니다. """ from typing import Dict, List, Any from dataclasses import dataclass from enum import Enum class PromptType(Enum): """프롬프트 유형 정의""" SOLUTION_GENERATION = "solution_generation" DIVERSE_GENERATION = "diverse_generation" INPUT_GENERATION = "input_generation" TASK_GENERATION = "task_generation" TASK_EVALUATION = "task_evaluation" class BenchmarkType(Enum): """벤치마크 유형 정의""" HUMANEVAL = "humaneval" MBPP = "mbpp" GENERAL = "general" @dataclass class PromptTemplate: """프롬프트 템플릿 데이터 클래스""" name: str template: str description: str benchmark: BenchmarkType temperature: float = 0.05 variables: List[str] = None def __post_init__(self): if self.variables is None: self.variables = [] class PromptManager: """프롬프트 중앙 관리 클래스""" def __init__(self): self.prompts = self._initialize_prompts() def _initialize_prompts(self) -> Dict[str, PromptTemplate]: """모든 프롬프트 템플릿 초기화""" prompts = {} # ================================================================================ # 1. SOLUTION GENERATION PROMPTS (Current Evaluation - 베이스라인) # ================================================================================ # HumanEval 기본 솔루션 생성 prompts["solution_humaneval_basic"] = PromptTemplate( name="HumanEval 기본 솔루션 생성", benchmark=BenchmarkType.HUMANEVAL, temperature=0.05, description="HumanEval 문제에 대한 기본 솔루션 생성 (greedy)", variables=["problem_prompt"], template="""You are a Python writing assistant. Complete the following Python function. {problem_prompt} Please provide a complete implementation of the function.""" ) # HumanEval 다중 함수 처리 prompts["solution_humaneval_multi"] = PromptTemplate( name="HumanEval 다중 함수 솔루션 생성", benchmark=BenchmarkType.HUMANEVAL, temperature=0.05, description="여러 함수가 있는 HumanEval 문제 처리", variables=["problem_prompt", "entry_point"], template="""You are a Python writing assistant. Complete the following Python function. {problem_prompt} Please provide ONLY the implementation for the function `{entry_point}`. Complete the body of the `{entry_point}` function where it is incomplete. Do not modify or reimplement other functions that are already complete.""" ) # MBPP 기본 솔루션 생성 prompts["solution_mbpp_basic"] = PromptTemplate( name="MBPP 기본 솔루션 생성", benchmark=BenchmarkType.MBPP, temperature=0.05, description="MBPP 문제에 대한 기본 솔루션 생성", variables=["problem_prompt"], template=""" Please generate a complete, self-contained Python script that solves the following problem. CRITICAL REQUIREMENTS: - You MUST maintain the EXACT function signature as shown in the examples - The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples - Look at the assert statements carefully to understand the expected function signature - DO NOT change the number of parameters or their types from what is shown in the examples Instructions: - Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```). - For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line. The docstring should briefly describe: • The function's purpose • Input parameters • Return value Problem statement: {problem_prompt} """ ) # ================================================================================ # 2. DIVERSE GENERATION PROMPTS (다양한 프로그램 생성) # ================================================================================ # HumanEval 다양성 솔루션 prompts["diverse_humaneval_basic"] = PromptTemplate( name="HumanEval 다양성 솔루션 생성", benchmark=BenchmarkType.HUMANEVAL, temperature=0.7, description="HumanEval 문제에 대한 다양한 접근법 솔루션", variables=["diversity_instruction", "problem_prompt"], template="""You are a Python writing assistant. {diversity_instruction} {problem_prompt} Please provide a complete implementation of the function.""" ) # HumanEval 다양성 다중 함수 prompts["diverse_humaneval_multi"] = PromptTemplate( name="HumanEval 다양성 다중 함수 솔루션", benchmark=BenchmarkType.HUMANEVAL, temperature=0.7, description="다중 함수 HumanEval에 대한 다양성 솔루션", variables=["diversity_instruction", "problem_prompt", "entry_point"], template="""You are a Python writing assistant. {diversity_instruction} {problem_prompt} Please provide ONLY the implementation for the function `{entry_point}`. Complete the body of the `{entry_point}` function where it is incomplete. Do not modify or reimplement other functions that are already complete.""" ) # MBPP 다양성 솔루션 prompts["diverse_mbpp_basic"] = PromptTemplate( name="MBPP 다양성 솔루션 생성", benchmark=BenchmarkType.MBPP, temperature=0.7, description="MBPP 문제에 대한 다양한 접근법 솔루션", variables=["diversity_instruction", "problem_prompt"], template="""Please generate a complete, self-contained Python script that solves the following problem. CRITICAL REQUIREMENTS: - You MUST maintain the EXACT function signature as shown in the examples - The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples - Look at the assert statements carefully to understand the expected function signature - DO NOT change the number of parameters or their types from what is shown in the examples Instructions: - Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```). - For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line. The docstring should briefly describe: • The function's purpose • Input parameters • Return value {diversity_instruction} Problem statement: {problem_prompt} """ ) # ================================================================================ # 3. INPUT GENERATION PROMPTS (입력 증강) # ================================================================================ prompts["input_generation_basic"] = PromptTemplate( name="기본 입력 생성", benchmark=BenchmarkType.GENERAL, temperature=0.5, description="기존 IPO 예제를 바탕으로 새로운 입력 생성", variables=["problem_description", "existing_examples", "full_code", "arg_type_info"], template="""Given the following problem description and its Python function implementation, first analyze the types and valid ranges of the function arguments, then write **5 different example inputs** for the function that cover a diverse mix of typical (general) cases and edge/boundary cases. Problem Description: ''' {problem_description} ''' Existing Examples from Problem: {existing_examples} Function Implementation: ```python {full_code} ``` {arg_type_info} Based on the existing examples above, generate 5 NEW diverse test inputs that are different from the existing ones. Each input should be a Python dict where: - Keys are the exact parameter names from the function signature - Values are appropriate test values for each parameter Format your response as: ```python examples = [ {{dict_with_all_function_parameters}}, # Description of this test case {{dict_with_all_function_parameters}}, # Description of this test case ... # Continue for all 5 examples ] ``` Ensure your examples include: - At least 2 typical/general cases - At least 2 edge/boundary cases - 1 special case (empty, zero, maximum values, etc.) - All examples should be DIFFERENT from the existing examples shown above""" ) # ================================================================================ # 4. TASK GENERATION PROMPTS (IPO → 추론 태스크) # ================================================================================ prompts["task_induction"] = PromptTemplate( name="Induction 태스크 생성 (AZR code_f)", benchmark=BenchmarkType.GENERAL, temperature=0.05, description="주어진 입력-출력으로부터 프로그램 추론 (AZR 원본)", variables=["input_output_pairs", "message"], template="""A conversation between User and Assistant. The User provides a set of input/output pairs and a message describing the hidden function. The Assistant must: 1. **Privately think step-by-step** about how to reconstruct the general function based on the provided examples. 2. **Output exactly one** `...` block containing the full reasoning process. 3. **Then output exactly one** `...` block containing **only** the Python code snippet defining the function `f`—no labels, no comments, no extra text. 4. **Do not** generate any text outside these two blocks. 5. Follow to the **code requirements** and **formatting rules**. # Code Requirements: - Name the entry function `f` (e.g., `def f(...): ...`), you may include nested definitions inside `f`. - Ensure the function returns a value. - Include at least one input parameter. - Make the function deterministic. - AVOID the FOLLOWING: * Random functions or variables * Date/time operations * I/O operations (reading files, network requests) * Printing or logging * Any external state - Ensure execution completes within 10 seconds on a modern CPU. - All imports and custom class definitions must be at the very top of the code snippet. - The snippet must end with a return statement from the main function `f`; anything after will be removed. User: # Input and Output Pairs: {input_output_pairs} # Message: {message}""" ) prompts["task_deduction"] = PromptTemplate( name="Deduction 태스크 생성 (AZR code_o)", benchmark=BenchmarkType.GENERAL, temperature=0.05, description="주어진 프로그램과 입력으로부터 출력 추론 (AZR 원본)", variables=["snippet", "input_args"], template="""A conversation between User and Assistant. The User provides a Python code snippet and specific input values. The Assistant must: 1. **Privately think step-by-step** about how the code executes with the given inputs. 2. **Output exactly one** `...` block containing your full reasoning. 3. **Then output exactly one** `...` block containing **only** the output values—no labels, no comments, no extra text. 4. **Do not** generate any text outside these two blocks. 5. Adhere to the **output rules**. # Output Rules: - If the output is a string, wrap it in quotes. - For dicts, lists, and other literals, use valid Python literal notation. User: # Python Code Snippet: {snippet} # Input: {input_args}""" ) prompts["task_abduction"] = PromptTemplate( name="Abduction 태스크 생성 (AZR code_i)", benchmark=BenchmarkType.GENERAL, temperature=0.05, description="주어진 프로그램과 출력으로부터 입력 추론 (AZR 원본)", variables=["snippet", "output"], template="""A conversation between User and Assistant. The User provides a Python code snippet and its observed output. The Assistant must: 1. **Privately think step-by-step** about which input produces that output. 2. **Output exactly one** `...` block containing your full reasoning. 3. **Then output exactly one** `...` block containing **only** the input values—no labels, no comments, no extra text. 4. **Do not** generate any text outside these two blocks. 5. Adhere to the **input rules**. # Input Rules: - If an argument is a string, wrap it in quotes. - For multiple arguments, separate by commas. - Use Python literal notation for lists, dicts, tuples. - Boolean values must be `True` or `False`. User: # Python Code Snippet: {snippet} # Observed Output: {output}""" ) # ================================================================================ # 5. TASK EVALUATION PROMPTS (LLM 태스크 응답) # ================================================================================ prompts["task_evaluation_basic"] = PromptTemplate( name="기본 태스크 평가", benchmark=BenchmarkType.GENERAL, temperature=0.05, description="생성된 추론 태스크에 대한 LLM 응답", variables=["task_prompt"], template="{task_prompt}" ) return prompts def get_prompt(self, prompt_key: str, **kwargs) -> str: """프롬프트 키로 템플릿을 가져와 변수를 채움""" if prompt_key not in self.prompts: raise ValueError(f"Unknown prompt key: {prompt_key}") template = self.prompts[prompt_key] # 필수 변수 확인 missing_vars = [] for var in template.variables: if var not in kwargs: missing_vars.append(var) if missing_vars: raise ValueError(f"Missing required variables for prompt '{prompt_key}': {missing_vars}") # 템플릿 포맷팅 try: return template.template.format(**kwargs) except KeyError as e: raise ValueError(f"Template formatting error for prompt '{prompt_key}': {e}") def get_temperature(self, prompt_key: str) -> float: """프롬프트의 권장 temperature 반환""" if prompt_key not in self.prompts: raise ValueError(f"Unknown prompt key: {prompt_key}") return self.prompts[prompt_key].temperature def get_diversity_instruction(self, variation_id: int) -> str: """variation_id에 따른 다양성 지시문 반환""" diversity_instructions = [ "", # 기본 "", "", "" ] # diversity_instructions = [ # "", # 기본 # "Implement this in a robust way that works well for various examples", # "Provide an alternative solution with a unique implementation style:", # "Try to implement using a different approach, algorithm, or coding style than typical solutions." # ] return diversity_instructions[variation_id % len(diversity_instructions)] def list_prompts(self) -> Dict[str, PromptTemplate]: """모든 프롬프트 템플릿 목록 반환""" return self.prompts.copy() def get_prompts_by_type(self, benchmark: BenchmarkType) -> Dict[str, PromptTemplate]: """벤치마크 타입별 프롬프트 반환""" return { key: template for key, template in self.prompts.items() if template.benchmark == benchmark or template.benchmark == BenchmarkType.GENERAL } # 전역 프롬프트 매니저 인스턴스 prompt_manager = PromptManager() # 편의 함수들 def get_prompt(prompt_key: str, **kwargs) -> str: """프롬프트 가져오기 편의 함수""" return prompt_manager.get_prompt(prompt_key, **kwargs) def get_temperature(prompt_key: str) -> float: """프롬프트 temperature 가져오기 편의 함수""" return prompt_manager.get_temperature(prompt_key) def get_diversity_instruction(variation_id: int) -> str: """다양성 지시문 가져오기 편의 함수""" return prompt_manager.get_diversity_instruction(variation_id)