"""
TestTime RLVR 프롬프트 중앙 관리 시스템
모든 프롬프트를 한 곳에서 관리하여 일관성과 유지보수성을 향상시킵니다.
"""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional
class PromptType(Enum):
    """Prompt type definitions.

    The member names mirror the five numbered section headings used when the
    templates are registered in ``PromptManager._initialize_prompts``.
    """
    # NOTE(review): no code visible in this file reads these members — confirm
    # whether external callers depend on them.
    SOLUTION_GENERATION = "solution_generation"
    DIVERSE_GENERATION = "diverse_generation"
    INPUT_GENERATION = "input_generation"
    TASK_GENERATION = "task_generation"
    TASK_EVALUATION = "task_evaluation"
class BenchmarkType(Enum):
    """Benchmark type definitions.

    Each ``PromptTemplate`` is tagged with one of these members.
    """
    HUMANEVAL = "humaneval"
    MBPP = "mbpp"
    # Templates tagged GENERAL are returned by
    # PromptManager.get_prompts_by_type for every requested benchmark.
    GENERAL = "general"
@dataclass
class PromptTemplate:
    """Data class holding one prompt template together with its metadata."""
    # Human-readable label for the template.
    name: str
    # Template text; PromptManager.get_prompt fills it in with str.format.
    template: str
    # Free-text description of what the prompt is for.
    description: str
    # Benchmark this template is tagged with.
    benchmark: BenchmarkType
    # Recommended sampling temperature (returned by PromptManager.get_temperature).
    temperature: float = 0.05
    # Names of the keyword arguments a caller must supply when formatting.
    # None (the default, or passed explicitly) is normalised to a fresh empty
    # list in __post_init__, so instances never share a list object.
    variables: Optional[List[str]] = None

    def __post_init__(self):
        """Replace a missing ``variables`` value with a new empty list."""
        if self.variables is None:
            self.variables = []
class PromptManager:
"""프롬프트 중앙 관리 클래스"""
def __init__(self):
    """Create the manager and build its prompt registry.

    ``self.prompts`` maps each prompt key to the template object returned by
    ``_initialize_prompts``.
    """
    self.prompts = self._initialize_prompts()
def _initialize_prompts(self) -> Dict[str, PromptTemplate]:
    """Build the registry containing every prompt template.

    Returns:
        A dict mapping each prompt key (for example
        ``"solution_humaneval_basic"``) to its ``PromptTemplate``.
    """
    prompts = {}

    # ================================================================================
    # 1. SOLUTION GENERATION PROMPTS (Current Evaluation - baseline)
    # ================================================================================

    # HumanEval basic solution generation
    prompts["solution_humaneval_basic"] = PromptTemplate(
        name="HumanEval 기본 솔루션 생성",
        benchmark=BenchmarkType.HUMANEVAL,
        temperature=0.05,
        description="HumanEval 문제에 대한 기본 솔루션 생성 (greedy)",
        variables=["problem_prompt"],
        template="""You are a Python writing assistant. Complete the following Python function.
{problem_prompt}
Please provide a complete implementation of the function."""
    )

    # HumanEval multi-function handling
    prompts["solution_humaneval_multi"] = PromptTemplate(
        name="HumanEval 다중 함수 솔루션 생성",
        benchmark=BenchmarkType.HUMANEVAL,
        temperature=0.05,
        description="여러 함수가 있는 HumanEval 문제 처리",
        variables=["problem_prompt", "entry_point"],
        template="""You are a Python writing assistant. Complete the following Python function.
{problem_prompt}
Please provide ONLY the implementation for the function `{entry_point}`.
Complete the body of the `{entry_point}` function where it is incomplete.
Do not modify or reimplement other functions that are already complete."""
    )

    # MBPP basic solution generation
    # (this template text starts with a newline, unlike "diverse_mbpp_basic")
    prompts["solution_mbpp_basic"] = PromptTemplate(
        name="MBPP 기본 솔루션 생성",
        benchmark=BenchmarkType.MBPP,
        temperature=0.05,
        description="MBPP 문제에 대한 기본 솔루션 생성",
        variables=["problem_prompt"],
        template="""
Please generate a complete, self-contained Python script that solves the following problem.
CRITICAL REQUIREMENTS:
- You MUST maintain the EXACT function signature as shown in the examples
- The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples
- Look at the assert statements carefully to understand the expected function signature
- DO NOT change the number of parameters or their types from what is shown in the examples
Instructions:
- Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```).
- For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line.
The docstring should briefly describe:
• The function's purpose
• Input parameters
• Return value
Problem statement:
{problem_prompt}
"""
    )

    # ================================================================================
    # 2. DIVERSE GENERATION PROMPTS (diverse program generation)
    # ================================================================================

    # HumanEval diverse solutions
    prompts["diverse_humaneval_basic"] = PromptTemplate(
        name="HumanEval 다양성 솔루션 생성",
        benchmark=BenchmarkType.HUMANEVAL,
        temperature=0.7,
        description="HumanEval 문제에 대한 다양한 접근법 솔루션",
        variables=["diversity_instruction", "problem_prompt"],
        template="""You are a Python writing assistant. {diversity_instruction}
{problem_prompt}
Please provide a complete implementation of the function."""
    )

    # HumanEval diverse solutions, multi-function problems
    prompts["diverse_humaneval_multi"] = PromptTemplate(
        name="HumanEval 다양성 다중 함수 솔루션",
        benchmark=BenchmarkType.HUMANEVAL,
        temperature=0.7,
        description="다중 함수 HumanEval에 대한 다양성 솔루션",
        variables=["diversity_instruction", "problem_prompt", "entry_point"],
        template="""You are a Python writing assistant. {diversity_instruction}
{problem_prompt}
Please provide ONLY the implementation for the function `{entry_point}`.
Complete the body of the `{entry_point}` function where it is incomplete.
Do not modify or reimplement other functions that are already complete."""
    )

    # MBPP diverse solutions
    prompts["diverse_mbpp_basic"] = PromptTemplate(
        name="MBPP 다양성 솔루션 생성",
        benchmark=BenchmarkType.MBPP,
        temperature=0.7,
        description="MBPP 문제에 대한 다양한 접근법 솔루션",
        variables=["diversity_instruction", "problem_prompt"],
        template="""Please generate a complete, self-contained Python script that solves the following problem.
CRITICAL REQUIREMENTS:
- You MUST maintain the EXACT function signature as shown in the examples
- The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples
- Look at the assert statements carefully to understand the expected function signature
- DO NOT change the number of parameters or their types from what is shown in the examples
Instructions:
- Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```).
- For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line.
The docstring should briefly describe:
• The function's purpose
• Input parameters
• Return value
{diversity_instruction}
Problem statement:
{problem_prompt}
"""
    )

    # ================================================================================
    # 3. INPUT GENERATION PROMPTS (input augmentation)
    # ================================================================================

    # The doubled braces ({{ ... }}) in this template are str.format escapes;
    # they come out as literal single braces in the formatted prompt.
    prompts["input_generation_basic"] = PromptTemplate(
        name="기본 입력 생성",
        benchmark=BenchmarkType.GENERAL,
        temperature=0.5,
        description="기존 IPO 예제를 바탕으로 새로운 입력 생성",
        variables=["problem_description", "existing_examples", "full_code", "arg_type_info"],
        template="""Given the following problem description and its Python function implementation, first analyze the types and valid ranges of the function arguments, then write **5 different example inputs** for the function that cover a diverse mix of typical (general) cases and edge/boundary cases.
Problem Description:
'''
{problem_description}
'''
Existing Examples from Problem:
{existing_examples}
Function Implementation:
```python
{full_code}
```
{arg_type_info}
Based on the existing examples above, generate 5 NEW diverse test inputs that are different from the existing ones. Each input should be a Python dict where:
- Keys are the exact parameter names from the function signature
- Values are appropriate test values for each parameter
Format your response as:
```python
examples = [
{{dict_with_all_function_parameters}}, # Description of this test case
{{dict_with_all_function_parameters}}, # Description of this test case
... # Continue for all 5 examples
]
```
Ensure your examples include:
- At least 2 typical/general cases
- At least 2 edge/boundary cases
- 1 special case (empty, zero, maximum values, etc.)
- All examples should be DIFFERENT from the existing examples shown above"""
    )

    # ================================================================================
    # 4. TASK GENERATION PROMPTS (IPO -> reasoning tasks)
    # ================================================================================

    # NOTE(review): in the three task prompts below, steps 2 and 3 both refer
    # to a `...` block, so the two blocks cannot be told apart; it looks like
    # the block/tag names were lost — confirm against the intended AZR prompt.
    prompts["task_induction"] = PromptTemplate(
        name="Induction 태스크 생성 (AZR code_f)",
        benchmark=BenchmarkType.GENERAL,
        temperature=0.05,
        description="주어진 입력-출력으로부터 프로그램 추론 (AZR 원본)",
        variables=["input_output_pairs", "message"],
        template="""A conversation between User and Assistant.
The User provides a set of input/output pairs and a message describing the hidden function. The Assistant must:
1. **Privately think step-by-step** about how to reconstruct the general function based on the provided examples.
2. **Output exactly one** `...` block containing the full reasoning process.
3. **Then output exactly one** `...` block containing **only** the Python code snippet defining the function `f`—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Follow to the **code requirements** and **formatting rules**.
# Code Requirements:
- Name the entry function `f` (e.g., `def f(...): ...`), you may include nested definitions inside `f`.
- Ensure the function returns a value.
- Include at least one input parameter.
- Make the function deterministic.
- AVOID the FOLLOWING:
* Random functions or variables
* Date/time operations
* I/O operations (reading files, network requests)
* Printing or logging
* Any external state
- Ensure execution completes within 10 seconds on a modern CPU.
- All imports and custom class definitions must be at the very top of the code snippet.
- The snippet must end with a return statement from the main function `f`; anything after will be removed.
User:
# Input and Output Pairs:
{input_output_pairs}
# Message:
{message}"""
    )

    prompts["task_deduction"] = PromptTemplate(
        name="Deduction 태스크 생성 (AZR code_o)",
        benchmark=BenchmarkType.GENERAL,
        temperature=0.05,
        description="주어진 프로그램과 입력으로부터 출력 추론 (AZR 원본)",
        variables=["snippet", "input_args"],
        template="""A conversation between User and Assistant.
The User provides a Python code snippet and specific input values. The Assistant must:
1. **Privately think step-by-step** about how the code executes with the given inputs.
2. **Output exactly one** `...` block containing your full reasoning.
3. **Then output exactly one** `...` block containing **only** the output values—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **output rules**.
# Output Rules:
- If the output is a string, wrap it in quotes.
- For dicts, lists, and other literals, use valid Python literal notation.
User:
# Python Code Snippet:
{snippet}
# Input:
{input_args}"""
    )

    prompts["task_abduction"] = PromptTemplate(
        name="Abduction 태스크 생성 (AZR code_i)",
        benchmark=BenchmarkType.GENERAL,
        temperature=0.05,
        description="주어진 프로그램과 출력으로부터 입력 추론 (AZR 원본)",
        variables=["snippet", "output"],
        template="""A conversation between User and Assistant.
The User provides a Python code snippet and its observed output. The Assistant must:
1. **Privately think step-by-step** about which input produces that output.
2. **Output exactly one** `...` block containing your full reasoning.
3. **Then output exactly one** `...` block containing **only** the input values—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **input rules**.
# Input Rules:
- If an argument is a string, wrap it in quotes.
- For multiple arguments, separate by commas.
- Use Python literal notation for lists, dicts, tuples.
- Boolean values must be `True` or `False`.
User:
# Python Code Snippet:
{snippet}
# Observed Output:
{output}"""
    )

    # ================================================================================
    # 5. TASK EVALUATION PROMPTS (LLM task responses)
    # ================================================================================

    # Pass-through template: the formatted prompt is exactly the task_prompt value.
    prompts["task_evaluation_basic"] = PromptTemplate(
        name="기본 태스크 평가",
        benchmark=BenchmarkType.GENERAL,
        temperature=0.05,
        description="생성된 추론 태스크에 대한 LLM 응답",
        variables=["task_prompt"],
        template="{task_prompt}"
    )

    return prompts
def get_prompt(self, prompt_key: str, **kwargs) -> str:
    """Look up a template by key and fill in its variables.

    Args:
        prompt_key: Key of the template in ``self.prompts``.
        **kwargs: Values for the template placeholders. Every name listed in
            the template's ``variables`` must be present; extra keyword
            arguments are ignored by ``str.format``.

    Returns:
        The template text with its placeholders substituted.

    Raises:
        ValueError: If ``prompt_key`` is unknown, a declared variable is
            missing, or the template references a placeholder that cannot be
            resolved from ``kwargs``.
    """
    if prompt_key not in self.prompts:
        raise ValueError(f"Unknown prompt key: {prompt_key}")
    template = self.prompts[prompt_key]

    # Check the declared variables up front so the caller sees every missing
    # name at once rather than only the first one str.format trips over.
    missing_vars = [var for var in template.variables if var not in kwargs]
    if missing_vars:
        raise ValueError(f"Missing required variables for prompt '{prompt_key}': {missing_vars}")

    try:
        return template.template.format(**kwargs)
    except (KeyError, IndexError) as e:
        # KeyError: a named placeholder is not listed in `variables` and was
        # not supplied. IndexError: the template uses a positional placeholder
        # ({} or {0}), which can never be filled from keyword arguments.
        # Both are reported as ValueError, with the original error chained.
        raise ValueError(f"Template formatting error for prompt '{prompt_key}': {e}") from e
def get_temperature(self, prompt_key: str) -> float:
    """Return the recommended temperature stored on a prompt template.

    Args:
        prompt_key: Key of the template in ``self.prompts``.

    Returns:
        The ``temperature`` attribute of the matching template.

    Raises:
        ValueError: If ``prompt_key`` is not a registered prompt key.
    """
    registry = self.prompts
    if prompt_key in registry:
        return registry[prompt_key].temperature
    raise ValueError(f"Unknown prompt key: {prompt_key}")
def get_diversity_instruction(self, variation_id: int) -> str:
    """Return the diversity instruction selected by ``variation_id``.

    The id is wrapped modulo the table length, so any integer (including a
    negative one) maps to one of the four slots. Every slot currently holds
    the empty string, so the result is always ``""``.
    """
    # Commented-out alternatives for slots 1-3, kept for reference
    # (slot 0 is the default, empty instruction):
    #   "Implement this in a robust way that works well for various examples"
    #   "Provide an alternative solution with a unique implementation style:"
    #   "Try to implement using a different approach, algorithm, or coding style than typical solutions."
    instruction_table = ["" for _ in range(4)]
    slot = variation_id % len(instruction_table)
    return instruction_table[slot]
def list_prompts(self) -> Dict[str, PromptTemplate]:
    """Return every registered prompt template.

    Returns:
        A new dict with the same keys and template objects as
        ``self.prompts``. The copy is shallow: adding or removing keys on the
        result does not affect the registry, but the template objects
        themselves are shared.
    """
    snapshot = dict(self.prompts)
    return snapshot
def get_prompts_by_type(self, benchmark: BenchmarkType) -> Dict[str, PromptTemplate]:
    """Return the prompt templates that apply to a benchmark.

    Args:
        benchmark: Benchmark to select templates for.

    Returns:
        A new dict, in registry order, holding the templates tagged with
        ``benchmark`` plus every template tagged ``BenchmarkType.GENERAL``.
    """
    selected = {}
    for key, template in self.prompts.items():
        # GENERAL templates are included whichever benchmark is requested.
        if template.benchmark == benchmark or template.benchmark == BenchmarkType.GENERAL:
            selected[key] = template
    return selected
# Global prompt manager instance, created when this module is imported and
# used by the module-level convenience functions.
prompt_manager = PromptManager()
# Convenience functions
def get_prompt(prompt_key: str, **kwargs) -> str:
    """Format a prompt using the global ``prompt_manager``.

    Args:
        prompt_key: Key of the prompt template.
        **kwargs: Values for the template's placeholders.

    Returns:
        The formatted prompt text.

    Raises:
        ValueError: Propagated from ``PromptManager.get_prompt`` (unknown
            key, missing variable, or template formatting failure).
    """
    return prompt_manager.get_prompt(prompt_key, **kwargs)
def get_temperature(prompt_key: str) -> float:
    """Return a prompt's recommended temperature using the global ``prompt_manager``.

    Raises:
        ValueError: Propagated from ``PromptManager.get_temperature`` when
            ``prompt_key`` is unknown.
    """
    return prompt_manager.get_temperature(prompt_key)
def get_diversity_instruction(variation_id: int) -> str:
    """Return the diversity instruction for ``variation_id`` using the global ``prompt_manager``."""
    return prompt_manager.get_diversity_instruction(variation_id)