# File size: 16,842 Bytes

# 24c2665

"""
Central prompt management system for TestTime RLVR.

All prompts are managed in one place to improve consistency and maintainability.
"""

from typing import Dict, List, Any
from dataclasses import dataclass
from enum import Enum


class PromptType(Enum):
    """Kinds of prompts, each identified by a lowercase string value.

    The five members carry the same names as the five numbered prompt
    sections in ``PromptManager._initialize_prompts``.

    NOTE(review): nothing in the visible part of this file reads this enum
    (``PromptTemplate`` has no prompt-type field) -- confirm where it is used.
    """
    SOLUTION_GENERATION = "solution_generation"  # section 1 of the registry
    DIVERSE_GENERATION = "diverse_generation"  # section 2 of the registry
    INPUT_GENERATION = "input_generation"  # section 3 of the registry
    TASK_GENERATION = "task_generation"  # section 4 of the registry
    TASK_EVALUATION = "task_evaluation"  # section 5 of the registry


class BenchmarkType(Enum):
    """Benchmark that a prompt template is associated with.

    Stored in ``PromptTemplate.benchmark`` and used as the filter value in
    ``PromptManager.get_prompts_by_type``.
    """
    HUMANEVAL = "humaneval"
    MBPP = "mbpp"
    # Templates tagged GENERAL are returned by get_prompts_by_type for every
    # requested benchmark.
    GENERAL = "general"


@dataclass
class PromptTemplate:
    """A named prompt template together with its rendering metadata.

    ``template`` is rendered with ``str.format`` by ``PromptManager.get_prompt``
    and ``variables`` lists the placeholder names that callers must supply.
    Leaving ``variables`` out (or passing ``None``) produces an empty list.
    """
    name: str  # human-readable label
    template: str  # str.format-style prompt text
    description: str  # short explanation of what the prompt is for
    benchmark: BenchmarkType  # benchmark the template is tagged with
    temperature: float = 0.05  # recommended sampling temperature
    variables: List[str] = None  # None is swapped for [] in __post_init__

    def __post_init__(self):
        """Replace the ``None`` placeholder default with a fresh empty list."""
        if self.variables is not None:
            return
        self.variables = []


class PromptManager:
    """Central registry of prompt templates with lookup and rendering helpers."""
    
    def __init__(self):
        self.prompts = self._initialize_prompts()
        
    def _initialize_prompts(self) -> Dict[str, PromptTemplate]:
        """Build every built-in prompt template.

        Returns:
            A dict mapping prompt key to its ``PromptTemplate``. The entries
            are grouped into five sections: solution generation, diverse
            generation, input generation, task generation
            (induction / deduction / abduction) and task evaluation.
        """

        prompts = {}

        # ================================================================================
        # 1. SOLUTION GENERATION PROMPTS (Current Evaluation - baseline)
        # ================================================================================

        # HumanEval basic solution generation
        prompts["solution_humaneval_basic"] = PromptTemplate(
            name="HumanEval 기본 솔루션 생성",
            benchmark=BenchmarkType.HUMANEVAL,
            temperature=0.05,
            description="HumanEval 문제에 대한 기본 솔루션 생성 (greedy)",
            variables=["problem_prompt"],
            template="""You are a Python writing assistant. Complete the following Python function.

{problem_prompt}

Please provide a complete implementation of the function."""
        )

        # HumanEval multi-function handling
        prompts["solution_humaneval_multi"] = PromptTemplate(
            name="HumanEval 다중 함수 솔루션 생성",
            benchmark=BenchmarkType.HUMANEVAL,
            temperature=0.05,
            description="여러 함수가 있는 HumanEval 문제 처리",
            variables=["problem_prompt", "entry_point"],
            template="""You are a Python writing assistant. Complete the following Python function.

{problem_prompt}

Please provide ONLY the implementation for the function `{entry_point}`. 
Complete the body of the `{entry_point}` function where it is incomplete.
Do not modify or reimplement other functions that are already complete."""
        )

        # MBPP basic solution generation
        # NOTE(review): this template starts with a newline, unlike
        # "diverse_mbpp_basic" below -- confirm that is intended.
        prompts["solution_mbpp_basic"] = PromptTemplate(
            name="MBPP 기본 솔루션 생성",
            benchmark=BenchmarkType.MBPP,
            temperature=0.05,
            description="MBPP 문제에 대한 기본 솔루션 생성",
            variables=["problem_prompt"],
            template="""
Please generate a complete, self-contained Python script that solves the following problem.  

CRITICAL REQUIREMENTS:
- You MUST maintain the EXACT function signature as shown in the examples
- The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples
- Look at the assert statements carefully to understand the expected function signature
- DO NOT change the number of parameters or their types from what is shown in the examples

Instructions:
- Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```).  
- For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line.  
The docstring should briefly describe:  
• The function's purpose  
• Input parameters  
• Return value  

Problem statement:
{problem_prompt}
"""
        )

        # ================================================================================
        # 2. DIVERSE GENERATION PROMPTS (diverse program generation)
        # ================================================================================

        # HumanEval diversity solution
        prompts["diverse_humaneval_basic"] = PromptTemplate(
            name="HumanEval 다양성 솔루션 생성",
            benchmark=BenchmarkType.HUMANEVAL,
            temperature=0.7,
            description="HumanEval 문제에 대한 다양한 접근법 솔루션",
            variables=["diversity_instruction", "problem_prompt"],
            template="""You are a Python writing assistant. {diversity_instruction}

{problem_prompt}

Please provide a complete implementation of the function."""
        )

        # HumanEval diversity, multi-function
        prompts["diverse_humaneval_multi"] = PromptTemplate(
            name="HumanEval 다양성 다중 함수 솔루션",
            benchmark=BenchmarkType.HUMANEVAL,
            temperature=0.7,
            description="다중 함수 HumanEval에 대한 다양성 솔루션",
            variables=["diversity_instruction", "problem_prompt", "entry_point"],
            template="""You are a Python writing assistant. {diversity_instruction}

{problem_prompt}

Please provide ONLY the implementation for the function `{entry_point}`. 
Complete the body of the `{entry_point}` function where it is incomplete.
Do not modify or reimplement other functions that are already complete."""
        )

        # MBPP diversity solution
        prompts["diverse_mbpp_basic"] = PromptTemplate(
            name="MBPP 다양성 솔루션 생성",
            benchmark=BenchmarkType.MBPP,
            temperature=0.7,
            description="MBPP 문제에 대한 다양한 접근법 솔루션",
            variables=["diversity_instruction", "problem_prompt"],
            template="""Please generate a complete, self-contained Python script that solves the following problem.  

CRITICAL REQUIREMENTS:
- You MUST maintain the EXACT function signature as shown in the examples
- The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples
- Look at the assert statements carefully to understand the expected function signature
- DO NOT change the number of parameters or their types from what is shown in the examples

Instructions:
- Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```).  
- For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line.  
The docstring should briefly describe:  
• The function's purpose  
• Input parameters  
• Return value

{diversity_instruction}

Problem statement:
{problem_prompt}
"""
        )

        # ================================================================================
        # 3. INPUT GENERATION PROMPTS (input augmentation)
        # ================================================================================

        # Literal braces in this template are doubled ({{ }}) so that
        # str.format leaves them in the rendered prompt.
        prompts["input_generation_basic"] = PromptTemplate(
            name="기본 입력 생성",
            benchmark=BenchmarkType.GENERAL,
            temperature=0.5,
            description="기존 IPO 예제를 바탕으로 새로운 입력 생성",
            variables=["problem_description", "existing_examples", "full_code", "arg_type_info"],
            template="""Given the following problem description and its Python function implementation, first analyze the types and valid ranges of the function arguments, then write **5 different example inputs** for the function that cover a diverse mix of typical (general) cases and edge/boundary cases.

Problem Description:
'''
{problem_description}
'''

Existing Examples from Problem:
{existing_examples}

Function Implementation:
```python
{full_code}
```

{arg_type_info}

Based on the existing examples above, generate 5 NEW diverse test inputs that are different from the existing ones. Each input should be a Python dict where:
- Keys are the exact parameter names from the function signature
- Values are appropriate test values for each parameter

Format your response as:
```python
examples = [
    {{dict_with_all_function_parameters}},  # Description of this test case
    {{dict_with_all_function_parameters}},  # Description of this test case
    ...  # Continue for all 5 examples
]
```

Ensure your examples include:
- At least 2 typical/general cases
- At least 2 edge/boundary cases  
- 1 special case (empty, zero, maximum values, etc.)
- All examples should be DIFFERENT from the existing examples shown above"""
        )

        # ================================================================================
        # 4. TASK GENERATION PROMPTS (IPO -> reasoning tasks)
        # ================================================================================

        prompts["task_induction"] = PromptTemplate(
            name="Induction 태스크 생성 (AZR code_f)",
            benchmark=BenchmarkType.GENERAL,
            temperature=0.05,
            description="주어진 입력-출력으로부터 프로그램 추론 (AZR 원본)",
            variables=["input_output_pairs", "message"],
            template="""A conversation between User and Assistant.  
The User provides a set of input/output pairs and a message describing the hidden function. The Assistant must:
1. **Privately think step-by-step** about how to reconstruct the general function based on the provided examples.  
2. **Output exactly one** `<think>...</think>` block containing the full reasoning process.  
3. **Then output exactly one** `<answer>...</answer>` block containing **only** the Python code snippet defining the function `f`—no labels, no comments, no extra text.  
4. **Do not** generate any text outside these two blocks.  
5. Follow to the **code requirements** and **formatting rules**.

# Code Requirements:
- Name the entry function `f` (e.g., `def f(...): ...`), you may include nested definitions inside `f`.  
- Ensure the function returns a value.  
- Include at least one input parameter.  
- Make the function deterministic.  
- AVOID the FOLLOWING:
  * Random functions or variables  
  * Date/time operations  
  * I/O operations (reading files, network requests)  
  * Printing or logging  
  * Any external state  
- Ensure execution completes within 10 seconds on a modern CPU.  
- All imports and custom class definitions must be at the very top of the code snippet.  
- The snippet must end with a return statement from the main function `f`; anything after will be removed.

User:
# Input and Output Pairs:
{input_output_pairs}

# Message:
{message}"""
        )

        prompts["task_deduction"] = PromptTemplate(
            name="Deduction 태스크 생성 (AZR code_o)",
            benchmark=BenchmarkType.GENERAL,
            temperature=0.05,
            description="주어진 프로그램과 입력으로부터 출력 추론 (AZR 원본)",
            variables=["snippet", "input_args"],
            template="""A conversation between User and Assistant.  
The User provides a Python code snippet and specific input values. The Assistant must:
1. **Privately think step-by-step** about how the code executes with the given inputs.  
2. **Output exactly one** `<think>...</think>` block containing your full reasoning.  
3. **Then output exactly one** `<answer>...</answer>` block containing **only** the output values—no labels, no comments, no extra text.  
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **output rules**.

# Output Rules:
- If the output is a string, wrap it in quotes.  
- For dicts, lists, and other literals, use valid Python literal notation.

User:
# Python Code Snippet:
{snippet}

# Input:
{input_args}"""
        )

        prompts["task_abduction"] = PromptTemplate(
            name="Abduction 태스크 생성 (AZR code_i)",
            benchmark=BenchmarkType.GENERAL,
            temperature=0.05,
            description="주어진 프로그램과 출력으로부터 입력 추론 (AZR 원본)",
            variables=["snippet", "output"],
            template="""A conversation between User and Assistant.  
The User provides a Python code snippet and its observed output. The Assistant must:
1. **Privately think step-by-step** about which input produces that output.
2. **Output exactly one** `<think>...</think>` block containing your full reasoning.
3. **Then output exactly one** `<answer>...</answer>` block containing **only** the input values—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **input rules**.

# Input Rules:
- If an argument is a string, wrap it in quotes.
- For multiple arguments, separate by commas.
- Use Python literal notation for lists, dicts, tuples.
- Boolean values must be `True` or `False`.

User:
# Python Code Snippet:
{snippet}

# Observed Output:
{output}"""
        )

        # ================================================================================
        # 5. TASK EVALUATION PROMPTS (LLM task responses)
        # ================================================================================

        # Pass-through template: the rendered prompt is exactly `task_prompt`.
        prompts["task_evaluation_basic"] = PromptTemplate(
            name="기본 태스크 평가",
            benchmark=BenchmarkType.GENERAL,
            temperature=0.05,
            description="생성된 추론 태스크에 대한 LLM 응답",
            variables=["task_prompt"],
            template="{task_prompt}"
        )

        return prompts
    
    def get_prompt(self, prompt_key: str, **kwargs) -> str:
        """프롬프트 키로 템플릿을 가져와 변수를 채움"""
        if prompt_key not in self.prompts:
            raise ValueError(f"Unknown prompt key: {prompt_key}")
        
        template = self.prompts[prompt_key]
        
        # 필수 변수 확인
        missing_vars = []
        for var in template.variables:
            if var not in kwargs:
                missing_vars.append(var)
        
        if missing_vars:
            raise ValueError(f"Missing required variables for prompt '{prompt_key}': {missing_vars}")
        
        # 템플릿 포맷팅
        try:
            return template.template.format(**kwargs)
        except KeyError as e:
            raise ValueError(f"Template formatting error for prompt '{prompt_key}': {e}")
    
    def get_temperature(self, prompt_key: str) -> float:
        """프롬프트의 권장 temperature 반환"""
        if prompt_key not in self.prompts:
            raise ValueError(f"Unknown prompt key: {prompt_key}")
        return self.prompts[prompt_key].temperature
    
    def get_diversity_instruction(self, variation_id: int) -> str:
        """Return the diversity instruction selected by ``variation_id``.

        The id is wrapped onto the instruction table with a modulo. Every
        slot is currently an empty string, so the result is always ``""``.
        """
        # Slot 0 is the default (no extra instruction). The worded variants
        # for slots 1-3 are disabled and kept here for reference:
        #     "Implement this in a robust way that works well for various examples"
        #     "Provide an alternative solution with a unique implementation style:"
        #     "Try to implement using a different approach, algorithm, or coding style than typical solutions."
        variants = [""] * 4
        slot = variation_id % len(variants)
        return variants[slot]
    
    def list_prompts(self) -> Dict[str, PromptTemplate]:
        """모든 프롬프트 템플릿 목록 반환"""
        return self.prompts.copy()
    
    def get_prompts_by_type(self, benchmark: BenchmarkType) -> Dict[str, PromptTemplate]:
        """벤치마크 타입별 프롬프트 반환"""
        return {
            key: template for key, template in self.prompts.items()
            if template.benchmark == benchmark or template.benchmark == BenchmarkType.GENERAL
        }


# Module-level PromptManager instance, created at import time and shared by
# the convenience functions defined below.
prompt_manager = PromptManager()


# Convenience functions
def get_prompt(prompt_key: str, **kwargs) -> str:
    """Render a prompt through the module-level ``prompt_manager``."""
    rendered = prompt_manager.get_prompt(prompt_key, **kwargs)
    return rendered


def get_temperature(prompt_key: str) -> float:
    """Look up a prompt's temperature through the module-level ``prompt_manager``."""
    recommended = prompt_manager.get_temperature(prompt_key)
    return recommended


def get_diversity_instruction(variation_id: int) -> str:
    """Fetch a diversity instruction through the module-level ``prompt_manager``."""
    instruction = prompt_manager.get_diversity_instruction(variation_id)
    return instruction