# Listing metadata from the file viewer: size 4,761 bytes, revision b0c0df0.
import re
from typing import Any, Dict, Optional, Tuple, Union
from .prompt import (
BINARY_JUDGE_PROMPT,
COMPARATIVE_JUDGE_PROMPT,
CORRECTNESS_JUDGE_PROMPT,
)
class JudgePromptBuilder:
    """Helper class to build prompts for different judge types.

    Every builder is a static method that fills a ``str.format`` template
    (either a caller-supplied one or one of the module-level judge prompts)
    and returns the resulting prompt text.
    """

    @staticmethod
    def _binary_labels(output_format: str) -> Tuple[str, str]:
        """Return the ``(positive, negative)`` labels for *output_format*.

        ``"0/1"`` and ``"1/0"`` select the numeric labels ``"1"``/``"0"``;
        any other value selects ``"Yes"``/``"No"``. This is the same split
        ``ResponseParser.parse_binary_response`` applies when reading the
        judge's answer, so the prompt and the parser agree on the format.
        """
        if output_format in ("0/1", "1/0"):
            return "1", "0"
        return "Yes", "No"

    @staticmethod
    def build_binary_prompt(
        question: str,
        answer: str,
        prediction: str,
        output_format: str = "0/1",
        custom_prompt: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Build prompt for binary evaluation.

        Args:
            question: The question shown to the judge.
            answer: The reference answer.
            prediction: The model output being judged.
            output_format: ``"0/1"`` or ``"1/0"`` for numeric labels, anything
                else for ``"Yes"``/``"No"`` labels.
            custom_prompt: Optional ``str.format`` template used instead of
                ``BINARY_JUDGE_PROMPT``. It may reference ``{question}``,
                ``{answer}``, ``{pred}``, ``{prediction}``, ``{positive}``,
                ``{negative}`` and any name passed through ``kwargs``.
            **kwargs: Extra template fields; only used with ``custom_prompt``.

        Returns:
            The formatted prompt.
        """
        positive, negative = JudgePromptBuilder._binary_labels(output_format)
        if custom_prompt:
            # Precedence, lowest to highest: derived labels, caller kwargs,
            # explicit arguments. Building a dict (instead of mixing keyword
            # arguments with **kwargs) means a stray ``pred=`` in kwargs no
            # longer raises "multiple values for keyword argument".
            fields = {"positive": positive, "negative": negative}
            fields.update(kwargs)
            fields.update(question=question, answer=answer, pred=prediction, prediction=prediction)
            return custom_prompt.format(**fields)
        return BINARY_JUDGE_PROMPT.format(
            question=question,
            answer=answer,
            prediction=prediction,
            positive=positive,
            negative=negative,
        )

    @staticmethod
    def build_comparative_prompt(
        question: str,
        response1: str,
        response2: str,
        context: Optional[str] = None,
        score_range: Tuple[int, int] = (1, 10),
        custom_prompt: Optional[str] = None,
        evaluation_instruction: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Build prompt for comparative evaluation.

        Args:
            question: The question both responses answer.
            response1: First response to compare.
            response2: Second response to compare.
            context: Optional context; rendered as a ``[Context]`` section in
                the default template, or passed as ``{context}`` (empty string
                when missing) to a custom template.
            score_range: ``(min, max)`` scores used by the default template.
            custom_prompt: Optional ``str.format`` template used instead of
                ``COMPARATIVE_JUDGE_PROMPT``.
            evaluation_instruction: Overrides the default "Please provide
                scores from ..." sentence in the default template.
            **kwargs: Extra template fields; only used with ``custom_prompt``.

        Returns:
            The formatted prompt.
        """
        if custom_prompt:
            return custom_prompt.format(
                question=question,
                response1=response1,
                response2=response2,
                context=context or "",
                **kwargs,
            )
        context_section = f"[Context]\n{context}\n\n" if context else ""
        if not evaluation_instruction:
            evaluation_instruction = f"Please provide scores from {score_range[0]} to {score_range[1]}."
        return COMPARATIVE_JUDGE_PROMPT.format(
            question=question,
            response1=response1,
            response2=response2,
            context_section=context_section,
            min_score=score_range[0],
            max_score=score_range[1],
            evaluation_instruction=evaluation_instruction,
        )

    @staticmethod
    def build_correctness_prompt(question: str, answer: str, prediction: str, output_format: str = "yes/no", **kwargs) -> str:
        """Build prompt for correctness evaluation.

        Args:
            question: The question shown to the judge.
            answer: The reference answer.
            prediction: The model output being judged.
            output_format: ``"0/1"`` or ``"1/0"`` for numeric labels, anything
                else (including the default ``"yes/no"``) for ``"Yes"``/``"No"``.
            **kwargs: Accepted for call-site compatibility; not used.

        Returns:
            ``CORRECTNESS_JUDGE_PROMPT`` with all fields filled in.
        """
        positive, negative = JudgePromptBuilder._binary_labels(output_format)
        return CORRECTNESS_JUDGE_PROMPT.format(
            question=question,
            answer=answer,
            prediction=prediction,
            positive=positive,
            negative=negative,
        )
class ResponseParser:
    """Helper class to parse different types of judge responses.

    All parsers are best-effort: on input they cannot interpret (including a
    non-string response) they return a documented fallback value instead of
    raising.
    """

    @staticmethod
    def parse_binary_response(response: str, output_format: str = "0/1") -> Union[int, bool]:
        """Parse binary response (0/1 or yes/no).

        Args:
            response: Raw judge output.
            output_format: ``"0/1"`` or ``"1/0"`` to read a numeric verdict;
                any other value to read a yes/no verdict.

        Returns:
            For the numeric formats, the first standalone ``0`` or ``1`` in
            the response as an ``int`` (``0`` when none is present). For the
            yes/no format, ``True`` when the response begins with the word
            "yes" (case-insensitive, leading punctuation ignored), else
            ``False``.
        """
        text = response.strip().lower() if isinstance(response, str) else ""
        if output_format in ("0/1", "1/0"):
            # Match 0/1 only as a standalone token so that "10" or "v1" is
            # not read as a positive verdict.
            verdict = re.search(r"\b[01]\b", text)
            return int(verdict.group()) if verdict else 0
        # Skip leading punctuation/markdown ("**Yes**", "'yes'") and require
        # the word to end after "yes" so "yesterday" is not accepted.
        return re.match(r"[\W_]*yes(?![^\W_])", text) is not None

    @staticmethod
    def parse_score_response(response: str, score_range: Optional[Tuple[float, float]] = None) -> float:
        """Parse a single score from response.

        Args:
            response: Raw judge output.
            score_range: Optional ``(min, max)``; the parsed score is clamped
                into this range.

        Returns:
            The first number found in the response, clamped to
            ``score_range`` when given. If no number is found, the range
            minimum, or ``0.0`` when no range was supplied.
        """
        if isinstance(response, str):
            first_number = re.search(r"-?\d+(?:\.\d+)?", response)
            if first_number:
                score = float(first_number.group())
                if score_range:
                    score = max(score_range[0], min(score, score_range[1]))
                return score
        # Return minimum score as default
        return score_range[0] if score_range else 0.0

    @staticmethod
    def parse_comparative_response(response: str) -> Tuple[float, float]:
        """Parse comparative scores from response.

        Only the first line of the (stripped) response is inspected; the
        first two numbers on it are taken as the two scores, whatever
        separator is used between them.

        Returns:
            ``(score1, score2)`` as floats, or ``(-1.0, -1.0)`` when the first
            line holds fewer than two numbers.
        """
        if isinstance(response, str):
            first_line = response.strip().split("\n", 1)[0]
            scores = re.findall(r"-?\d+(?:\.\d+)?", first_line)
            if len(scores) >= 2:
                return float(scores[0]), float(scores[1])
        return -1.0, -1.0

    @staticmethod
    def parse_json_response(response: str) -> Dict[str, Any]:
        """Parse JSON response.

        Scans the response for the first ``{`` from which a complete JSON
        object can be decoded, so surrounding prose (before or after the
        object, even prose containing braces) does not prevent parsing.

        Returns:
            The decoded object, or ``{}`` when none can be decoded.
        """
        import json

        if not isinstance(response, str):
            return {}
        decoder = json.JSONDecoder()
        start = response.find("{")
        while start != -1:
            try:
                # raw_decode stops at the end of the first complete value and
                # ignores whatever follows it.
                parsed, _ = decoder.raw_decode(response, start)
            except (json.JSONDecodeError, RecursionError):
                parsed = None
            if isinstance(parsed, dict):
                return parsed
            start = response.find("{", start + 1)
        return {}
|