llm_cp2 / src /lmms-eval /lmms_eval /llm_judge /utils.py

Upload folder using huggingface_hub

b0c0df0 verified about 1 month ago

4.76 kB

	import re
	from typing import Any, Dict, Optional, Tuple, Union

	from .prompt import (
	BINARY_JUDGE_PROMPT,
	COMPARATIVE_JUDGE_PROMPT,
	CORRECTNESS_JUDGE_PROMPT,
	)


	class JudgePromptBuilder:
	    """Collection of static helpers that render the text prompt sent to a judge model.

	    Each builder fills a template via ``str.format``. The binary and comparative
	    builders use the caller-supplied ``custom_prompt`` when one is given; otherwise
	    every builder falls back to the matching module-level ``*_JUDGE_PROMPT`` template.
	    """

	    @staticmethod
	    def build_binary_prompt(question: str, answer: str, prediction: str, output_format: str = "0/1", custom_prompt: Optional[str] = None, **kwargs) -> str:
	        """Render the prompt for a binary (pass/fail) judgement.

	        Args:
	            question: The question shown to the judge.
	            answer: The reference answer.
	            prediction: The model output being judged.
	            output_format: ``"0/1"`` or ``"1/0"`` selects the labels ``"1"``/``"0"``;
	                any other value selects ``"Yes"``/``"No"``.
	            custom_prompt: Optional template. When truthy it is formatted with
	                ``question``, ``answer``, ``pred``, ``prediction`` and ``**kwargs``
	                and returned directly; ``output_format`` is then not consulted.
	            **kwargs: Extra fields forwarded to ``custom_prompt`` only.

	        Returns:
	            The formatted prompt string.
	        """
	        if custom_prompt:
	            # The prediction is exposed under two names so templates may use either.
	            template_fields = dict(question=question, answer=answer, pred=prediction, prediction=prediction)
	            return custom_prompt.format(**template_fields, **kwargs)

	        if output_format in ("0/1", "1/0"):
	            positive, negative = "1", "0"
	        else:
	            positive, negative = "Yes", "No"

	        return BINARY_JUDGE_PROMPT.format(
	            question=question,
	            answer=answer,
	            prediction=prediction,
	            positive=positive,
	            negative=negative,
	        )

	    @staticmethod
	    def build_comparative_prompt(
	        question: str, response1: str, response2: str, context: Optional[str] = None, score_range: Tuple[int, int] = (1, 10), custom_prompt: Optional[str] = None, evaluation_instruction: Optional[str] = None, **kwargs
	    ) -> str:
	        """Render the prompt for scoring two responses against each other.

	        Args:
	            question: The question both responses address.
	            response1: First candidate response.
	            response2: Second candidate response.
	            context: Optional context; a falsy value is rendered as an empty string.
	            score_range: ``(min, max)`` scores passed to the default template.
	            custom_prompt: Optional template. When truthy it is formatted with
	                ``question``, ``response1``, ``response2``, ``context`` and
	                ``**kwargs`` and returned directly.
	            evaluation_instruction: Optional instruction text; when falsy a default
	                sentence naming the score range is generated.
	            **kwargs: Extra fields forwarded to ``custom_prompt`` only.

	        Returns:
	            The formatted prompt string.
	        """
	        if custom_prompt:
	            return custom_prompt.format(
	                question=question,
	                response1=response1,
	                response2=response2,
	                context=context if context else "",
	                **kwargs,
	            )

	        # The context block is omitted entirely when no context is supplied.
	        if context:
	            context_section = f"[Context]\n{context}\n\n"
	        else:
	            context_section = ""

	        min_score, max_score = score_range[0], score_range[1]
	        instruction = evaluation_instruction or f"Please provide scores from {min_score} to {max_score}."

	        return COMPARATIVE_JUDGE_PROMPT.format(
	            question=question,
	            response1=response1,
	            response2=response2,
	            context_section=context_section,
	            min_score=min_score,
	            max_score=max_score,
	            evaluation_instruction=instruction,
	        )

	    @staticmethod
	    def build_correctness_prompt(question: str, answer: str, prediction: str, output_format: str = "yes/no", **kwargs) -> str:
	        """Render the prompt for a correctness judgement.

	        Args:
	            question: The question shown to the judge.
	            answer: The reference answer.
	            prediction: The model output being judged.
	            output_format: ``"yes/no"`` selects the labels ``"Yes"``/``"No"``;
	                any other value selects ``"1"``/``"0"``.
	            **kwargs: Accepted but not used by this builder.

	        Returns:
	            The formatted prompt string.
	        """
	        if output_format == "yes/no":
	            positive, negative = "Yes", "No"
	        else:
	            positive, negative = "1", "0"

	        return CORRECTNESS_JUDGE_PROMPT.format(
	            question=question,
	            answer=answer,
	            prediction=prediction,
	            positive=positive,
	            negative=negative,
	        )


	class ResponseParser:
	    """Collection of static helpers that turn raw judge replies into typed values.

	    Every parser is best-effort: a reply that is not a string or contains no
	    recognisable value yields that parser's documented fallback rather than
	    raising.
	    """

	    # Explicitly labelled verdict such as "score: 1" or "answer: [0]".
	    _LABELLED_BINARY = re.compile(r"\b(?:score|answer)\s*:\s*\[?([01])\]?(?!\w|\.\d)")
	    # A lone 0 or 1 that is not part of a longer number or word ("10", "0.5", "q1").
	    _STANDALONE_BINARY = re.compile(r"(?<![\w.])([01])(?!\w|\.\d)")
	    # "yes" as a whole word at the start, allowing markdown/quote characters before it.
	    _LEADING_YES = re.compile(r"\W*yes\b")
	    # Optionally signed integer or decimal number.
	    _NUMBER = re.compile(r"-?\d+(?:\.\d+)?")

	    @staticmethod
	    def parse_binary_response(response: str, output_format: str = "0/1") -> Union[int, bool]:
	        """Parse a binary verdict (0/1 or yes/no) from a judge reply.

	        Args:
	            response: Raw judge reply.
	            output_format: ``"0/1"`` or ``"1/0"`` selects numeric parsing; any
	                other value selects yes/no parsing.

	        Returns:
	            Numeric mode: ``1`` or ``0``. A verdict labelled ``score:`` or
	            ``answer:`` wins; otherwise the first standalone ``0``/``1`` token is
	            used; ``0`` when neither is found. Digits embedded in longer numbers
	            or words (``"10"``, ``"0.91"``) are ignored.
	            Yes/no mode: ``True`` when the reply starts with the word "yes"
	            (case-insensitive, leading punctuation ignored), else ``False``.
	            A non-string reply gives the negative verdict in either mode.
	        """
	        numeric = output_format in ("0/1", "1/0")
	        if not isinstance(response, str):
	            return 0 if numeric else False

	        text = response.strip().lower()

	        if numeric:
	            verdict = ResponseParser._LABELLED_BINARY.search(text) or ResponseParser._STANDALONE_BINARY.search(text)
	            return int(verdict.group(1)) if verdict else 0

	        return ResponseParser._LEADING_YES.match(text) is not None

	    @staticmethod
	    def parse_score_response(response: str, score_range: Optional[Tuple[float, float]] = None) -> float:
	        """Parse a single score from a judge reply.

	        Args:
	            response: Raw judge reply.
	            score_range: Optional ``(min, max)`` pair; when given, the parsed
	                score is clamped into it.

	        Returns:
	            The first number found in the reply (clamped when ``score_range`` is
	            given). When the reply is not a string or holds no number, the
	            range minimum is returned, or ``0.0`` without a range.
	        """
	        fallback = float(score_range[0]) if score_range else 0.0
	        if not isinstance(response, str):
	            return fallback

	        found = ResponseParser._NUMBER.search(response)
	        if found is None:
	            return fallback

	        score = float(found.group())
	        if score_range:
	            low, high = score_range[0], score_range[1]
	            score = float(max(low, min(score, high)))
	        return score

	    @staticmethod
	    def parse_comparative_response(response: str) -> Tuple[float, float]:
	        """Parse the two scores of a comparative judgement.

	        Only the first line of the stripped reply is inspected; the first two
	        numbers on it are taken as the scores, whatever separates them.

	        Args:
	            response: Raw judge reply.

	        Returns:
	            ``(score1, score2)``, or ``(-1.0, -1.0)`` when the reply is not a
	            string or its first line holds fewer than two numbers.
	        """
	        failure = (-1.0, -1.0)
	        if not isinstance(response, str):
	            return failure

	        first_line = response.strip().split("\n", 1)[0]
	        scores = ResponseParser._NUMBER.findall(first_line)
	        if len(scores) < 2:
	            return failure
	        return float(scores[0]), float(scores[1])

	    @staticmethod
	    def parse_json_response(response: str) -> Dict[str, Any]:
	        """Extract the first JSON object embedded in a judge reply.

	        Each ``{`` is tried in turn as the start of a JSON object, so text
	        (including stray braces) before or after the object does not prevent
	        parsing.

	        Args:
	            response: Raw judge reply.

	        Returns:
	            The first successfully decoded object, or ``{}`` when the reply is
	            not a string or contains no valid JSON object.
	        """
	        import json

	        if not isinstance(response, str):
	            return {}

	        decoder = json.JSONDecoder()
	        start = response.find("{")
	        while start != -1:
	            try:
	                # raw_decode stops at the end of the object and ignores what follows.
	                parsed, _ = decoder.raw_decode(response, start)
	            except json.JSONDecodeError:
	                parsed = None
	            if isinstance(parsed, dict):
	                return parsed
	            start = response.find("{", start + 1)

	        return {}