Spaces:

ghadgemadhuri92
/

mathstutor

Running

App Files Files Community

mathstutor / app /validation /answer_checker.py

ghadgemadhuri92

agent tested with the prompt: Calculate 15 * 12 then add 50.

565a379 3 months ago

raw

history blame contribute delete

2.8 kB

	import math
	from collections.abc import Mapping
	from typing import Any, Dict, List, Optional, Tuple

	class AnswerValidator:
	    """Validate AI-generated answers with a fixed set of quality checks.

	    A response passes when it is a mapping that holds every required field,
	    its final answer and reasoning are non-empty, its LaTeX is non-empty
	    (math problems only), and its confidence score is a finite number that
	    is not below the configured threshold.
	    """

	    def __init__(self, confidence_threshold: float = 0.5):
	        """Initialize the AnswerValidator.

	        Args:
	            confidence_threshold: Minimum confidence score required.
	                Defaults to 0.5.
	        """
	        self.confidence_threshold = confidence_threshold
	        self.required_fields = ["latex", "reasoning", "final_answer", "confidence_score"]

	    @staticmethod
	    def _is_blank(value: Any) -> bool:
	        """Return True when value is falsy or renders as whitespace only."""
	        return not value or not str(value).strip()

	    @staticmethod
	    def _is_number(value: Any) -> bool:
	        """Return True for int/float values; booleans are not counted."""
	        return isinstance(value, (int, float)) and not isinstance(value, bool)

	    def validate(self, response: Dict[str, Any], is_math_problem: bool = True) -> Tuple[bool, List[str]]:
	        """Validate the AI response.

	        Args:
	            response: The JSON response dictionary from the AI.
	            is_math_problem: Whether the input was identified as a math
	                problem. If True, the ``latex`` field must be non-empty.

	        Returns:
	            Tuple[bool, List[str]]: (is_valid, list of error reasons). The
	            list is empty exactly when ``is_valid`` is True.
	        """
	        # Reject non-mappings up front: None would raise on the ``in`` test,
	        # and an unparsed JSON string can satisfy the substring-based ``in``
	        # test for every field and then fail on ``.get``.
	        if not isinstance(response, Mapping):
	            return False, ["Response must be a dictionary."]

	        # 1. Required fields. Stop early: the later checks read these fields.
	        errors = [
	            f"Missing required field: {field}"
	            for field in self.required_fields
	            if field not in response
	        ]
	        if errors:
	            return False, errors

	        # 2. Empty content: models sometimes succeed but return empty strings.
	        # A numeric final answer is always content; a plain truthiness test
	        # would wrongly reject a correct answer of 0.
	        final_answer = response.get("final_answer")
	        if not self._is_number(final_answer) and self._is_blank(final_answer):
	            errors.append("Final answer is empty.")

	        if self._is_blank(response.get("reasoning")):
	            errors.append("Reasoning is empty.")

	        # 3. LaTeX presence for math problems.
	        # NOTE: only non-emptiness is checked; no LaTeX syntax heuristic
	        # (e.g. looking for "\\" or "$") is applied.
	        if is_math_problem and self._is_blank(response.get("latex", "")):
	            errors.append("LaTeX content is missing for a math problem.")

	        # 4. Confidence threshold check.
	        try:
	            score = float(response.get("confidence_score", 0.0))
	            if not math.isfinite(score):
	                # NaN compares False against every threshold and would slip
	                # through the "<" test; infinities are not usable scores.
	                errors.append("Invalid confidence score format.")
	            elif score < self.confidence_threshold:
	                errors.append(f"Confidence score {score} is below threshold {self.confidence_threshold}.")
	        except (ValueError, TypeError, OverflowError):
	            # OverflowError: float() of an int too large for a float.
	            errors.append("Invalid confidence score format.")

	        return len(errors) == 0, errors