Spaces:
Running
Running
| from typing import Any, Dict, List, Optional, Tuple | |
class AnswerValidator:
    """
    Validates AI-generated answers against quality and safety standards.

    A response is considered valid when it is a dictionary that contains every
    required field, has non-blank ``final_answer`` and ``reasoning`` values,
    has non-blank ``latex`` content (only checked for math problems), and
    reports a finite ``confidence_score`` at or above the configured threshold.
    """

    def __init__(self, confidence_threshold: float = 0.5):
        """
        Initialize the AnswerValidator.

        Args:
            confidence_threshold: Minimum confidence score required. Defaults to 0.5.
        """
        self.confidence_threshold = confidence_threshold
        self.required_fields = ["latex", "reasoning", "final_answer", "confidence_score"]

    @staticmethod
    def _is_blank(value: Any) -> bool:
        """
        Report whether a field value carries no content.

        Args:
            value: The raw field value taken from the response.

        Returns:
            True for ``None``, empty containers/strings, and values whose
            string form is only whitespace; False otherwise.
        """
        if value is None:
            return True
        # Numbers such as 0 or 0.0 are falsy but are real answers (e.g. the
        # solution of "x + 1 = 1"), so they must never count as empty.
        if isinstance(value, (bool, int, float, complex)):
            return False
        return not value or not str(value).strip()

    def validate(self, response: Dict[str, Any], is_math_problem: bool = True) -> Tuple[bool, List[str]]:
        """
        Validates the AI response.

        Args:
            response: The JSON response dictionary from the AI.
            is_math_problem: Whether the input was identified as a math problem.
                If True, checks for LaTeX content.

        Returns:
            Tuple[bool, List[str]]: (IsValid, List of error reasons). When the
            response is not a dictionary or is missing required fields, only
            those errors are reported and the remaining checks are skipped.
        """
        # Imported here so the class does not depend on a module-level import.
        import math

        # A malformed payload (None, a string, a list, ...) is a validation
        # failure, not a reason to raise from the validator.
        if not isinstance(response, dict):
            return False, ["Response must be a dictionary."]

        # 1. Check required fields; the content checks below are meaningless
        #    without them, so stop early.
        errors = [
            f"Missing required field: {field}"
            for field in self.required_fields
            if field not in response
        ]
        if errors:
            return False, errors

        # 2. Check for empty content: models sometimes succeed but return
        #    empty strings.
        if self._is_blank(response.get("final_answer")):
            errors.append("Final answer is empty.")
        if self._is_blank(response.get("reasoning")):
            errors.append("Reasoning is empty.")

        # 3. Verify LaTeX presence for math problems. Only non-emptiness is
        #    required; no LaTeX syntax heuristics are applied.
        if is_math_problem and self._is_blank(response.get("latex", "")):
            errors.append("LaTeX content is missing for a math problem.")

        # 4. Confidence threshold check.
        try:
            score = float(response.get("confidence_score", 0.0))
            if not math.isfinite(score):
                # NaN compares False against every threshold and would
                # otherwise slip through as "confident enough".
                errors.append("Invalid confidence score format.")
            elif score < self.confidence_threshold:
                errors.append(f"Confidence score {score} is below threshold {self.confidence_threshold}.")
        except (ValueError, TypeError, OverflowError):
            # OverflowError: integers too large to convert to float.
            errors.append("Invalid confidence score format.")

        return not errors, errors