# EXAM_RAG_API/generation/parsing_utils.py
# MinaNasser's picture
# 1st
# 1bc3f18
import json
import logging
from typing import Any, Dict, Optional
# Module-level logger used by the parsing helpers below.
logger = logging.getLogger("ExamGraph")


def _as_plain(result: Any) -> Any:
    """Return ``result.model_dump()`` when that method exists, else ``result``."""
    return result.model_dump() if hasattr(result, "model_dump") else result


def _json_candidates(text: str):
    """Yield brace-delimited slices of *text* that may contain a JSON object.

    The outermost span (first ``{`` to last ``}``) comes first so nested
    objects are captured whole; the span starting at the last ``{`` follows
    as a fallback for text that holds several separate flat objects.
    """
    end = text.rfind("}") + 1
    first = text.find("{")
    last = text.rfind("{")
    starts = (first,) if first == last else (first, last)
    for start in starts:
        if 0 <= start < end:
            yield text[start:end]


def safe_parse(parser_obj, text: str, question_no: int) -> Optional[Dict[str, Any]]:
    """Parse *text* with *parser_obj*, recovering JSON embedded in prose.

    Attempts, in order:

    1. ``parser_obj.parse(text)`` on the raw text.
    2. ``parser_obj.parse`` on each JSON object extracted from the text
       (outermost ``{...}`` span first, then the span starting at the last
       ``{``), re-serialised with ``json.dumps``.

    Args:
        parser_obj: Any object exposing ``parse(str)``.
        text: Raw text to parse.
        question_no: Question number, used only in log messages.

    Returns:
        The parsed result (converted through ``model_dump()`` when the result
        has that method), or ``None`` when *text* is empty, is the literal
        ``"null"``/``"None"``, or every attempt fails. Parse failures are
        logged, never raised.
    """
    if not text or text.strip() in ("null", "None", ""):
        logger.warning("[Parse] q%s: empty/null response", question_no)
        return None

    last_error = None

    # The parser's exception types are unknown here, so every attempt is
    # best-effort: any failure moves on to the next strategy.
    try:
        return _as_plain(parser_obj.parse(text))
    except Exception as exc:
        last_error = exc
        logger.debug(
            "[Parse] q%s: direct parse failed, trying extraction", question_no
        )

    # The text may wrap the JSON object in surrounding prose.
    for candidate in _json_candidates(text):
        try:
            normalized = json.dumps(json.loads(candidate))
            return _as_plain(parser_obj.parse(normalized))
        except Exception as exc:
            last_error = exc
            logger.debug("[Parse] q%s: json extraction failed", question_no)

    error_msg = str(last_error) if last_error else "unknown"
    logger.error("[Parse] q%s: failed all attempts: %s", question_no, error_msg)
    return None
def categorize_error(error_str: str) -> str:
    """Map an error message to a coarse category label.

    Matching is a case-insensitive substring test, evaluated in priority
    order; the first rule with a matching keyword wins.

    Args:
        error_str: The error message to classify.

    Returns:
        One of ``"timeout"``, ``"invalid_json"``, ``"missing_field"``,
        ``"null"``, or ``"unknown"`` when no keyword matches.
    """
    lowered = error_str.lower()
    # Order matters: earlier rules take precedence over later ones.
    rules = (
        (("timeout",), "timeout"),
        (("json", "invalid"), "invalid_json"),
        (("field required", "missing"), "missing_field"),
        (("none", "null"), "null"),
    )
    for keywords, label in rules:
        if any(keyword in lowered for keyword in keywords):
            return label
    return "unknown"