from collections.abc import Mapping
from typing import Dict, List, Any, Tuple


def _format_key_set(keys: set) -> str:
    """Render *keys* like a set literal, but in a stable (sorted) order."""
    if not keys:
        # Same text Python produces for an empty set.
        return "set()"
    return "{" + ", ".join(sorted(repr(k) for k in keys)) + "}"


def validate_schema_fill(
    obj: Dict[str, Any],
    schema_keys: List[str],
    *,
    max_len: int = 800,
) -> Tuple[bool, str]:
    """Strictly validate a mined JSON object against a target schema.

    The object passes only if its key set equals ``schema_keys`` exactly and
    every value is either ``None`` or a string that is at most ``max_len``
    characters long and contains no triple-backtick sequence.

    Args:
        obj: The candidate object. Anything that is not a mapping is
            rejected with a type error result instead of raising.
        schema_keys: The keys the object must contain, no more and no fewer.
        max_len: Maximum allowed length of a string value (default 800).

    Returns:
        ``(True, "ok")`` when the object is valid, otherwise
        ``(False, reason)`` where ``reason`` describes the first failure
        found.
    """
    # 0. Shape check: a list/str/None here would otherwise blow up on .keys().
    if not isinstance(obj, Mapping):
        return False, f"Type error: expected JSON object, got {type(obj).__name__}"

    # 1. Key set check
    actual = set(obj.keys())
    expected = set(schema_keys)
    if actual != expected:
        # Sorted rendering keeps the message reproducible across runs
        # (raw set order depends on string hash randomization).
        missing = _format_key_set(expected - actual)
        extra = _format_key_set(actual - expected)
        return False, f"Schema mismatch. Missing: {missing}, Extra: {extra}"

    # 2. Value type and length check
    for k in schema_keys:
        v = obj[k]
        if v is None:
            continue
        if not isinstance(v, str):
            return False, f"Type error at '{k}': expected string or null, got {type(v).__name__}"
        # Safety: reject excessively long strings (potential hallucination or prose injection)
        if len(v) > max_len:
            return False, f"Value too long at '{k}' (>{max_len} chars)"
        # Safety: reject code blocks or markdown artifacts if found
        if "```" in v:
            return False, f"Markdown artifact found at '{k}'"

    return True, "ok"