File size: 1,138 Bytes
6d07351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from typing import Dict, List, Any, Tuple

def validate_schema_fill(
    obj: Dict[str, Any],
    schema_keys: List[str],
    *,
    max_len: int = 800,
) -> Tuple[bool, str]:
    """
    Strictly validate a mined JSON object against a target schema.

    The object passes only if its key set equals ``schema_keys`` exactly and
    every value is either ``None`` or a string that is at most ``max_len``
    characters long and contains no triple-backtick fence.

    Args:
        obj: The candidate object. Anything that does not expose ``.keys()``
            (e.g. a list, a string, ``None``) is rejected instead of raising.
        schema_keys: The exact set of keys the object must contain.
        max_len: Maximum allowed length of any string value (default 800).

    Returns:
        ``(True, "ok")`` on success, otherwise ``(False, reason)`` where
        ``reason`` describes the first failed check.
    """
    # 0. Shape check: report non-mapping input as a validation failure
    #    rather than letting AttributeError escape to the caller.
    try:
        raw_keys = obj.keys()
    except AttributeError:
        return False, (
            f"Schema mismatch. Expected a JSON object, got {type(obj).__name__}"
        )

    # 1. Key set check (sets are built once and reused for the report)
    present = set(raw_keys)
    expected = set(schema_keys)
    if present != expected:
        missing = expected - present
        extra = present - expected
        return False, f"Schema mismatch. Missing: {missing}, Extra: {extra}"

    # 2. Value type and length check
    for k in schema_keys:
        v = obj[k]
        if v is None:
            continue
        if not isinstance(v, str):
            return False, f"Type error at '{k}': expected string or null, got {type(v).__name__}"

        # Safety: reject excessively long strings (potential hallucination or prose injection)
        if len(v) > max_len:
            return False, f"Value too long at '{k}' (>{max_len} chars)"

        # Safety: reject code blocks or markdown artifacts if found
        if "```" in v:
            return False, f"Markdown artifact found at '{k}'"

    return True, "ok"