File size: 2,233 Bytes
318c7d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63104ae
 
318c7d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63104ae
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import jsonschema


def _json_equal(left, right) -> bool:
    """Compare two decoded JSON values without conflating booleans and numbers.

    Python's ``==`` treats ``True == 1`` and ``False == 0`` as equal, so a
    plain comparison would accept ``{"flag": 1}`` where ``{"flag": true}`` is
    expected. Integers and floats of equal value (``1`` vs ``1.0``) still
    compare equal.
    """
    if isinstance(left, bool) or isinstance(right, bool):
        # json.loads yields the True/False singletons, so identity is exact.
        return left is right
    if isinstance(left, dict) and isinstance(right, dict):
        return left.keys() == right.keys() and all(
            _json_equal(value, right[key]) for key, value in left.items()
        )
    if isinstance(left, list) and isinstance(right, list):
        return len(left) == len(right) and all(
            _json_equal(a, b) for a, b in zip(left, right)
        )
    return left == right


def grade_repair(repaired_json: str, task: dict) -> tuple:
    """
    Grade the agent's JSON repair. Returns (reward: float, info: dict).

    The reward always lies in [0.1, 0.9]: a raw score in [0, 1] is built from
    the checks below and then rescaled as ``0.1 + raw * 0.8``.

    Raw scoring:
    - Valid JSON syntax:     0.40 (if parsing fails, grading stops and the
                                   reward is the floor value 0.1)
    - Schema compliance:     0.40 (0.05 consolation if validation fails)
    - Semantic correctness:  0.20 for an exact match with the reference;
                             otherwise up to 0.10, proportional to the share
                             of the schema's "required" keys whose values
                             equal the reference values

    Args:
        repaired_json: The agent's repaired JSON document as text.
        task: Task description. Reads task["schema"] (a JSON Schema) and
            task["correct_json"] (the reference document as JSON text).

    Returns:
        A ``(reward, info)`` tuple. ``info`` contains "errors" (list of
        messages), "checks" (per-check booleans) and "final_score" (same
        value as ``reward``).

    Raises:
        KeyError: If ``task`` has no "schema" key. Exceptions raised by
            ``jsonschema.validate`` other than ``ValidationError`` also
            propagate.
    """
    score = 0.0
    info = {"errors": [], "checks": {}}

    # --- Check 1: Valid JSON syntax (40%) ---
    try:
        parsed = json.loads(repaired_json)
    except (ValueError, TypeError, RecursionError) as exc:
        # ValueError covers json.JSONDecodeError and undecodable bytes;
        # TypeError covers non-text submissions such as None; RecursionError
        # covers pathologically nested input. None of them should crash the
        # grader -- they all mean "not a usable JSON document".
        info["errors"].append(f"Invalid JSON syntax: {exc}")
        info["checks"]["valid_json"] = False
        info["final_score"] = 0.1
        return 0.1, info
    score += 0.40
    info["checks"]["valid_json"] = True

    # --- Check 2: Schema compliance (40%) ---
    try:
        jsonschema.validate(instance=parsed, schema=task["schema"])
    except jsonschema.ValidationError as exc:
        info["errors"].append(f"Schema validation failed: {exc.message}")
        info["checks"]["schema_valid"] = False
        score += 0.05  # small consolation on top of the syntax credit
    else:
        score += 0.40
        info["checks"]["schema_valid"] = True

    # --- Check 3: Exact semantic match (20%) ---
    info["checks"]["exact_match"] = False
    try:
        correct = json.loads(task["correct_json"])
        if _json_equal(parsed, correct):
            score += 0.20
            info["checks"]["exact_match"] = True
        elif isinstance(parsed, dict) and isinstance(correct, dict):
            # Partial credit: share of required keys whose values match.
            # Only meaningful when both documents are JSON objects.
            required = task["schema"].get("required", [])
            if required:
                matched = sum(
                    1
                    for key in required
                    if key in parsed
                    and key in correct
                    and _json_equal(parsed[key], correct[key])
                )
                score += (matched / len(required)) * 0.10
    except Exception as exc:
        # Best effort: a missing or broken reference must not crash grading,
        # but it is recorded so the problem is visible to whoever reads info.
        info["errors"].append(
            f"Semantic comparison failed: {type(exc).__name__}: {exc}"
        )

    # Rescale the raw score from [0, 1] into [0.1, 0.9]: the hackathon
    # requirements specify scores strictly between 0 and 1.
    final_score = round(0.1 + (score * 0.8), 4)

    info["final_score"] = final_score
    return final_score, info