Spaces:
Sleeping
Sleeping
File size: 2,233 Bytes
318c7d3 63104ae 318c7d3 63104ae | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | import json
import jsonschema
def grade_repair(repaired_json: str, task: dict) -> tuple:
    """Grade an agent's JSON repair and return ``(reward, info)``.

    A raw score is accumulated from three checks and then rescaled:

    - Valid JSON syntax: 0.40
    - Schema compliance: 0.40 (0.05 consolation credit if validation fails)
    - Semantic correctness: 0.20 for an exact match with the reference,
      otherwise up to 0.10 in proportion to the schema's ``required`` keys
      whose values equal the reference's values

    The raw score is mapped to ``0.1 + 0.8 * raw`` so the returned reward
    lies in [0.1, 0.9], i.e. strictly between 0 and 1. Input that cannot be
    parsed as JSON short-circuits to a reward of 0.1.

    Args:
        repaired_json: The agent's repaired JSON document as text.
        task: Task definition. ``task["schema"]`` is handed to
            ``jsonschema.validate`` and ``task["correct_json"]`` is the
            reference document as a JSON string.

    Returns:
        A ``(reward, info)`` tuple. ``info`` contains ``"errors"`` (list of
        messages), ``"checks"`` (per-check booleans) and ``"final_score"``.

    Raises:
        KeyError: If ``task`` has no ``"schema"`` entry.
    """
    score = 0.0
    info = {"errors": [], "checks": {}}

    # --- Check 1: Valid JSON syntax (40%) ---
    try:
        parsed = json.loads(repaired_json)
    except (ValueError, TypeError, RecursionError) as e:
        # ValueError covers json.JSONDecodeError (and undecodable bytes);
        # TypeError covers non-text submissions such as None; RecursionError
        # covers pathologically nested input. None of them may crash grading.
        info["errors"].append(f"Invalid JSON syntax: {e}")
        info["checks"]["valid_json"] = False
        info["final_score"] = 0.1
        return 0.1, info
    score += 0.40
    info["checks"]["valid_json"] = True

    # --- Check 2: Schema compliance (40%) ---
    try:
        jsonschema.validate(instance=parsed, schema=task["schema"])
    except jsonschema.ValidationError as e:
        info["errors"].append(f"Schema validation failed: {e.message}")
        info["checks"]["schema_valid"] = False
        score += 0.05  # tiny partial credit for at least being valid JSON
    else:
        score += 0.40
        info["checks"]["schema_valid"] = True

    # --- Check 3: Exact semantic match (20%) ---
    try:
        correct = json.loads(task["correct_json"])
        if parsed == correct:
            score += 0.20
            info["checks"]["exact_match"] = True
        else:
            info["checks"]["exact_match"] = False
            # Partial credit: fraction of required keys whose values match.
            # Only meaningful when both documents are JSON objects and the
            # schema lists its required keys.
            schema = task["schema"]
            required = schema.get("required", []) if isinstance(schema, dict) else []
            if (
                required
                and isinstance(required, (list, tuple))
                and isinstance(parsed, dict)
                and isinstance(correct, dict)
            ):
                matched = sum(
                    1
                    for k in required
                    if k in parsed and k in correct and parsed[k] == correct[k]
                )
                score += (matched / len(required)) * 0.10
    except Exception as e:
        # Best effort: a missing or malformed reference must not crash
        # grading, but it is recorded instead of being silently dropped.
        info["checks"]["exact_match"] = False
        info["errors"].append(f"Semantic check could not be completed: {e!r}")

    # Rescale so the reward is strictly between 0 and 1:
    # a raw score of 0 becomes 0.1 and a raw score of 1 becomes 0.9.
    final_score = round(0.1 + (score * 0.8), 4)
    info["final_score"] = final_score
    return final_score, info
|