Spaces:
Sleeping
Sleeping
| import json | |
| import jsonschema | |
def _json_equal(left, right) -> bool:
    """Return True when two decoded JSON values are equal under JSON typing.

    Plain ``==`` treats ``True == 1`` and ``False == 0`` as equal because
    ``bool`` is a subclass of ``int``; JSON booleans and numbers are distinct
    types, so they are compared by identity here. Numbers still compare by
    value (``1 == 1.0``). The walk is iterative so nesting depth cannot
    exhaust the interpreter stack.
    """
    pending = [(left, right)]
    while pending:
        a, b = pending.pop()
        if isinstance(a, bool) or isinstance(b, bool):
            # json.loads yields the True/False singletons, so identity is exact.
            if a is not b:
                return False
        elif isinstance(a, dict) and isinstance(b, dict):
            if a.keys() != b.keys():
                return False
            pending.extend((a[key], b[key]) for key in a)
        elif isinstance(a, list) and isinstance(b, list):
            if len(a) != len(b):
                return False
            pending.extend(zip(a, b))
        elif a != b:
            return False
    return True


def grade_repair(repaired_json: str, task: dict) -> tuple:
    """Grade the agent's JSON repair.

    Raw score components (summed, then rescaled):
      - Valid JSON syntax:    0.40
      - Schema compliance:    0.40 (0.05 consolation if validation fails)
      - Semantic correctness: 0.20 for an exact match with the reference,
        otherwise up to 0.10 in proportion to the schema's top-level
        ``required`` keys whose values match the reference.

    The raw score in [0, 1] is mapped to ``0.1 + 0.8 * raw`` so the reported
    reward always lies strictly between 0 and 1 (0.1 .. 0.9).

    Args:
        repaired_json: The agent's submission, expected to be JSON text.
        task: Mapping with ``"schema"`` (a JSON Schema) and ``"correct_json"``
            (the reference answer as JSON text).

    Returns:
        ``(reward, info)`` where ``info`` has ``"errors"`` (list of messages),
        ``"checks"`` (per-check booleans) and ``"final_score"``.

    Raises:
        KeyError: If the submission parses but ``task`` has no ``"schema"``.
        Any exception other than ``jsonschema.ValidationError`` raised by
        ``jsonschema.validate`` (for example for an invalid schema) is
        propagated unchanged.
    """
    info = {"errors": [], "checks": {}}

    # --- Check 1: Valid JSON syntax (40%) ---
    # TypeError covers non-text submissions (e.g. None); RecursionError covers
    # pathologically nested input. Both are scored as unparseable, not crashes.
    try:
        parsed = json.loads(repaired_json)
    except (ValueError, TypeError, RecursionError) as exc:
        info["errors"].append(f"Invalid JSON syntax: {str(exc)}")
        info["checks"]["valid_json"] = False
        info["final_score"] = 0.1
        return 0.1, info
    score = 0.40
    info["checks"]["valid_json"] = True

    # --- Check 2: Schema compliance (40%) ---
    schema = task["schema"]
    try:
        jsonschema.validate(instance=parsed, schema=schema)
    except jsonschema.ValidationError as exc:
        info["errors"].append(f"Schema validation failed: {exc.message}")
        info["checks"]["schema_valid"] = False
        score += 0.05  # tiny partial credit for at least being valid JSON
    else:
        score += 0.40
        info["checks"]["schema_valid"] = True

    # --- Check 3: Exact semantic match (20%) ---
    info["checks"]["exact_match"] = False
    try:
        correct = json.loads(task["correct_json"])
    except (KeyError, ValueError, TypeError, RecursionError) as exc:
        # A missing/broken reference must not crash grading, but should be
        # visible rather than silently swallowed.
        info["errors"].append(f"Reference JSON unavailable: {exc!r}")
    else:
        if _json_equal(parsed, correct):
            score += 0.20
            info["checks"]["exact_match"] = True
        else:
            # Partial credit: how many required keys match correctly.
            # Only meaningful when schema, submission and reference are all
            # JSON objects; otherwise no partial credit is awarded.
            required = schema.get("required", []) if isinstance(schema, dict) else []
            if (
                required
                and isinstance(required, list)
                and isinstance(parsed, dict)
                and isinstance(correct, dict)
            ):
                matched = sum(
                    1
                    for key in required
                    if key in parsed
                    and key in correct
                    and _json_equal(parsed[key], correct[key])
                )
                score += (matched / len(required)) * 0.10

    # Clip to (0, 1): scores must be strictly between 0 and 1, so a raw 0
    # becomes 0.1 and a raw 1 becomes 0.9.
    final_score = round(0.1 + (score * 0.8), 4)
    info["final_score"] = final_score
    return final_score, info