Spaces:
Running on Zero
Running on Zero
"""
Test the game validation and repair pipeline.

Run this script to:
1. Test validation against hard rules
2. Test repair logic for failed validations
3. Validate generated games from the generator
4. Display validation results
"""
| import json | |
| from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples | |
| from app.services.generator import generate_game | |
| from app.services.validator import validate_game, repair_game | |
# Status glyphs used in the console report.
# NOTE(review): the original glyphs were lost to a text-encoding mix-up (each
# one had degraded to the single character "β"). Check mark / cross / arrow
# are the presumed originals -- confirm against the upstream file.
_OK = "✓"
_BAD = "✗"
_ARROW = "→"

_RULE = "=" * 80

# Headline -> bullet points printed in the closing "checks implemented" recap.
_IMPLEMENTED_CHECKS = (
    ("Structure validation", (
        "All required fields present",
        "Correct field types",
    )),
    ("Task requirements", (
        "Proof types in enum (photo|observation|text)",
        "Positive points",
        "Meaningful hints and safety notes",
    )),
    ("Safety constraints", (
        "No forbidden behaviors (buildings, private areas)",
        "Water hazards have explicit restrictions",
        "Road hazards have explicit guidance",
        "No illegal building access",
    )),
    ("Age appropriateness", (
        "Kids/mixed games require adult supervision",
        "Supervision mentioned in rules",
    )),
    ("Realism checks", (
        "Task count matches duration (8-20 min per task)",
        "Clear win conditions",
        "Scoring rules present",
    )),
    ("Repair automation", (
        "Adds missing fields with defaults",
        "Fixes enum violations",
        "Ensures positive values",
        "Adjusts task count for duration",
    )),
)


def _banner(title):
    """Print *title* framed by two 80-column rules, preceded by a blank line."""
    print("\n" + _RULE)
    print(title)
    print(_RULE)


def _print_failures(failures, limit, show_remainder=True):
    """Print at most *limit* failures, optionally followed by an overflow count."""
    for failure in failures[:limit]:
        print(f" - {failure}")
    remainder = len(failures) - limit
    if show_remainder and remainder > 0:
        print(f" ... and {remainder} more errors")


def _build_test_configs():
    """Return the generation scenarios; each one is expected to validate."""
    return [
        {
            "name": "Valid: Scavenger Hunt - Adults - Medium",
            "config": {
                "game_type": "scavenger_hunt",
                "city": "Paris",
                "area": "Le Marais",
                "location_type": "mixed",
                "duration_minutes": 60,
                "num_players": 4,
                "difficulty": "medium",
                "age_group": "adults",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Hide & Seek - Kids - Easy",
            "config": {
                "game_type": "hide_and_seek",
                "city": "Paris",
                "area": "Parc des Buttes-Chaumont",
                "location_type": "park",
                "duration_minutes": 45,
                "num_players": 5,
                "difficulty": "easy",
                "age_group": "kids",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Tag - Teens - Hard",
            "config": {
                "game_type": "tag",
                "city": "Paris",
                # Was mis-encoded as "TrocadΓ©ro" (UTF-8 bytes read as Greek).
                "area": "Jardins du Trocadéro",
                "location_type": "park",
                "duration_minutes": 30,
                "num_players": 8,
                "difficulty": "hard",
                "age_group": "teens",
            },
            "expect_valid": True,
        },
    ]


def _build_invalid_game():
    """Return a deliberately broken game used to exercise the repair step."""
    return {
        "game_id": "invalid-test",
        "title": "Test Game",
        "theme": "test",
        "setup": {
            "city": "Paris",
            "area": "Test Area",
            "meeting_point": "Central",
            "duration_minutes": 60,
            "num_players": 4,
        },
        "rules": ["Rule 1"],
        "tasks": [
            {
                "task_id": "t1",
                # Absent on purpose: title, location_hint, hint, safety_note.
                "description": None,
                "points": -5,  # Invalid: negative points
                "proof_type": "invalid_type",  # Invalid: not in enum
            }
        ],
        "global_hints": ["Hint"],
        "score_rules": [],  # Empty
        "tie_breaker": "",  # Empty
        "safety": {},  # Missing required fields
    }


def _run_generation_suite(test_configs, records):
    """Generate and validate one game per scenario.

    Returns a list of dicts with keys ``name``, ``valid``, ``expected_valid``
    and ``failures`` -- one entry per scenario, in input order.
    """
    results = []
    for test in test_configs:
        print(f"\n{test['name']}")
        print("-" * 80)
        config = test["config"]

        print("Retrieving similar games...")
        retrieved = retrieve_examples(config, records, k=3)
        print("Generating game...")
        game = generate_game(config, retrieved)
        print("Validating game...")
        is_valid, failures = validate_game(game, config)

        if is_valid:
            print(f"{_OK} VALID - Game {game['game_id']} passed all checks")
        else:
            print(f"{_BAD} INVALID - {len(failures)} validation errors:")
            _print_failures(failures, limit=3)

        results.append({
            "name": test["name"],
            "valid": is_valid,
            "expected_valid": test["expect_valid"],
            "failures": failures,
        })
    return results


def _run_repair_suite(invalid_game, config):
    """Validate a broken game, repair it, and validate the result.

    Returns ``(is_valid_after, failures_before, failures_after)``.
    """
    print("\nBefore repair:")
    _, failures_before = validate_game(invalid_game, config)
    # The header is printed unconditionally: the fixture is expected to fail.
    print(f"{_BAD} Invalid with {len(failures_before)} errors:")
    _print_failures(failures_before, limit=5)

    print("\nRepairing game...")
    repaired_game = repair_game(invalid_game, failures_before, config)

    print("After repair:")
    is_valid_after, failures_after = validate_game(repaired_game, config)
    if is_valid_after:
        print(f"{_OK} VALID after repair")
        print(f" Fixed {len(failures_before)} issues")
        print(f" Game ID: {repaired_game['game_id']}")
        print(f" Tasks: {len(repaired_game['tasks'])}")
        print(f" Rules: {len(repaired_game['rules'])}")
        print(f" Safety zone: {repaired_game['safety'].get('allowed_zone')}")
    else:
        print(f"{_BAD} Still invalid with {len(failures_after)} errors:")
        _print_failures(failures_after, limit=3, show_remainder=False)

    return is_valid_after, failures_before, failures_after


def _print_summary(results, is_valid_after, failures_before, failures_after):
    """Print the pass/fail recap for both suites."""
    passed = sum(1 for r in results if r["valid"] == r["expected_valid"])
    print(f"\nGenerated games: {passed}/{len(results)} passed expectations")
    for result in results:
        met = result["valid"] == result["expected_valid"]
        status = f"{_OK} PASS" if met else f"{_BAD} FAIL"
        valid_str = "VALID" if result["valid"] else "INVALID"
        print(f"{status}: {result['name']} {_ARROW} {valid_str}")

    repair_status = f"{_OK} PASS" if is_valid_after else f"{_BAD} FAIL"
    print(f"\nRepair test: {repair_status}")
    print(f" Before: {len(failures_before)} failures")
    print(f" After: {len(failures_after)} failures")


def _print_implemented_checks():
    """Print the static list of checks the validator/repairer is meant to cover."""
    lines = []
    for heading, bullets in _IMPLEMENTED_CHECKS:
        lines.append(f"{_OK} {heading}")
        lines.extend(f"- {bullet}" for bullet in bullets)
    # Leading and trailing newline mirror the original triple-quoted block.
    print("\n" + "\n".join(lines) + "\n")


def main():
    """Run the validation and repair demo end to end and print a report.

    Steps, in order:
      1. Load and normalize the games dataset.
      2. For each scenario: retrieve examples, generate a game, validate it.
      3. Validate a hand-built invalid game, repair it, validate again.
      4. Print a summary of both suites and the list of implemented checks.

    Returns None; all results are reported on stdout.
    """
    _banner("PHASE 2, TASK 7: GAME VALIDATION AND REPAIR")

    print("\n1. Loading dataset...")
    raw_records = load_games_dataset("app/data/games_dataset.json")
    normalized_records = [normalize_game_record(r) for r in raw_records]
    print(f"{_OK} Loaded {len(normalized_records)} records")

    test_configs = _build_test_configs()
    invalid_game = _build_invalid_game()

    _banner("TEST SUITE 1: VALIDATING GENERATED GAMES")
    results = _run_generation_suite(test_configs, normalized_records)

    _banner("TEST SUITE 2: REPAIR OF INVALID GAMES")
    # The repair suite reuses the first scenario's config as its target spec.
    is_valid_after, failures_before, failures_after = _run_repair_suite(
        invalid_game, test_configs[0]["config"]
    )

    _banner("VALIDATION TEST SUMMARY")
    _print_summary(results, is_valid_after, failures_before, failures_after)

    _banner("VALIDATION CHECKS IMPLEMENTED")
    _print_implemented_checks()
# Run the demo only when this file is executed as a script, so importing the
# module has no side effects.
if __name__ == "__main__":
    main()