"""
Test the game validation and repair pipeline.

Run this script to:
1. Test validation against hard rules
2. Test repair logic for failed validations
3. Validate generated games from the generator
4. Display validation results

The process exit status is 0 when every generated game meets its expectation
and the repair test passes, and 1 otherwise.
"""

import json
import sys

from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples
from app.services.generator import generate_game
from app.services.validator import validate_game, repair_game

# Relative path: resolved against the current working directory at run time.
DATASET_PATH = "app/data/games_dataset.json"

SEPARATOR = "=" * 80
SUB_SEPARATOR = "-" * 80

# Human-readable checklist printed at the end of the run.
CHECKS_IMPLEMENTED = """
✓ Structure validation
  - All required fields present
  - Correct field types

✓ Task requirements
  - Proof types in enum (photo|observation|text)
  - Positive points
  - Meaningful hints and safety notes

✓ Safety constraints
  - No forbidden behaviors (buildings, private areas)
  - Water hazards have explicit restrictions
  - Road hazards have explicit guidance
  - No illegal building access

✓ Age appropriateness
  - Kids/mixed games require adult supervision
  - Supervision mentioned in rules

✓ Realism checks
  - Task count matches duration (8-20 min per task)
  - Clear win conditions
  - Scoring rules present

✓ Repair automation
  - Adds missing fields with defaults
  - Fixes enum violations
  - Ensures positive values
  - Adjusts task count for duration
"""


def _print_banner(title):
    """Print *title* framed by separator lines, preceded by a blank line."""
    print("\n" + SEPARATOR)
    print(title)
    print(SEPARATOR)


def _print_failures(failures, limit, show_overflow=True):
    """Print the first *limit* failures, optionally noting how many were omitted."""
    for failure in failures[:limit]:
        print(f" - {failure}")
    if show_overflow and len(failures) > limit:
        print(f" ... and {len(failures) - limit} more errors")


def _build_test_configs():
    """Return the generation configs exercised by test suite 1."""
    return [
        {
            "name": "Valid: Scavenger Hunt - Adults - Medium",
            "config": {
                "game_type": "scavenger_hunt",
                "city": "Paris",
                "area": "Le Marais",
                "location_type": "mixed",
                "duration_minutes": 60,
                "num_players": 4,
                "difficulty": "medium",
                "age_group": "adults",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Hide & Seek - Kids - Easy",
            "config": {
                "game_type": "hide_and_seek",
                "city": "Paris",
                "area": "Parc des Buttes-Chaumont",
                "location_type": "park",
                "duration_minutes": 45,
                "num_players": 5,
                "difficulty": "easy",
                "age_group": "kids",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Tag - Teens - Hard",
            "config": {
                "game_type": "tag",
                "city": "Paris",
                "area": "Jardins du Trocadéro",
                "location_type": "park",
                "duration_minutes": 30,
                "num_players": 8,
                "difficulty": "hard",
                "age_group": "teens",
            },
            "expect_valid": True,
        },
    ]


def _build_invalid_game():
    """Return a deliberately broken game used to exercise the repair logic."""
    return {
        "game_id": "invalid-test",
        "title": "Test Game",
        "theme": "test",
        "setup": {
            "city": "Paris",
            "area": "Test Area",
            "meeting_point": "Central",
            "duration_minutes": 60,
            "num_players": 4,
        },
        "rules": ["Rule 1"],
        "tasks": [
            {
                "task_id": "t1",
                # Missing: title, location_hint, hint, safety_note
                "description": None,  # Invalid: null description
                "points": -5,  # Invalid: negative points
                "proof_type": "invalid_type",  # Invalid: not in enum
            }
        ],
        "global_hints": ["Hint"],
        "score_rules": [],  # Empty
        "tie_breaker": "",  # Empty
        "safety": {},  # Missing required fields
    }


def _validate_generated_games(test_configs, normalized_records):
    """Generate and validate one game per config; return one result dict per config."""
    results = []
    for test in test_configs:
        print(f"\n{test['name']}")
        print(SUB_SEPARATOR)
        config = test["config"]

        print("Retrieving similar games...")
        retrieved = retrieve_examples(config, normalized_records, k=3)

        print("Generating game...")
        game = generate_game(config, retrieved)

        print("Validating game...")
        is_valid, failures = validate_game(game, config)
        if is_valid:
            print(f"✓ VALID - Game {game['game_id']} passed all checks")
        else:
            print(f"✗ INVALID - {len(failures)} validation errors:")
            _print_failures(failures, limit=3)

        results.append({
            "name": test["name"],
            "valid": is_valid,
            "expected_valid": test["expect_valid"],
            "failures": failures,
        })
    return results


def _run_repair_test(invalid_game, config):
    """Validate, repair and re-validate *invalid_game*.

    Returns a ``(passed, failures_before, failures_after)`` tuple. The test
    passes only when the game is rejected before repair and accepted after it.
    """
    print("\nBefore repair:")
    is_valid_before, failures_before = validate_game(invalid_game, config)
    if is_valid_before:
        # The fixture is broken on purpose; a validator that accepts it is
        # itself faulty, so do not claim that errors were found.
        print("✓ Unexpectedly VALID - the invalid fixture passed validation")
    else:
        print(f"✗ Invalid with {len(failures_before)} errors:")
        _print_failures(failures_before, limit=5)

    print("\nRepairing game...")
    repaired_game = repair_game(invalid_game, failures_before, config)

    print("After repair:")
    is_valid_after, failures_after = validate_game(repaired_game, config)
    if is_valid_after:
        print("✓ VALID after repair")
        print(f" Fixed {len(failures_before)} issues")
        print(f" Game ID: {repaired_game['game_id']}")
        print(f" Tasks: {len(repaired_game['tasks'])}")
        print(f" Rules: {len(repaired_game['rules'])}")
        print(f" Safety zone: {repaired_game['safety'].get('allowed_zone')}")
    else:
        print(f"✗ Still invalid with {len(failures_after)} errors:")
        _print_failures(failures_after, limit=3, show_overflow=False)

    passed = is_valid_after and not is_valid_before
    return passed, failures_before, failures_after


def _print_summary(results, repair_passed, failures_before, failures_after):
    """Print the per-test summary; return True when all generated games met expectations."""
    passed = sum(1 for r in results if r["valid"] == r["expected_valid"])
    total = len(results)
    print(f"\nGenerated games: {passed}/{total} passed expectations")
    for result in results:
        status = "✓ PASS" if result["valid"] == result["expected_valid"] else "✗ FAIL"
        valid_str = "VALID" if result["valid"] else "INVALID"
        print(f"{status}: {result['name']} → {valid_str}")

    print(f"\nRepair test: {'✓ PASS' if repair_passed else '✗ FAIL'}")
    print(f" Before: {len(failures_before)} failures")
    print(f" After: {len(failures_after)} failures")
    return passed == total


def main():
    """Run both test suites, print a report and return a process exit status.

    Returns:
        0 when every generated game matched its expectation and the repair
        test passed, 1 otherwise.
    """
    _print_banner("PHASE 2, TASK 7: GAME VALIDATION AND REPAIR")

    print("\n1. Loading dataset...")
    raw_records = load_games_dataset(DATASET_PATH)
    normalized_records = [normalize_game_record(r) for r in raw_records]
    print(f"✓ Loaded {len(normalized_records)} records")

    test_configs = _build_test_configs()

    _print_banner("TEST SUITE 1: VALIDATING GENERATED GAMES")
    results = _validate_generated_games(test_configs, normalized_records)

    _print_banner("TEST SUITE 2: REPAIR OF INVALID GAMES")
    # The repair test reuses the first generation config as its target.
    repair_passed, failures_before, failures_after = _run_repair_test(
        _build_invalid_game(), test_configs[0]["config"]
    )

    _print_banner("VALIDATION TEST SUMMARY")
    generated_passed = _print_summary(
        results, repair_passed, failures_before, failures_after
    )

    _print_banner("VALIDATION CHECKS IMPLEMENTED")
    print(CHECKS_IMPLEMENTED)

    return 0 if generated_passed and repair_passed else 1


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a failed run.
    sys.exit(main())