Spaces:

build-small-hackathon
/

cq-test

Running on Zero

File size: 7,344 Bytes

e9fc2fc

"""
Test the game validation and repair pipeline.

Run this script to:
1. Test validation against hard rules
2. Test repair logic for failed validations
3. Validate generated games from the generator
4. Display validation results
"""

import json
from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples
from app.services.generator import generate_game
from app.services.validator import validate_game, repair_game


# Width of the horizontal rules used to frame console sections.
_RULE_WIDTH = 80

# Static overview printed at the end of the run. Kept verbatim (including the
# leading newline and trailing indentation) so console output is unchanged.
_CHECKS_IMPLEMENTED = """
✓ Structure validation
  - All required fields present
  - Correct field types
  
✓ Task requirements
  - Proof types in enum (photo|observation|text)
  - Positive points
  - Meaningful hints and safety notes
  
✓ Safety constraints
  - No forbidden behaviors (buildings, private areas)
  - Water hazards have explicit restrictions
  - Road hazards have explicit guidance
  - No illegal building access
  
✓ Age appropriateness
  - Kids/mixed games require adult supervision
  - Supervision mentioned in rules
  
✓ Realism checks
  - Task count matches duration (8-20 min per task)
  - Clear win conditions
  - Scoring rules present
  
✓ Repair automation
  - Adds missing fields with defaults
  - Fixes enum violations
  - Ensures positive values
  - Adjusts task count for duration
    """


def _banner(title: str) -> None:
    """Print ``title`` between two rules, preceded by a blank line."""
    rule = "=" * _RULE_WIDTH
    print("\n" + rule)
    print(title)
    print(rule)


def _print_failures(failures, limit: int) -> None:
    """Print at most ``limit`` failures, then a count of the ones left out."""
    for failure in failures[:limit]:
        print(f"  - {failure}")
    hidden = len(failures) - limit
    if hidden > 0:
        print(f"  ... and {hidden} more errors")


def _build_test_configs() -> list:
    """Return the generation scenarios; each is expected to validate cleanly."""
    return [
        {
            "name": "Valid: Scavenger Hunt - Adults - Medium",
            "config": {
                "game_type": "scavenger_hunt",
                "city": "Paris",
                "area": "Le Marais",
                "location_type": "mixed",
                "duration_minutes": 60,
                "num_players": 4,
                "difficulty": "medium",
                "age_group": "adults",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Hide & Seek - Kids - Easy",
            "config": {
                "game_type": "hide_and_seek",
                "city": "Paris",
                "area": "Parc des Buttes-Chaumont",
                "location_type": "park",
                "duration_minutes": 45,
                "num_players": 5,
                "difficulty": "easy",
                "age_group": "kids",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Tag - Teens - Hard",
            "config": {
                "game_type": "tag",
                "city": "Paris",
                "area": "Jardins du Trocadéro",
                "location_type": "park",
                "duration_minutes": 30,
                "num_players": 8,
                "difficulty": "hard",
                "age_group": "teens",
            },
            "expect_valid": True,
        },
    ]


def _build_invalid_game() -> dict:
    """Return a deliberately malformed game used to exercise the repair path."""
    return {
        "game_id": "invalid-test",
        "title": "Test Game",
        "theme": "test",
        "setup": {
            "city": "Paris",
            "area": "Test Area",
            "meeting_point": "Central",
            "duration_minutes": 60,
            "num_players": 4,
        },
        "rules": ["Rule 1"],
        "tasks": [
            {
                "task_id": "t1",
                # Absent on purpose: title, location_hint, hint, safety_note.
                "description": None,  # Present but null
                "points": -5,  # Invalid: negative points
                "proof_type": "invalid_type",  # Invalid: not in enum
            }
        ],
        "global_hints": ["Hint"],
        "score_rules": [],  # Empty
        "tie_breaker": "",  # Empty
        "safety": {},  # Missing required fields
    }


def _run_generation_suite(test_configs, records) -> list:
    """Generate and validate one game per scenario and return the outcomes.

    Each outcome is a dict with the keys ``name``, ``valid``,
    ``expected_valid`` and ``failures``.
    """
    results = []
    for test in test_configs:
        config = test["config"]
        print(f"\n{test['name']}")
        print("-" * _RULE_WIDTH)

        print("Retrieving similar games...")
        retrieved = retrieve_examples(config, records, k=3)

        print("Generating game...")
        game = generate_game(config, retrieved)

        print("Validating game...")
        is_valid, failures = validate_game(game, config)

        if is_valid:
            print(f"✓ VALID - Game {game['game_id']} passed all checks")
        else:
            print(f"✗ INVALID - {len(failures)} validation errors:")
            _print_failures(failures, limit=3)

        results.append({
            "name": test["name"],
            "valid": is_valid,
            "expected_valid": test["expect_valid"],
            "failures": failures,
        })
    return results


def _run_repair_suite(invalid_game, config) -> tuple:
    """Validate, repair and re-validate ``invalid_game`` against ``config``.

    Returns ``(is_valid_after, failures_before, failures_after)``.
    """
    print("\nBefore repair:")
    is_valid_before, failures_before = validate_game(invalid_game, config)
    if is_valid_before:
        # Report what the validator actually said instead of assuming failure;
        # a passing fixture means the repair path is not really being tested.
        print("✓ Unexpectedly valid before repair (fixture did not fail validation)")
    else:
        print(f"✗ Invalid with {len(failures_before)} errors:")
        _print_failures(failures_before, limit=5)

    print("\nRepairing game...")
    repaired_game = repair_game(invalid_game, failures_before, config)

    print("After repair:")
    is_valid_after, failures_after = validate_game(repaired_game, config)

    if is_valid_after:
        print("✓ VALID after repair")
        print(f"  Fixed {len(failures_before)} issues")
        print(f"  Game ID: {repaired_game['game_id']}")
        print(f"  Tasks: {len(repaired_game['tasks'])}")
        print(f"  Rules: {len(repaired_game['rules'])}")
        print(f"  Safety zone: {repaired_game['safety'].get('allowed_zone')}")
    else:
        print(f"✗ Still invalid with {len(failures_after)} errors:")
        _print_failures(failures_after, limit=3)

    return is_valid_after, failures_before, failures_after


def main():
    """Run the validation and repair demo and print a report to stdout.

    Steps: load and normalize the dataset, generate and validate one game per
    scenario, repair a deliberately broken game and re-validate it, then
    print a summary followed by the list of implemented checks.

    Returns:
        int: 0 when every generated game matched its ``expect_valid`` flag
        and the repaired fixture validated, otherwise 1.
    """
    _banner("PHASE 2, TASK 7: GAME VALIDATION AND REPAIR")

    # Load dataset
    print("\n1. Loading dataset...")
    raw_records = load_games_dataset("app/data/games_dataset.json")
    normalized_records = [normalize_game_record(r) for r in raw_records]
    print(f"✓ Loaded {len(normalized_records)} records")

    test_configs = _build_test_configs()

    # Test 1: Generated games validation
    _banner("TEST SUITE 1: VALIDATING GENERATED GAMES")
    results = _run_generation_suite(test_configs, normalized_records)

    # Test 2: Invalid game repair (validated against the first scenario)
    _banner("TEST SUITE 2: REPAIR OF INVALID GAMES")
    is_valid_after, failures_before, failures_after = _run_repair_suite(
        _build_invalid_game(), test_configs[0]["config"]
    )

    # Summary
    _banner("VALIDATION TEST SUMMARY")
    passed = sum(1 for r in results if r["valid"] == r["expected_valid"])
    total = len(results)

    print(f"\nGenerated games: {passed}/{total} passed expectations")
    for result in results:
        status = "✓ PASS" if result["valid"] == result["expected_valid"] else "✗ FAIL"
        valid_str = "VALID" if result["valid"] else "INVALID"
        print(f"{status}: {result['name']} → {valid_str}")

    print(f"\nRepair test: {'✓ PASS' if is_valid_after else '✗ FAIL'}")
    print(f"  Before: {len(failures_before)} failures")
    print(f"  After: {len(failures_after)} failures")

    _banner("VALIDATION CHECKS IMPLEMENTED")
    print(_CHECKS_IMPLEMENTED)

    return 0 if passed == total and is_valid_after else 1


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit
    # status so a caller (shell, CI) can detect a failing run. A None return
    # still results in exit status 0.
    raise SystemExit(main())