File size: 7,344 Bytes
e9fc2fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
"""
Test the game validation and repair pipeline.

Run this script to:
1. Test validation against hard rules
2. Test repair logic for failed validations
3. Validate generated games from the generator
4. Display validation results
"""

import json
from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples
from app.services.generator import generate_game
from app.services.validator import validate_game, repair_game


# Width of the "=" / "-" rules used to frame every section of the report.
_RULE_WIDTH = 80


def _print_banner(title):
    """Print *title* framed by two full-width rules, preceded by a blank line."""
    print("\n" + "=" * _RULE_WIDTH)
    print(title)
    print("=" * _RULE_WIDTH)


def _print_failures(failures, limit):
    """Print at most *limit* failures, then how many more were omitted."""
    for failure in failures[:limit]:
        print(f"  - {failure}")
    hidden = len(failures) - limit
    if hidden > 0:
        print(f"  ... and {hidden} more errors")


def _build_test_configs():
    """Return the generation scenarios; each one is expected to validate."""
    return [
        {
            "name": "Valid: Scavenger Hunt - Adults - Medium",
            "config": {
                "game_type": "scavenger_hunt",
                "city": "Paris",
                "area": "Le Marais",
                "location_type": "mixed",
                "duration_minutes": 60,
                "num_players": 4,
                "difficulty": "medium",
                "age_group": "adults",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Hide & Seek - Kids - Easy",
            "config": {
                "game_type": "hide_and_seek",
                "city": "Paris",
                "area": "Parc des Buttes-Chaumont",
                "location_type": "park",
                "duration_minutes": 45,
                "num_players": 5,
                "difficulty": "easy",
                "age_group": "kids",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Tag - Teens - Hard",
            "config": {
                "game_type": "tag",
                "city": "Paris",
                "area": "Jardins du Trocadéro",
                "location_type": "park",
                "duration_minutes": 30,
                "num_players": 8,
                "difficulty": "hard",
                "age_group": "teens",
            },
            "expect_valid": True,
        },
    ]


def _build_invalid_game():
    """Return a deliberately broken game used to exercise the repair step."""
    return {
        "game_id": "invalid-test",
        "title": "Test Game",
        "theme": "test",
        "setup": {
            "city": "Paris",
            "area": "Test Area",
            "meeting_point": "Central",
            "duration_minutes": 60,
            "num_players": 4,
        },
        "rules": ["Rule 1"],
        "tasks": [
            {
                "task_id": "t1",
                # Missing entirely: title, location_hint, hint, safety_note
                "description": None,  # Invalid: present but empty
                "points": -5,  # Invalid: negative points
                "proof_type": "invalid_type",  # Invalid: not in enum
            }
        ],
        "global_hints": ["Hint"],
        "score_rules": [],  # Empty
        "tie_breaker": "",  # Empty
        "safety": {},  # Missing required fields
    }


def _run_generation_suite(test_configs, normalized_records):
    """Generate and validate one game per scenario.

    Args:
        test_configs: Scenarios as produced by ``_build_test_configs``.
        normalized_records: Normalized dataset records handed to retrieval.

    Returns:
        list[dict]: One entry per scenario with the keys ``name``, ``valid``,
        ``expected_valid`` and ``failures``.
    """
    _print_banner("TEST SUITE 1: VALIDATING GENERATED GAMES")

    results = []
    for test in test_configs:
        print(f"\n{test['name']}")
        print("-" * _RULE_WIDTH)

        config = test["config"]

        print("Retrieving similar games...")
        retrieved = retrieve_examples(config, normalized_records, k=3)

        print("Generating game...")
        game = generate_game(config, retrieved)

        print("Validating game...")
        is_valid, failures = validate_game(game, config)

        if is_valid:
            print(f"✓ VALID - Game {game['game_id']} passed all checks")
        else:
            print(f"✗ INVALID - {len(failures)} validation errors:")
            _print_failures(failures, limit=3)

        results.append({
            "name": test["name"],
            "valid": is_valid,
            "expected_valid": test["expect_valid"],
            "failures": failures,
        })
    return results


def _run_repair_suite(invalid_game, config):
    """Validate a broken game, repair it, and validate the result.

    Args:
        invalid_game: Game dict that is expected to fail validation.
        config: Game configuration passed to the validator and the repairer.

    Returns:
        tuple: ``(passed, failures_before, failures_after)``. ``passed`` is
        true only when the game was rejected before the repair and accepted
        after it.
    """
    _print_banner("TEST SUITE 2: REPAIR OF INVALID GAMES")

    print("\nBefore repair:")
    is_valid_before, failures_before = validate_game(invalid_game, config)
    if is_valid_before:
        # The fixture is broken on purpose; a validator that accepts it would
        # make the repair check pass without testing anything.
        print("✗ Validator accepted the deliberately invalid game")
    else:
        print(f"✗ Invalid with {len(failures_before)} errors:")
        _print_failures(failures_before, limit=5)

    print("\nRepairing game...")
    repaired_game = repair_game(invalid_game, failures_before, config)

    print("After repair:")
    is_valid_after, failures_after = validate_game(repaired_game, config)

    if is_valid_after:
        print("✓ VALID after repair")
        print(f"  Fixed {len(failures_before)} issues")
        print(f"  Game ID: {repaired_game['game_id']}")
        print(f"  Tasks: {len(repaired_game['tasks'])}")
        print(f"  Rules: {len(repaired_game['rules'])}")
        print(f"  Safety zone: {repaired_game['safety'].get('allowed_zone')}")
    else:
        print(f"✗ Still invalid with {len(failures_after)} errors:")
        _print_failures(failures_after, limit=3)

    passed = bool(is_valid_after) and not is_valid_before
    return passed, failures_before, failures_after


def _print_summary(results, repair_passed, failures_before, failures_after):
    """Print the pass/fail summary and return whether every check passed."""
    _print_banner("VALIDATION TEST SUMMARY")

    passed = sum(1 for r in results if r["valid"] == r["expected_valid"])
    total = len(results)

    print(f"\nGenerated games: {passed}/{total} passed expectations")
    for result in results:
        matched = result["valid"] == result["expected_valid"]
        status = "✓ PASS" if matched else "✗ FAIL"
        valid_str = "VALID" if result["valid"] else "INVALID"
        print(f"{status}: {result['name']} → {valid_str}")

    print(f"\nRepair test: {'✓ PASS' if repair_passed else '✗ FAIL'}")
    print(f"  Before: {len(failures_before)} failures")
    print(f"  After: {len(failures_after)} failures")

    return passed == total and repair_passed


def _print_checks_implemented():
    """Print the static checklist of validation rules covered by the pipeline."""
    _print_banner("VALIDATION CHECKS IMPLEMENTED")
    print("""
✓ Structure validation
  - All required fields present
  - Correct field types
  
✓ Task requirements
  - Proof types in enum (photo|observation|text)
  - Positive points
  - Meaningful hints and safety notes
  
✓ Safety constraints
  - No forbidden behaviors (buildings, private areas)
  - Water hazards have explicit restrictions
  - Road hazards have explicit guidance
  - No illegal building access
  
✓ Age appropriateness
  - Kids/mixed games require adult supervision
  - Supervision mentioned in rules
  
✓ Realism checks
  - Task count matches duration (8-20 min per task)
  - Clear win conditions
  - Scoring rules present
  
✓ Repair automation
  - Adds missing fields with defaults
  - Fixes enum violations
  - Ensures positive values
  - Adjusts task count for duration
    """)


def main():
    """Run the validation and repair checks and print a report.

    Loads the games dataset, generates and validates one game per test
    scenario, repairs a deliberately invalid game, and prints a summary.

    Returns:
        int: 0 when every generated game matched its expectation and the
        repair check passed, 1 otherwise.
    """
    _print_banner("PHASE 2, TASK 7: GAME VALIDATION AND REPAIR")

    print("\n1. Loading dataset...")
    raw_records = load_games_dataset("app/data/games_dataset.json")
    normalized_records = [normalize_game_record(r) for r in raw_records]
    print(f"✓ Loaded {len(normalized_records)} records")

    test_configs = _build_test_configs()
    results = _run_generation_suite(test_configs, normalized_records)

    # The repair check reuses the first scenario's configuration.
    repair_passed, failures_before, failures_after = _run_repair_suite(
        _build_invalid_game(), test_configs[0]["config"]
    )

    all_passed = _print_summary(
        results, repair_passed, failures_before, failures_after
    )
    _print_checks_implemented()

    return 0 if all_passed else 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status so shells and CI
    # can detect a failed run; a return value of None counts as success.
    raise SystemExit(main())