cq-test / test_validation.py
NANI-Nithin's picture
Phase 2: ship the core happy path
e9fc2fc
Raw
History Blame Contribute Delete
7.34 kB
"""
Test the game validation and repair pipeline.
Run this script to:
1. Test validation against hard rules
2. Test repair logic for failed validations
3. Validate generated games from the generator
4. Display validation results
"""
import json
from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples
from app.services.generator import generate_game
from app.services.validator import validate_game, repair_game
# Width of the "=" / "-" separator rules used throughout the console report.
_RULE_WIDTH = 80

# Human-readable list of the checks this script is meant to exercise.
# Kept at module level so the text is not affected by function indentation.
_CHECKS_SUMMARY = """
✓ Structure validation
- All required fields present
- Correct field types
✓ Task requirements
- Proof types in enum (photo|observation|text)
- Positive points
- Meaningful hints and safety notes
✓ Safety constraints
- No forbidden behaviors (buildings, private areas)
- Water hazards have explicit restrictions
- Road hazards have explicit guidance
- No illegal building access
✓ Age appropriateness
- Kids/mixed games require adult supervision
- Supervision mentioned in rules
✓ Realism checks
- Task count matches duration (8-20 min per task)
- Clear win conditions
- Scoring rules present
✓ Repair automation
- Adds missing fields with defaults
- Fixes enum violations
- Ensures positive values
- Adjusts task count for duration
"""


def _print_banner(title):
    """Print *title* framed by two full-width "=" rules, after a blank line."""
    print("\n" + "=" * _RULE_WIDTH)
    print(title)
    print("=" * _RULE_WIDTH)


def _print_failures(failures, limit):
    """Print at most *limit* entries of *failures*, then a count of the rest.

    *failures* is sliced and measured with ``len``, so it is assumed to be a
    list-like sequence as returned by ``validate_game``.
    """
    for failure in failures[:limit]:
        print(f" - {failure}")
    hidden = len(failures) - limit
    if hidden > 0:
        print(f" ... and {hidden} more errors")


def _build_test_configs():
    """Return the generation scenarios, each with its expected validity."""
    return [
        {
            "name": "Valid: Scavenger Hunt - Adults - Medium",
            "config": {
                "game_type": "scavenger_hunt",
                "city": "Paris",
                "area": "Le Marais",
                "location_type": "mixed",
                "duration_minutes": 60,
                "num_players": 4,
                "difficulty": "medium",
                "age_group": "adults",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Hide & Seek - Kids - Easy",
            "config": {
                "game_type": "hide_and_seek",
                "city": "Paris",
                "area": "Parc des Buttes-Chaumont",
                "location_type": "park",
                "duration_minutes": 45,
                "num_players": 5,
                "difficulty": "easy",
                "age_group": "kids",
            },
            "expect_valid": True,
        },
        {
            "name": "Valid: Tag - Teens - Hard",
            "config": {
                "game_type": "tag",
                "city": "Paris",
                "area": "Jardins du Trocadéro",
                "location_type": "park",
                "duration_minutes": 30,
                "num_players": 8,
                "difficulty": "hard",
                "age_group": "teens",
            },
            "expect_valid": True,
        },
    ]


def _build_invalid_game():
    """Return a deliberately broken game record used to exercise repair."""
    return {
        "game_id": "invalid-test",
        "title": "Test Game",
        "theme": "test",
        "setup": {
            "city": "Paris",
            "area": "Test Area",
            "meeting_point": "Central",
            "duration_minutes": 60,
            "num_players": 4,
        },
        "rules": ["Rule 1"],
        "tasks": [
            {
                "task_id": "t1",
                # Absent on purpose: title, location_hint, hint, safety_note.
                "description": None,  # Present but empty
                "points": -5,  # Invalid: negative points
                "proof_type": "invalid_type",  # Invalid: not in enum
            }
        ],
        "global_hints": ["Hint"],
        "score_rules": [],  # Empty
        "tie_breaker": "",  # Empty
        "safety": {},  # Missing required fields
    }


def _run_generation_suite(test_configs, normalized_records):
    """Generate and validate one game per scenario; return the result rows."""
    results = []
    for test in test_configs:
        print(f"\n{test['name']}")
        print("-" * _RULE_WIDTH)
        config = test["config"]

        print("Retrieving similar games...")
        retrieved = retrieve_examples(config, normalized_records, k=3)
        print("Generating game...")
        game = generate_game(config, retrieved)

        print("Validating game...")
        is_valid, failures = validate_game(game, config)
        if is_valid:
            print(f"✓ VALID - Game {game['game_id']} passed all checks")
        else:
            print(f"✗ INVALID - {len(failures)} validation errors:")
            _print_failures(failures, 3)

        results.append({
            "name": test["name"],
            "valid": is_valid,
            "expected_valid": test["expect_valid"],
            "failures": failures,
        })
    return results


def _run_repair_suite(invalid_game, config):
    """Validate, repair and re-validate *invalid_game*.

    Returns ``(is_valid_after, failures_before, failures_after)``.
    """
    print("\nBefore repair:")
    _, failures_before = validate_game(invalid_game, config)
    print(f"✗ Invalid with {len(failures_before)} errors:")
    _print_failures(failures_before, 5)

    print("\nRepairing game...")
    repaired_game = repair_game(invalid_game, failures_before, config)

    print("After repair:")
    is_valid_after, failures_after = validate_game(repaired_game, config)
    if is_valid_after:
        print("✓ VALID after repair")
        print(f" Fixed {len(failures_before)} issues")
        print(f" Game ID: {repaired_game['game_id']}")
        print(f" Tasks: {len(repaired_game['tasks'])}")
        print(f" Rules: {len(repaired_game['rules'])}")
        print(f" Safety zone: {repaired_game['safety'].get('allowed_zone')}")
    else:
        print(f"✗ Still invalid with {len(failures_after)} errors:")
        _print_failures(failures_after, 3)
    return is_valid_after, failures_before, failures_after


def _print_summary(results, repair_ok, failures_before, failures_after):
    """Print the pass/fail recap; return how many scenarios met expectations."""
    passed = sum(1 for r in results if r["valid"] == r["expected_valid"])
    print(f"\nGenerated games: {passed}/{len(results)} passed expectations")
    for result in results:
        met = result["valid"] == result["expected_valid"]
        status = "✓ PASS" if met else "✗ FAIL"
        valid_str = "VALID" if result["valid"] else "INVALID"
        print(f"{status}: {result['name']} → {valid_str}")

    print(f"\nRepair test: {'✓ PASS' if repair_ok else '✗ FAIL'}")
    print(f" Before: {len(failures_before)} failures")
    print(f" After: {len(failures_after)} failures")
    return passed


def main():
    """Run the validation and repair checks and print a console report.

    Steps, in order:
      1. Load the games dataset and normalize every record.
      2. For each scenario, retrieve examples, generate a game and validate it.
      3. Validate a hand-built broken game, repair it, and validate it again.
      4. Print a summary plus the list of checks the validator implements.

    Returns:
        int: 0 when every generated game matched its expected validity and
        the repaired game validated; 1 otherwise. Callers that ignore the
        return value see the same console output as before.
    """
    _print_banner("PHASE 2, TASK 7: GAME VALIDATION AND REPAIR")

    print("\n1. Loading dataset...")
    raw_records = load_games_dataset("app/data/games_dataset.json")
    normalized_records = [normalize_game_record(r) for r in raw_records]
    print(f"✓ Loaded {len(normalized_records)} records")

    test_configs = _build_test_configs()
    invalid_game = _build_invalid_game()

    _print_banner("TEST SUITE 1: VALIDATING GENERATED GAMES")
    results = _run_generation_suite(test_configs, normalized_records)

    _print_banner("TEST SUITE 2: REPAIR OF INVALID GAMES")
    # The repair run reuses the first scenario's config as validation context.
    repair_ok, failures_before, failures_after = _run_repair_suite(
        invalid_game, test_configs[0]["config"]
    )

    _print_banner("VALIDATION TEST SUMMARY")
    passed = _print_summary(results, repair_ok, failures_before, failures_after)

    _print_banner("VALIDATION CHECKS IMPLEMENTED")
    print(_CHECKS_SUMMARY)

    return 0 if (passed == len(results) and repair_ok) else 1
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit
    # status. SystemExit(None) exits with status 0, so a main() that returns
    # nothing still reports success.
    raise SystemExit(main())