cq-test / test_generation.py
NANI-Nithin's picture
Phase 2: ship the core happy path
e9fc2fc
Raw
History Blame Contribute Delete
6.1 kB
"""
Test the game generation pipeline with retrieval and generation.
Run this script to:
1. Load and normalize the dataset
2. Retrieve similar games for a sample config
3. Generate a game using the generator
4. Validate the generated game against the schema
5. Display the results
"""
import json
from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples
from app.services.generator import generate_game, build_generation_prompt
from app.services.schema_validator import validate_game_schema
def _print_banner(title):
    """Print ``title`` framed above and below by an 80-character rule."""
    rule = "=" * 80
    print("\n" + rule)
    print(title)
    print(rule)


def _sample_test_configs():
    """Return the named sample configurations that ``main`` exercises.

    Each entry is a dict with a display ``name`` and the ``config`` dict
    that is passed to retrieval, prompt building and generation.
    """
    return [
        {
            "name": "Scavenger Hunt - Adults - Medium - Le Marais",
            "config": {
                "game_type": "scavenger_hunt",
                "city": "Paris",
                "area": "Le Marais",
                "location_type": "mixed",
                "duration_minutes": 60,
                "num_players": 4,
                "difficulty": "medium",
                "age_group": "adults",
                "photo_enabled": True,
            },
        },
        {
            "name": "Hide & Seek - Kids - Easy - Park",
            "config": {
                "game_type": "hide_and_seek",
                "city": "Paris",
                "area": "Parc des Buttes-Chaumont",
                "location_type": "park",
                "duration_minutes": 45,
                "num_players": 5,
                "difficulty": "easy",
                "age_group": "kids",
                "photo_enabled": False,
            },
        },
        {
            "name": "Tag - Teens - Hard - Trocadéro",
            "config": {
                "game_type": "tag",
                "city": "Paris",
                "area": "Jardins du Trocadéro",
                "location_type": "park",
                "duration_minutes": 30,
                "num_players": 8,
                "difficulty": "hard",
                "age_group": "teens",
                "photo_enabled": False,
            },
        },
    ]


def _print_game_details(game):
    """Print a short digest of a generated game and its first two tasks.

    Fields are indexed directly, so a game that lacks any displayed field
    raises ``KeyError``/``TypeError``; the caller decides how to report it.
    """
    print("\n6. Generated Game Details:")
    print(f" ID: {game['game_id']}")
    print(f" Title: {game['title']}")
    print(f" Theme: {game['theme']}")
    print(f" Area: {game['setup']['area']}")
    print(f" Duration: {game['setup']['duration_minutes']} min | Players: {game['setup']['num_players']}")
    print(f" Rules: {len(game['rules'])} rules")
    print(f" Tasks: {len(game['tasks'])} tasks")
    print(f" Safety: Adult supervision = {game['safety']['adult_supervision']}")
    print(f" Story tone: {game['story_seed']['tone']}")

    print("\n First 2 Tasks:")
    for task in game['tasks'][:2]:
        print(f" • {task['task_id']}: {task['title']}")
        print(f" Points: {task['points']} | Proof: {task['proof_type']} | Time: {task['time_limit_minutes']} min")


def _run_single_test(test, normalized_records):
    """Run retrieval, prompt building, generation and validation for one config.

    Args:
        test: One entry from ``_sample_test_configs()`` (``name`` + ``config``).
        normalized_records: Normalized dataset records to retrieve from.

    Returns:
        A result dict with keys ``config_name``, ``valid``, ``game_id`` and
        ``error_count``, as consumed by the summary in ``main``.
    """
    _print_banner(f"TEST: {test['name']}")
    config = test['config']

    # Step 1: Retrieve similar games
    print("\n2. Retrieving similar games...")
    retrieved = retrieve_examples(config, normalized_records, k=3)
    print(f"✓ Retrieved {len(retrieved)} similar games:")
    for i, ex in enumerate(retrieved, 1):
        print(f" {i}. {ex['id']} (score: {ex['retrieval_score']:.1f})")

    # Step 2: Show prompt snippet
    print("\n3. Building generation prompt...")
    prompt = build_generation_prompt(config, retrieved)
    print(f"✓ Prompt built ({len(prompt)} chars)")
    print("\nPrompt preview:")
    print(prompt[:300] + "...\n")

    # Step 3: Generate game. Only the generator call is guarded here, so the
    # "Generation failed" message is reserved for real generation failures.
    print("4. Generating game...")
    try:
        game = generate_game(config, retrieved)
    except Exception as exc:  # diagnostic script: report and move to next config
        print(f"✗ Generation failed: {exc}")
        return {
            'config_name': test['name'],
            'valid': False,
            'game_id': None,
            'error_count': 1,
        }

    # A malformed game may lack an id; that is a validation problem, not a
    # generation failure, so fall back to None instead of aborting the test.
    try:
        game_id = game['game_id']
    except (LookupError, TypeError):
        game_id = None
    print(f"✓ Game generated: {game_id}")

    # Step 4: Validate against schema
    print("\n5. Validating against schema...")
    try:
        is_valid, errors = validate_game_schema(game)
    except Exception as exc:  # keep the run going if the validator itself crashes
        is_valid = False
        errors = [f"Validator raised {type(exc).__name__}: {exc}"]
    if is_valid:
        print("✓ Game VALID against schema")
    else:
        print(f"✗ Game INVALID - {len(errors)} errors:")
        for error in errors[:3]:
            print(f" - {error}")

    # Step 5: Display game details. A display error must not overwrite the
    # validation outcome recorded below.
    try:
        _print_game_details(game)
    except (LookupError, TypeError) as exc:
        print(f"✗ Could not display game details: {exc!r}")

    return {
        'config_name': test['name'],
        'valid': is_valid,
        'game_id': game_id,
        'error_count': len(errors),
    }


def main():
    """Run the generation pipeline over the sample configs and print a report.

    Loads and normalizes the dataset, runs every sample configuration through
    retrieval, prompt building, generation and schema validation, then prints
    a per-config pass/fail summary. Returns ``None``; all output goes to stdout.
    """
    _print_banner("PHASE 2, TASK 6: GAME GENERATION TEST")

    # Load dataset
    print("\n1. Loading dataset...")
    raw_records = load_games_dataset("app/data/games_dataset.json")
    normalized_records = [normalize_game_record(r) for r in raw_records]
    print(f"✓ Loaded {len(normalized_records)} records")

    # Test each config
    results = [
        _run_single_test(test, normalized_records)
        for test in _sample_test_configs()
    ]

    # Summary
    _print_banner("TEST SUMMARY")
    for result in results:
        status = "✓ PASS" if result['valid'] else "✗ FAIL"
        print(f"{status}: {result['config_name']}")
        if result['game_id']:
            print(f" Generated: {result['game_id']}")
        if result['error_count'] > 0:
            print(f" Errors: {result['error_count']}")
    total_pass = sum(1 for r in results if r['valid'])
    print(f"\nTotal: {total_pass}/{len(results)} tests passed")

    # NOTE: this closing banner is printed regardless of the pass count.
    _print_banner("GENERATION READY FOR PHASE 2 TASK 7: VALIDATION")
# Script entry point: run the end-to-end generation check only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()