Spaces:

build-small-hackathon
/

cq-test

Running on Zero

App Files Files Community

cq-test / test_validation.py

NANI-Nithin

Phase 2: ship the core happy path

e9fc2fc 24 days ago

Raw

History Blame Contribute Delete

7.34 kB

	"""
	Test the game validation and repair pipeline.

	Run this script to:
	1. Test validation against hard rules
	2. Test repair logic for failed validations
	3. Validate generated games from the generator
	4. Display validation results
	"""

	import json
	from app.services.retrieval import load_games_dataset, normalize_game_record, retrieve_examples
	from app.services.generator import generate_game
	from app.services.validator import validate_game, repair_game


	def _print_banner(title):
	    """Print a blank line, then *title* framed by two 80-character '=' rules."""
	    print("\n" + "=" * 80)
	    print(title)
	    print("=" * 80)


	def _print_failures(failures, limit):
	    """Print at most *limit* entries of *failures*, then how many were omitted."""
	    for failure in failures[:limit]:
	        print(f" - {failure}")
	    hidden = len(failures) - limit
	    if hidden > 0:
	        print(f" ... and {hidden} more errors")


	def _test_configs():
	    """Return the generation scenarios; each one is expected to validate."""
	    return [
	        {
	            "name": "Valid: Scavenger Hunt - Adults - Medium",
	            "config": {
	                "game_type": "scavenger_hunt",
	                "city": "Paris",
	                "area": "Le Marais",
	                "location_type": "mixed",
	                "duration_minutes": 60,
	                "num_players": 4,
	                "difficulty": "medium",
	                "age_group": "adults",
	            },
	            "expect_valid": True,
	        },
	        {
	            "name": "Valid: Hide & Seek - Kids - Easy",
	            "config": {
	                "game_type": "hide_and_seek",
	                "city": "Paris",
	                "area": "Parc des Buttes-Chaumont",
	                "location_type": "park",
	                "duration_minutes": 45,
	                "num_players": 5,
	                "difficulty": "easy",
	                "age_group": "kids",
	            },
	            "expect_valid": True,
	        },
	        {
	            "name": "Valid: Tag - Teens - Hard",
	            "config": {
	                "game_type": "tag",
	                "city": "Paris",
	                "area": "Jardins du Trocadéro",
	                "location_type": "park",
	                "duration_minutes": 30,
	                "num_players": 8,
	                "difficulty": "hard",
	                "age_group": "teens",
	            },
	            "expect_valid": True,
	        },
	    ]


	def _invalid_game_fixture():
	    """Return a fresh, deliberately malformed game used to exercise repair.

	    A new dict is built on every call so that a repair step which mutates its
	    input cannot leak state into a later run.
	    """
	    return {
	        "game_id": "invalid-test",
	        "title": "Test Game",
	        "theme": "test",
	        "setup": {
	            "city": "Paris",
	            "area": "Test Area",
	            "meeting_point": "Central",
	            "duration_minutes": 60,
	            "num_players": 4,
	        },
	        "rules": ["Rule 1"],
	        "tasks": [
	            {
	                "task_id": "t1",
	                # No title, location_hint, hint or safety_note keys are supplied.
	                "description": None,  # Present but empty
	                "points": -5,  # Invalid: negative points
	                "proof_type": "invalid_type",  # Invalid: not in enum
	            }
	        ],
	        "global_hints": ["Hint"],
	        "score_rules": [],  # Empty
	        "tie_breaker": "",  # Empty
	        "safety": {},  # Missing required fields
	    }


	def main():
	    """Run the validation and repair checks and print a report.

	    Loads and normalises the games dataset, generates and validates one game
	    per scenario, then validates, repairs and re-validates a malformed
	    fixture, and finally prints a summary.

	    Returns:
	        int: 0 when every generated game met its expectation and the repair
	        test passed, otherwise 1, so the value can be used as an exit status.
	    """
	    _print_banner("PHASE 2, TASK 7: GAME VALIDATION AND REPAIR")

	    # Load dataset
	    print("\n1. Loading dataset...")
	    raw_records = load_games_dataset("app/data/games_dataset.json")
	    normalized_records = [normalize_game_record(r) for r in raw_records]
	    print(f"✓ Loaded {len(normalized_records)} records")

	    test_configs = _test_configs()
	    invalid_game = _invalid_game_fixture()
	    results = []

	    # Test 1: Generated games validation
	    _print_banner("TEST SUITE 1: VALIDATING GENERATED GAMES")

	    for test in test_configs:
	        print(f"\n{test['name']}")
	        print("-" * 80)

	        config = test["config"]

	        print("Retrieving similar games...")
	        retrieved = retrieve_examples(config, normalized_records, k=3)

	        print("Generating game...")
	        game = generate_game(config, retrieved)

	        print("Validating game...")
	        is_valid, failures = validate_game(game, config)

	        if is_valid:
	            print(f"✓ VALID - Game {game['game_id']} passed all checks")
	        else:
	            print(f"✗ INVALID - {len(failures)} validation errors:")
	            _print_failures(failures, limit=3)

	        results.append({
	            "name": test["name"],
	            "valid": is_valid,
	            "expected_valid": test["expect_valid"],
	            "failures": failures,
	        })

	    # Test 2: Invalid game repair
	    _print_banner("TEST SUITE 2: REPAIR OF INVALID GAMES")

	    print("\nBefore repair:")
	    config = test_configs[0]["config"]
	    is_valid_before, failures_before = validate_game(invalid_game, config)
	    if is_valid_before:
	        # The fixture is malformed on purpose. If the validator accepts it,
	        # the repair test proves nothing, so say so instead of printing a
	        # misleading "Invalid with 0 errors".
	        print("✗ Fixture unexpectedly passed validation before repair")
	    else:
	        print(f"✗ Invalid with {len(failures_before)} errors:")
	        _print_failures(failures_before, limit=5)

	    print("\nRepairing game...")
	    repaired_game = repair_game(invalid_game, failures_before, config)

	    print("After repair:")
	    is_valid_after, failures_after = validate_game(repaired_game, config)

	    if is_valid_after:
	        print("✓ VALID after repair")
	        print(f" Fixed {len(failures_before)} issues")
	        print(f" Game ID: {repaired_game['game_id']}")
	        print(f" Tasks: {len(repaired_game['tasks'])}")
	        print(f" Rules: {len(repaired_game['rules'])}")
	        print(f" Safety zone: {repaired_game['safety'].get('allowed_zone')}")
	    else:
	        print(f"✗ Still invalid with {len(failures_after)} errors:")
	        _print_failures(failures_after, limit=3)

	    # The repair test only counts when the fixture was rejected first.
	    repair_passed = is_valid_after and not is_valid_before

	    # Summary
	    _print_banner("VALIDATION TEST SUMMARY")

	    passed = sum(1 for r in results if r["valid"] == r["expected_valid"])
	    total = len(results)

	    print(f"\nGenerated games: {passed}/{total} passed expectations")
	    for result in results:
	        met_expectation = result["valid"] == result["expected_valid"]
	        status = "✓ PASS" if met_expectation else "✗ FAIL"
	        valid_str = "VALID" if result["valid"] else "INVALID"
	        print(f"{status}: {result['name']} → {valid_str}")

	    print(f"\nRepair test: {'✓ PASS' if repair_passed else '✗ FAIL'}")
	    print(f" Before: {len(failures_before)} failures")
	    print(f" After: {len(failures_after)} failures")

	    _print_banner("VALIDATION CHECKS IMPLEMENTED")
	    # Plain '|' separators: '\|' is not a valid escape sequence in a normal
	    # string literal and would also print the backslashes.
	    print("""
	✓ Structure validation
	- All required fields present
	- Correct field types

	✓ Task requirements
	- Proof types in enum (photo|observation|text)
	- Positive points
	- Meaningful hints and safety notes

	✓ Safety constraints
	- No forbidden behaviors (buildings, private areas)
	- Water hazards have explicit restrictions
	- Road hazards have explicit guidance
	- No illegal building access

	✓ Age appropriateness
	- Kids/mixed games require adult supervision
	- Supervision mentioned in rules

	✓ Realism checks
	- Task count matches duration (8-20 min per task)
	- Clear win conditions
	- Scoring rules present

	✓ Repair automation
	- Adds missing fields with defaults
	- Fixes enum violations
	- Ensures positive values
	- Adjusts task count for duration
	""")

	    return 0 if passed == total and repair_passed else 1


	if __name__ == "__main__":
	    # Pass main()'s return value to the interpreter as the exit status so a
	    # shell or CI job can observe it; a None return still exits with status 0.
	    raise SystemExit(main())