# Scene_Foundry_Demo / run_evaluation.py
"""
Final Evaluation Pipeline - Production Ready
- Uses real app.py generation pipeline
- 20 test cases covering diverse layouts
- Balanced evaluator optimized for Infinigen's capabilities
"""
import sys
from pathlib import Path
import shutil
import time
from evaluation.test_cases import get_test_cases
from evaluation.mesh_extractor import MeshGraphExtractor
from evaluation.balanced_evaluator import BalancedEvaluator
def validate_and_fix_config(config_path: Path) -> bool:
"""Validate and fix configuration to prevent Infinigen bugs"""
import yaml
try:
with open(config_path, 'r') as f:
original_content = f.read()
f.seek(0)
config = yaml.safe_load(f)
# Debug: Show original AI output
print(f" πŸ“„ AI generated config:")
for line in original_content.split('\n')[:15]: # First 15 lines
if line.strip():
print(f" {line}")
# Ensure all area values are >= 5 (prevent division by zero)
min_area = 5
area_keys = ['kitchen_area', 'bedroom_area', 'living_room_area',
'dining_room_area', 'bathroom_area']
fixed = False
for key in area_keys:
if key in config:
                if not isinstance(config[key], (int, float)) or config[key] < min_area:
                    print(f" ⚠️ Fixing {key}: {config[key]} → {min_area}")
                    config[key] = min_area
                    fixed = True
else:
# Add missing area with default value
default_areas = {
'kitchen_area': 15,
'bedroom_area': 20,
'living_room_area': 30,
'dining_room_area': 15,
'bathroom_area': 8
}
print(f" ⚠️ Adding missing {key}: {default_areas[key]}")
config[key] = default_areas[key]
fixed = True
# Ensure counts are >= 1
if 'bedroom_count' in config and config['bedroom_count'] < 1:
config['bedroom_count'] = 1
fixed = True
if fixed:
with open(config_path, 'w') as f:
yaml.dump(config, f, default_flow_style=False)
print(f" βœ“ Configuration validated and fixed")
return True
except Exception as e:
print(f" βœ— Config validation failed: {e}")
return False
def generate_scene_with_app(prompt: str, output_dir: Path, max_retries: int = 3) -> bool:
"""
Generate scene using backend components directly
Args:
prompt: User prompt
output_dir: Where to save the scene
max_retries: Maximum number of retries if generation fails
Returns:
True if generation succeeded
"""
from core.config_generator import ConfigGenerator
from backend.infinigen_backend import InfinigenBackend
from config import settings
for attempt in range(max_retries):
try:
if attempt > 0:
print(f" Retry {attempt}/{max_retries-1}...")
else:
print(f" Generating scene...")
# Step 1: Generate configuration (or use default on failure)
try:
config_gen = ConfigGenerator()
yaml_content = config_gen.generate(prompt)
# Save configuration
with open(settings.CONFIG_OUTPUT, 'w') as f:
f.write(yaml_content)
# Validate and fix configuration
if not validate_and_fix_config(settings.CONFIG_OUTPUT):
raise ValueError("Invalid configuration generated")
except Exception as e:
print(f" ⚠️ Config generation failed: {e}, using default config")
# Use default config as fallback
default_config = Path("config/default_params.yaml")
if default_config.exists():
shutil.copy(default_config, settings.CONFIG_OUTPUT)
validate_and_fix_config(settings.CONFIG_OUTPUT)
else:
raise ValueError("No default config found")
# Step 2: Generate layout with different seed on retry
backend = InfinigenBackend()
seed = settings.DEFAULT_SEED + attempt # Use different seed on retry
blend_file = backend.generate(
config_path=settings.CONFIG_OUTPUT,
output_dir=settings.OUTPUT_DIR,
seed=seed
)
# Step 3: Copy to evaluation directory
source_dir = blend_file.parent
if output_dir.exists():
shutil.rmtree(output_dir)
shutil.copytree(source_dir, output_dir)
print(f" βœ“ Generated and saved to {output_dir}")
return True
except Exception as e:
print(f" βœ— Attempt {attempt+1} failed: {e}")
if attempt == max_retries - 1:
# Last attempt failed
import traceback
traceback.print_exc()
return False
# Continue to next retry
continue
return False
def evaluate_scene(test_case, scene_path: Path) -> dict:
"""
Evaluate a generated scene
Args:
test_case: Test case with ground truth
scene_path: Path to generated scene
Returns:
Evaluation result dictionary
"""
try:
# Extract graph
extractor = MeshGraphExtractor(scene_path)
G_gen = extractor.extract_graph()
# Evaluate with Balanced Evaluator
evaluator = BalancedEvaluator()
result = evaluator.evaluate(G_gen, test_case.ground_truth)
# Add metadata
result['test_id'] = test_case.id
result['prompt'] = test_case.prompt
result['description'] = test_case.description
result['generated_rooms'] = len(G_gen['nodes'])
result['generated_edges'] = len(G_gen['edges'])
return result
except Exception as e:
print(f" βœ— Evaluation failed: {e}")
return {
'test_id': test_case.id,
'overall_score': 0.0,
'error': str(e)
}
def run_full_evaluation(generate: bool = True, test_ids: list = None):
"""
Run complete evaluation pipeline
Args:
generate: If True, generate new scenes. If False, use existing.
test_ids: List of specific test IDs to run. If None, run all.
"""
print("="*80)
print("EVALUATION PIPELINE - Production Ready")
print("="*80)
# Get test cases
all_tests = get_test_cases()
if test_ids:
test_cases = [tc for tc in all_tests if tc.id in test_ids]
else:
test_cases = all_tests
print(f"\nTest Cases: {len(test_cases)}")
print(f"Mode: {'GENERATE + EVALUATE' if generate else 'EVALUATE ONLY'}\n")
results = []
output_base = Path("outputs/evaluation")
output_base.mkdir(parents=True, exist_ok=True)
for i, test_case in enumerate(test_cases, 1):
print(f"\n{'='*80}")
print(f"[{i}/{len(test_cases)}] {test_case.id}")
print(f"{'='*80}")
print(f"Prompt: {test_case.prompt}")
print(f"Description: {test_case.description}")
scene_dir = output_base / test_case.id
# Generate if requested
if generate:
start_time = time.time()
success = generate_scene_with_app(test_case.prompt, scene_dir)
gen_time = time.time() - start_time
if not success:
print(f" ⚠️ Skipping evaluation (generation failed)")
continue
print(f" Generation time: {gen_time:.1f}s")
else:
# Check if scene exists
if not scene_dir.exists() or not (scene_dir / "solve_state.json").exists():
print(f" ⚠️ Scene not found, skipping...")
continue
print(f" Using existing scene")
# Evaluate
print(f" Evaluating...")
result = evaluate_scene(test_case, scene_dir)
results.append(result)
# Display result
score = result['overall_score']
emoji = "πŸ†" if score >= 0.9 else "βœ“" if score >= 0.8 else "⚠️" if score >= 0.7 else "βœ—"
print(f"\n {emoji} Overall Score: {score:.1%}")
print(f" Interpretation: {result.get('interpretation', 'N/A')}")
print(f" Generated: {result.get('generated_rooms', 0)} rooms, "
f"{result.get('generated_edges', 0)} edges")
# Display component scores
print(f" Component Scores:")
if 'room_counts' in result:
print(f" - Room Counts: {result['room_counts']['score']:.1%} (weight 30%)")
if 'room_presence' in result:
print(f" - Room Presence: {result['room_presence']['score']:.1%} (weight 20%)")
if 'adjacency' in result:
print(f" - Adjacency: {result['adjacency']['score']:.1%} (weight 30%)")
if 'constraints' in result:
print(f" - Constraints: {result['constraints']['score']:.1%} (weight 20%)")
# Final summary
print("\n" + "="*80)
print("SUMMARY")
print("="*80)
if not results:
print("\n⚠️ No results to summarize")
return
scores = [r['overall_score'] for r in results]
avg_score = sum(scores) / len(scores)
print(f"\nTotal Tests: {len(results)}")
print(f"Average Score: {avg_score:.1%}")
print(f"Score Range: {min(scores):.1%} - {max(scores):.1%}")
# Score distribution
excellent = sum(1 for s in scores if s >= 0.9)
good = sum(1 for s in scores if 0.8 <= s < 0.9)
acceptable = sum(1 for s in scores if 0.7 <= s < 0.8)
poor = sum(1 for s in scores if s < 0.7)
print(f"\nScore Distribution:")
print(f" πŸ† Excellent (β‰₯90%): {excellent}")
print(f" βœ“ Good (80-89%): {good}")
print(f" ⚠️ Acceptable (70-79%): {acceptable}")
print(f" βœ— Poor (<70%): {poor}")
# Individual results
print(f"\nIndividual Results:")
print("-"*80)
for result in sorted(results, key=lambda x: x['overall_score'], reverse=True):
score = result['overall_score']
emoji = "πŸ†" if score >= 0.9 else "βœ“" if score >= 0.8 else "⚠️" if score >= 0.7 else "βœ—"
print(f"{emoji} {result['test_id']:<30} {score:>6.1%}")
# Save results to files
save_results_to_files(results, test_cases)
def save_results_to_files(results, test_cases):
"""Save evaluation results to JSON and CSV files (append mode)"""
import json
import csv
from pathlib import Path
if not results:
return
# Prepare new data
new_data = []
for result in results:
# Find corresponding test case for description and prompt
test_case = next((tc for tc in test_cases if tc.id == result['test_id']), None)
data = {
'test_id': result['test_id'],
'description': test_case.description if test_case else '',
'prompt': test_case.prompt if test_case else '',
'score': result['overall_score'] * 100,
'room_counts': result.get('room_counts', {}).get('score', 0) * 100,
'room_presence': result.get('room_presence', {}).get('score', 0) * 100,
'adjacency': result.get('adjacency', {}).get('score', 0) * 100,
'constraints': result.get('constraints', {}).get('score', 0) * 100,
}
new_data.append(data)
# Load existing JSON data
json_file = Path('evaluation_results.json')
existing_data = []
if json_file.exists():
try:
with open(json_file, 'r') as f:
existing_data = json.load(f)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable results file: start fresh
            existing_data = []
# Merge: update existing entries or add new ones
existing_ids = {item['test_id']: i for i, item in enumerate(existing_data)}
for data in new_data:
if data['test_id'] in existing_ids:
# Update existing entry
existing_data[existing_ids[data['test_id']]] = data
else:
# Add new entry
existing_data.append(data)
# Save merged JSON
with open('evaluation_results.json', 'w') as f:
json.dump(existing_data, f, indent=2)
print(f"\nβœ“ Saved results to evaluation_results.json ({len(new_data)} new/updated)")
# Save merged CSV
with open('evaluation_results.csv', 'w', newline='') as f:
fieldnames = ['test_id', 'score', 'room_counts', 'room_presence', 'adjacency', 'constraints']
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for data in existing_data:
writer.writerow({k: data[k] for k in fieldnames})
print(f"βœ“ Saved results to evaluation_results.csv (total: {len(existing_data)} entries)")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Final Evaluation Pipeline")
parser.add_argument("--generate", action="store_true",
help="Generate new scenes (default: use existing)")
parser.add_argument("--test-ids", nargs="+",
help="Specific test IDs to run (default: all)")
parser.add_argument("--quick", action="store_true",
help="Quick test with only 3 cases")
args = parser.parse_args()
# Quick mode: test 3 representative cases
if args.quick:
test_ids = ["basic_studio_01", "two_bedroom_apt_01", "family_home_01"]
print("πŸš€ QUICK MODE: Testing 3 representative cases\n")
else:
test_ids = args.test_ids
run_full_evaluation(
generate=args.generate,
test_ids=test_ids
)