Spaces:

Chunteng
/

Scene_Foundry_Demo

Runtime error

App Files Files Community

Scene_Foundry_Demo / evaluation /balanced_evaluator.py

Chunteng

Initial commit (Fresh Start)

a03fc9e about 2 months ago

raw

history blame contribute delete

14.6 kB

	"""
	Balanced Evaluator
	Between Realistic and Strict evaluators
	- Checks room counts with tolerance
	- Moderate penalty for extra rooms
	- Balanced scoring approach
	"""

	from typing import Dict, List
	import numpy as np
	from collections import Counter


	class BalancedEvaluator:
	    """Balanced evaluator - discriminating but not overly strict.

	    Scores a generated room graph (``G_gen``) against a ground-truth
	    specification (``G_gt``). Both are plain dicts. Keys read by this class:

	    - ``nodes``: list of node dicts with ``type`` and ``id``; generated
	      nodes may carry ``area``, ground-truth nodes may carry ``required``
	      (treated as True when absent).
	    - ``edges`` (generated graph): list of dicts with ``from`` and ``to``.
	    - ``constraints`` (ground truth, optional): may contain ``room_counts``,
	      ``adjacency``, ``min_areas`` and ``max_areas``.
	    """

	    def evaluate(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """
	        Balanced evaluation

	        Args:
	            G_gen: Generated graph
	            G_gt: Ground truth

	        Returns:
	            Dict with ``overall_score`` (weighted sum of the four
	            sub-scores), the four sub-results (``room_counts``,
	            ``room_presence``, ``adjacency``, ``constraints``), a textual
	            ``interpretation`` and a ``generated_summary`` of the generated
	            graph (total rooms and per-type counts).
	        """
	        count_score = self._evaluate_room_counts(G_gen, G_gt)
	        presence_score = self._evaluate_room_presence(G_gen, G_gt)
	        adjacency_score = self._evaluate_adjacency(G_gen, G_gt)
	        constraint_score = self._evaluate_constraints(G_gen, G_gt)

	        # Weights sum to 1.0, so the overall score stays within [0, 1]
	        # as long as every sub-score does.
	        overall_score = (
	            0.30 * count_score['score'] +      # 30%: Count accuracy
	            0.20 * presence_score['score'] +   # 20%: Room presence
	            0.30 * adjacency_score['score'] +  # 30%: Adjacency
	            0.20 * constraint_score['score']   # 20%: Other constraints
	        )

	        return {
	            "overall_score": overall_score,
	            "room_counts": count_score,
	            "room_presence": presence_score,
	            "adjacency": adjacency_score,
	            "constraints": constraint_score,
	            "interpretation": self._generate_interpretation(overall_score, count_score),
	            "generated_summary": {
	                "total_rooms": len(G_gen['nodes']),
	                "room_types": self._count_room_types(G_gen['nodes'])
	            }
	        }

	    def _evaluate_room_counts(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """
	        Evaluate room counts with tolerance

	        Different from Strict:
	        - Full score when no constraints (no penalty)
	        - Tolerance allows some deviation

	        Each entry of ``constraints.room_counts`` supplies ``type``,
	        ``count`` and an optional ``tolerance`` (default 0). A constraint
	        earns 1 when the deviation is within tolerance, a fractional credit
	        when the room type exists but deviates further, and 0 when the room
	        type is absent from the generated graph.

	        Returns:
	            Dict with ``score`` and ``details``; when constraints exist it
	            also carries ``satisfied`` (fully met), ``partial`` (sum of
	            fractional credits) and ``total``.
	        """
	        constraints = G_gt.get('constraints', {}).get('room_counts', [])

	        if not constraints:
	            # No count constraints, only check type presence
	            return {"score": 1.0, "details": ["No count constraints - types checked only"]}

	        gen_counts = self._count_room_types(G_gen['nodes'])

	        satisfied = 0
	        partial = 0
	        total = len(constraints)
	        details = []

	        for constraint in constraints:
	            room_type = constraint['type']
	            required_count = constraint['count']
	            tolerance = constraint.get('tolerance', 0)

	            actual_count = gen_counts.get(room_type, 0)
	            diff = abs(actual_count - required_count)

	            if diff <= tolerance:
	                # Fully satisfied
	                satisfied += 1
	                details.append(f"✓ {room_type}: {actual_count}/{required_count} (within tolerance)")
	            elif actual_count > 0:
	                # Partially satisfied: credit shrinks with the excess over
	                # the tolerance; the +1 keeps the denominator non-zero when
	                # the required count is 0.
	                partial_score = max(0, 1 - (diff - tolerance) / (required_count + 1))
	                partial += partial_score
	                details.append(f"△ {room_type}: {actual_count}/{required_count} (diff: {diff-tolerance}, partial: {partial_score:.1f})")
	            else:
	                # Completely missing
	                details.append(f"✗ {room_type}: MISSING (required: {required_count})")

	        # `constraints` is non-empty here, so `total` is at least 1.
	        score = (satisfied + partial) / total

	        return {
	            "score": score,
	            "satisfied": satisfied,
	            "partial": partial,
	            "total": total,
	            "details": details
	        }

	    def _evaluate_room_presence(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """Evaluate if required rooms exist

	        A ground-truth node counts as required unless it sets
	        ``required`` to a falsy value. Presence is checked by room type
	        only: one generated room of a type satisfies every required
	        ground-truth node of that type.

	        Returns:
	            Dict with ``score`` (found / total), ``found``, ``total`` and
	            ``details``; only ``score`` and ``details`` when nothing is
	            required.
	        """
	        required_nodes = [n for n in G_gt['nodes'] if n.get('required', True)]

	        if not required_nodes:
	            return {"score": 1.0, "details": ["No required rooms"]}

	        gen_types = {n['type'] for n in G_gen['nodes']}

	        found = 0
	        details = []

	        for node in required_nodes:
	            room_type = node['type']
	            if room_type in gen_types:
	                found += 1
	                details.append(f"✓ {room_type}")
	            else:
	                details.append(f"✗ {room_type} MISSING")

	        return {
	            "score": found / len(required_nodes),
	            "found": found,
	            "total": len(required_nodes),
	            "details": details
	        }

	    def _evaluate_adjacency(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """Evaluate adjacency constraints

	        Each entry of ``constraints.adjacency`` supplies ``rooms`` (a pair
	        of ground-truth node ids) and ``must_be_adjacent``. Constraints
	        whose rooms could not be matched to generated nodes are reported in
	        ``details`` but excluded from the score denominator.

	        Returns:
	            Dict with ``score``, ``details`` and, when constraints exist,
	            ``satisfied`` and ``total`` (number of evaluable constraints).
	        """
	        constraints = G_gt.get('constraints', {}).get('adjacency', [])

	        if not constraints:
	            return {"score": 1.0, "details": ["No adjacency constraints"]}

	        # Map ground-truth node ids to generated node ids
	        node_matching = self._get_node_matching(G_gen, G_gt)

	        # A generated graph without an 'edges' key has no adjacencies.
	        gen_adjacency = self._build_adjacency_dict(G_gen.get('edges', []))

	        satisfied = 0
	        total = 0
	        details = []

	        for constraint in constraints:
	            rooms = constraint['rooms']
	            must_be_adjacent = constraint['must_be_adjacent']

	            # Skip constraints that reference unmatched rooms
	            if not all(r in node_matching for r in rooms):
	                details.append(f"⊘ {rooms[0]} ↔ {rooms[1]}: rooms not found")
	                continue

	            total += 1

	            gen_rooms = [node_matching[r] for r in rooms]
	            # The adjacency dict is symmetric, so one direction suffices.
	            is_adjacent = gen_rooms[1] in gen_adjacency.get(gen_rooms[0], [])

	            if is_adjacent == must_be_adjacent:
	                satisfied += 1
	                symbol = "✓" if must_be_adjacent else "✓(NOT)"
	                details.append(f"{symbol} {rooms[0]} ↔ {rooms[1]}")
	            else:
	                symbol = "✗" if must_be_adjacent else "✗(IS)"
	                details.append(f"{symbol} {rooms[0]} ↔ {rooms[1]} - Expected: {must_be_adjacent}, Got: {is_adjacent}")

	        score = satisfied / total if total > 0 else 1.0

	        return {
	            "score": score,
	            "satisfied": satisfied,
	            "total": total,
	            "details": details
	        }

	    def _evaluate_constraints(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """Evaluate min/max area constraints

	        Returns:
	            Dict with the combined ``score`` (equal weight for min and max
	            area), the two sub-results under ``min_area`` / ``max_area``
	            and their concatenated ``details``.
	        """
	        min_area_result = self._evaluate_min_area(G_gen, G_gt)
	        max_area_result = self._evaluate_max_area(G_gen, G_gt)

	        # 50% min area, 50% max area
	        overall_score = 0.5 * min_area_result['score'] + 0.5 * max_area_result['score']

	        return {
	            "score": overall_score,
	            "min_area": min_area_result,
	            "max_area": max_area_result,
	            "details": min_area_result['details'] + max_area_result['details']
	        }

	    def _evaluate_min_area(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """Evaluate minimum area constraints with tolerance

	        ``constraints.min_areas`` maps a room type to its minimum area.
	        Every generated room of a constrained type is scored:

	        - actual >= min       -> 100%
	        - actual >= min * 0.8 -> partial credit (linear interpolation)
	        - actual <  min * 0.8 -> 0%

	        Room types that were not generated are listed in ``details`` but
	        do not affect the score. A room without an ``area`` key is treated
	        as having area 0.

	        Returns:
	            Dict with ``score`` (mean room score), ``details`` and, when
	            constraints exist, ``satisfied`` (rooms that fully meet the
	            minimum; partial credit is not counted) and ``total``.
	        """
	        constraints = G_gt.get('constraints', {}).get('min_areas', {})

	        if not constraints:
	            return {"score": 1.0, "details": ["No min area constraints"]}

	        gen_rooms_by_type = self._group_nodes_by_type(G_gen['nodes'])

	        total_score = 0
	        satisfied = 0
	        total = 0
	        details = []

	        for room_type, min_area in constraints.items():
	            rooms = gen_rooms_by_type.get(room_type, [])

	            if not rooms:
	                details.append(f"⊘ {room_type}: not generated")
	                continue

	            for room in rooms:
	                total += 1
	                actual_area = room.get('area', 0)

	                if actual_area >= min_area:
	                    satisfied += 1
	                    total_score += 1.0
	                    details.append(f"✓ {room_type}: {actual_area:.1f}m² ≥ {min_area}m²")
	                elif actual_area >= min_area * 0.8:
	                    # Partial credit: interpolate between 0.8*min and min
	                    ratio = (actual_area - min_area * 0.8) / (min_area * 0.2)
	                    total_score += ratio
	                    details.append(f"◐ {room_type}: {actual_area:.1f}m² ≈ {min_area}m² (partial: {ratio:.0%})")
	                else:
	                    details.append(f"✗ {room_type}: {actual_area:.1f}m² < {min_area}m² (deficit: {min_area - actual_area:.1f}m²)")

	        score = total_score / total if total > 0 else 1.0

	        return {
	            "score": score,
	            "satisfied": satisfied,
	            "total": total,
	            "details": details
	        }

	    def _evaluate_max_area(self, G_gen: Dict, G_gt: Dict) -> Dict:
	        """Evaluate maximum area constraints with tolerance

	        ``constraints.max_areas`` maps a room type to its maximum area.
	        Every generated room of a constrained type is scored:

	        - actual <= max       -> 100%
	        - actual <= max * 1.2 -> partial credit (linear interpolation)
	        - actual >  max * 1.2 -> 0%

	        Room types that were not generated are listed in ``details`` but
	        do not affect the score. A room without an ``area`` key is treated
	        as having area 0.

	        Returns:
	            Dict with ``score`` (mean room score), ``details`` and, when
	            constraints exist, ``satisfied`` (rooms that fully respect the
	            maximum; partial credit is not counted) and ``total``.
	        """
	        constraints = G_gt.get('constraints', {}).get('max_areas', {})

	        if not constraints:
	            return {"score": 1.0, "details": ["No max area constraints"]}

	        gen_rooms_by_type = self._group_nodes_by_type(G_gen['nodes'])

	        total_score = 0
	        satisfied = 0
	        total = 0
	        details = []

	        for room_type, max_area in constraints.items():
	            rooms = gen_rooms_by_type.get(room_type, [])

	            if not rooms:
	                details.append(f"⊘ {room_type}: not generated")
	                continue

	            for room in rooms:
	                total += 1
	                actual_area = room.get('area', 0)

	                if actual_area <= max_area:
	                    satisfied += 1
	                    total_score += 1.0
	                    details.append(f"✓ {room_type}: {actual_area:.1f}m² ≤ {max_area}m²")
	                elif actual_area <= max_area * 1.2:
	                    # Partial credit: interpolate between max and max*1.2
	                    excess_ratio = (actual_area - max_area) / (max_area * 0.2)
	                    room_score = 1.0 - excess_ratio
	                    total_score += room_score
	                    details.append(f"◐ {room_type}: {actual_area:.1f}m² ≈ {max_area}m² (partial: {room_score:.0%})")
	                else:
	                    details.append(f"✗ {room_type}: {actual_area:.1f}m² > {max_area}m² (excess: {actual_area - max_area:.1f}m²)")

	        score = total_score / total if total > 0 else 1.0

	        return {
	            "score": score,
	            "satisfied": satisfied,
	            "total": total,
	            "details": details
	        }

	    def _group_nodes_by_type(self, nodes: List[Dict]) -> Dict[str, List[Dict]]:
	        """Group node dicts by their ``type``, preserving input order."""
	        grouped = {}
	        for node in nodes:
	            grouped.setdefault(node['type'], []).append(node)
	        return grouped

	    def _count_room_types(self, nodes: List[Dict]) -> Dict[str, int]:
	        """Count room types

	        Returns a ``Counter`` keyed by each node's ``type``.
	        """
	        return Counter(n['type'] for n in nodes)

	    def _get_node_matching(self, G_gen: Dict, G_gt: Dict) -> Dict[str, str]:
	        """Match GT and Generated nodes

	        Each required ground-truth node is paired with the first generated
	        node of the same type that is not already paired. Ground-truth
	        nodes with no free counterpart are left out of the result.

	        Returns:
	            Mapping of ground-truth node id -> generated node id.
	        """
	        gen_by_type = {}
	        for node in G_gen['nodes']:
	            gen_by_type.setdefault(node['type'], []).append(node['id'])

	        matching = {}
	        # Generated ids currently in use; mirrors `matching.values()` but
	        # gives constant-time membership tests.
	        used = set()

	        for gt_node in G_gt['nodes']:
	            if not gt_node.get('required', True):
	                continue

	            gt_id = gt_node['id']
	            # Simple pairing: take first unused
	            for gen_id in gen_by_type.get(gt_node['type'], []):
	                if gen_id in used:
	                    continue
	                # A repeated ground-truth id replaces its earlier match,
	                # which frees the previously assigned generated node.
	                if gt_id in matching:
	                    used.discard(matching[gt_id])
	                matching[gt_id] = gen_id
	                used.add(gen_id)
	                break

	        return matching

	    def _build_adjacency_dict(self, edges: List[Dict]) -> Dict[str, List[str]]:
	        """Build adjacency dictionary

	        Every edge is recorded in both directions, so the result can be
	        queried from either endpoint.
	        """
	        adjacency = {}
	        for edge in edges:
	            from_node = edge['from']
	            to_node = edge['to']
	            adjacency.setdefault(from_node, []).append(to_node)
	            adjacency.setdefault(to_node, []).append(from_node)
	        return adjacency

	    def _generate_interpretation(self, overall: float, count_score: Dict) -> str:
	        """Generate score interpretation

	        Maps the overall score to a quality label and appends a room-count
	        note when the count sub-score is below 0.8.
	        """
	        if overall >= 0.9:
	            quality = "Excellent"
	        elif overall >= 0.8:
	            quality = "Good"
	        elif overall >= 0.7:
	            quality = "Acceptable"
	        else:
	            quality = "Needs improvement"

	        interpretation = f"{quality} ({overall:.1%})"

	        # Add specific issues
	        if count_score['score'] < 0.8:
	            satisfied = count_score.get('satisfied', 0)
	            total = count_score.get('total', 1)
	            interpretation += f" - Room count issues ({satisfied}/{total} correct)"

	        return interpretation