# Source path: Scene_Foundry_Demo/evaluation/balanced_evaluator.py
# Page residue: "Chunteng's picture"
# Commit message: Initial commit (Fresh Start)
# Commit hash: a03fc9e
"""
Balanced Evaluator
Between Realistic and Strict evaluators
- Checks room counts with tolerance
- Moderate penalty for extra rooms
- Balanced scoring approach
"""
from typing import Dict, List
import numpy as np
from collections import Counter
class BalancedEvaluator:
    """Balanced evaluator - discriminating but not overly strict.

    Scores a generated room graph against a ground-truth specification using
    four weighted components: room counts, room presence, adjacency and
    min/max area constraints.

    Input shape, as read by the methods below:
        G_gen: {'nodes': [{'id', 'type', 'area' (optional)}, ...],
                'edges': [{'from', 'to'}, ...]}
        G_gt:  {'nodes': [{'id', 'type', 'required' (optional)}, ...],
                'constraints': {'room_counts': [...], 'adjacency': [...],
                                'min_areas': {...}, 'max_areas': {...}}}
    Every entry under 'constraints' is optional; a missing or empty entry
    yields a full score for that component.
    """

    def evaluate(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """
        Balanced evaluation

        Args:
            G_gen: Generated graph
            G_gt: Ground truth

        Returns:
            Evaluation result: ``overall_score`` (weighted sum of the four
            component scores), the four component result dicts, a
            human-readable ``interpretation`` and a ``generated_summary``
            of the generated nodes.
        """
        # 1. Room count accuracy (with tolerance)
        count_score = self._evaluate_room_counts(G_gen, G_gt)
        # 2. Room type presence
        presence_score = self._evaluate_room_presence(G_gen, G_gt)
        # 3. Adjacency constraints
        adjacency_score = self._evaluate_adjacency(G_gen, G_gt)
        # 4. Other constraints
        constraint_score = self._evaluate_constraints(G_gen, G_gt)

        # Overall score
        overall_score = (
            0.30 * count_score['score'] +      # 30%: Count accuracy
            0.20 * presence_score['score'] +   # 20%: Room presence
            0.30 * adjacency_score['score'] +  # 30%: Adjacency
            0.20 * constraint_score['score']   # 20%: Other constraints
        )

        return {
            "overall_score": overall_score,
            "room_counts": count_score,
            "room_presence": presence_score,
            "adjacency": adjacency_score,
            "constraints": constraint_score,
            "interpretation": self._generate_interpretation(overall_score, count_score),
            "generated_summary": {
                "total_rooms": len(G_gen['nodes']),
                "room_types": self._count_room_types(G_gen['nodes'])
            }
        }

    def _evaluate_room_counts(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """
        Evaluate room counts with tolerance

        Different from Strict:
        - Full score when no constraints (no penalty)
        - Tolerance allows some deviation

        Each constraint is a dict with 'type', 'count' and an optional
        'tolerance' (default 0). A constraint within tolerance counts as 1;
        one outside tolerance whose room type still exists earns partial
        credit ``max(0, 1 - (diff - tolerance) / (count + 1))``; a missing
        room type earns nothing.

        Returns:
            Dict with 'score', 'satisfied', 'partial', 'total' and 'details'
            (only 'score' and 'details' when there are no count constraints).
        """
        constraints = G_gt.get('constraints', {}).get('room_counts', [])
        if not constraints:
            # No count constraints, only check type presence
            return {"score": 1.0, "details": ["No count constraints - types checked only"]}

        gen_counts = self._count_room_types(G_gen['nodes'])
        satisfied = 0
        partial = 0
        total = len(constraints)
        details = []

        for constraint in constraints:
            room_type = constraint['type']
            required_count = constraint['count']
            tolerance = constraint.get('tolerance', 0)
            actual_count = gen_counts.get(room_type, 0)
            diff = abs(actual_count - required_count)

            if diff <= tolerance:
                # Fully satisfied
                satisfied += 1
                details.append(f"✓ {room_type}: {actual_count}/{required_count} (within tolerance)")
            elif actual_count > 0:
                # Partially satisfied (exceeds tolerance but room exists)
                partial_score = max(0, 1 - (diff - tolerance) / (required_count + 1))
                partial += partial_score
                details.append(f"△ {room_type}: {actual_count}/{required_count} (diff: {diff-tolerance}, partial: {partial_score:.1f})")
            else:
                # Completely missing
                details.append(f"✗ {room_type}: MISSING (required: {required_count})")

        # Score: fully satisfied + partially satisfied
        score = (satisfied + partial) / total if total > 0 else 1.0
        return {
            "score": score,
            "satisfied": satisfied,
            "partial": partial,
            "total": total,
            "details": details
        }

    def _evaluate_room_presence(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """Evaluate if required rooms exist

        A ground-truth node is required unless it carries ``required: False``.
        Presence is checked by room type only, so two required nodes of the
        same type are both satisfied by a single generated room of that type.

        Returns:
            Dict with 'score' (found / required), 'found', 'total' and
            'details' (only 'score' and 'details' when nothing is required).
        """
        required_nodes = [n for n in G_gt['nodes'] if n.get('required', True)]
        if not required_nodes:
            return {"score": 1.0, "details": ["No required rooms"]}

        gen_types = {n['type'] for n in G_gen['nodes']}
        found = 0
        details = []
        for node in required_nodes:
            room_type = node['type']
            if room_type in gen_types:
                found += 1
                details.append(f"✓ {room_type}")
            else:
                details.append(f"✗ {room_type} MISSING")

        return {
            "score": found / len(required_nodes),
            "found": found,
            "total": len(required_nodes),
            "details": details
        }

    def _evaluate_adjacency(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """Evaluate adjacency constraints

        Each constraint is a dict with 'rooms' (a pair of ground-truth node
        ids) and 'must_be_adjacent' (bool). Constraints whose rooms could not
        be matched to generated nodes are reported but excluded from the
        score, so they do not lower it.

        Returns:
            Dict with 'score' (satisfied / checked), 'satisfied', 'total'
            (number of constraints actually checked) and 'details'.
        """
        constraints = G_gt.get('constraints', {}).get('adjacency', [])
        if not constraints:
            return {"score": 1.0, "details": ["No adjacency constraints"]}

        # Build node matching
        node_matching = self._get_node_matching(G_gen, G_gt)
        # Build adjacency dictionary; a graph without an 'edges' key is
        # treated as having no edges instead of raising KeyError.
        gen_adjacency = self._build_adjacency_dict(G_gen.get('edges', []))

        satisfied = 0
        total = 0
        details = []
        for constraint in constraints:
            rooms = constraint['rooms']
            must_be_adjacent = constraint['must_be_adjacent']
            # Separator keeps the two ids distinguishable in the report
            # (plain concatenation made 'r1'+'r12' look like 'r11'+'r2').
            pair = f"{rooms[0]} ↔ {rooms[1]}"

            # Check if rooms exist
            if not all(r in node_matching for r in rooms):
                details.append(f"⊘ {pair}: rooms not found")
                continue

            total += 1
            gen_rooms = [node_matching[r] for r in rooms]
            is_adjacent = gen_rooms[1] in gen_adjacency.get(gen_rooms[0], [])

            if is_adjacent == must_be_adjacent:
                satisfied += 1
                symbol = "✓" if must_be_adjacent else "✓(NOT)"
                details.append(f"{symbol} {pair}")
            else:
                symbol = "✗" if must_be_adjacent else "✗(IS)"
                details.append(f"{symbol} {pair} - Expected: {must_be_adjacent}, Got: {is_adjacent}")

        score = satisfied / total if total > 0 else 1.0
        return {
            "score": score,
            "satisfied": satisfied,
            "total": total,
            "details": details
        }

    def _evaluate_constraints(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """Evaluate min/max area constraints

        Returns:
            Dict with 'score' (equal-weight mean of the min-area and
            max-area scores), the two sub-results under 'min_area' and
            'max_area', and their concatenated 'details'.
        """
        min_area_result = self._evaluate_min_area(G_gen, G_gt)
        max_area_result = self._evaluate_max_area(G_gen, G_gt)

        # 50% min area, 50% max area
        overall_score = 0.5 * min_area_result['score'] + 0.5 * max_area_result['score']
        return {
            "score": overall_score,
            "min_area": min_area_result,
            "max_area": max_area_result,
            "details": min_area_result['details'] + max_area_result['details']
        }

    def _group_nodes_by_type(self, nodes: List[Dict]) -> Dict[str, List[Dict]]:
        """Group node dicts by their 'type' value, preserving input order."""
        grouped: Dict[str, List[Dict]] = {}
        for node in nodes:
            grouped.setdefault(node['type'], []).append(node)
        return grouped

    def _evaluate_min_area(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """Evaluate minimum area constraints with tolerance

        'min_areas' maps room type to a minimum area. Every generated room of
        a constrained type is scored individually (a missing 'area' is read
        as 0):
        - actual >= min      -> 100%
        - actual >= min*0.8  -> partial credit (linear interpolation)
        - actual <  min*0.8  -> 0%
        Constrained types with no generated room are reported but not scored.

        Returns:
            Dict with 'score' (mean room score), 'satisfied' (rooms that
            fully meet the minimum), 'partial' (summed partial credit),
            'total' (rooms scored) and 'details'.
        """
        constraints = G_gt.get('constraints', {}).get('min_areas', {})
        if not constraints:
            return {"score": 1.0, "details": ["No min area constraints"]}

        # Build room type to nodes mapping
        gen_rooms_by_type = self._group_nodes_by_type(G_gen['nodes'])

        total_score = 0
        satisfied = 0   # rooms that fully meet the constraint
        partial = 0     # accumulated partial credit
        total = 0
        details = []

        for room_type, min_area in constraints.items():
            rooms = gen_rooms_by_type.get(room_type, [])
            if not rooms:
                details.append(f"⊘ {room_type}: not generated")
                continue

            # Check each room with tolerance
            for room in rooms:
                total += 1
                actual_area = room.get('area', 0)
                if actual_area >= min_area:
                    satisfied += 1
                    total_score += 1.0
                    details.append(f"✓ {room_type}: {actual_area:.1f}m² ≥ {min_area}m²")
                elif actual_area >= min_area * 0.8:
                    # Partial credit: interpolate between 0.8*min and min
                    ratio = (actual_area - min_area * 0.8) / (min_area * 0.2)
                    partial += ratio
                    total_score += ratio
                    details.append(f"◐ {room_type}: {actual_area:.1f}m² ≈ {min_area}m² (partial: {ratio:.0%})")
                else:
                    details.append(f"✗ {room_type}: {actual_area:.1f}m² < {min_area}m² (deficit: {min_area - actual_area:.1f}m²)")

        score = total_score / total if total > 0 else 1.0
        return {
            "score": score,
            # Count of fully satisfied rooms; truncating the summed credit
            # would report partial credit as satisfied rooms.
            "satisfied": satisfied,
            "partial": partial,
            "total": total,
            "details": details
        }

    def _evaluate_max_area(self, G_gen: Dict, G_gt: Dict) -> Dict:
        """Evaluate maximum area constraints with tolerance

        'max_areas' maps room type to a maximum area. Every generated room of
        a constrained type is scored individually (a missing 'area' is read
        as 0):
        - actual <= max      -> 100%
        - actual <= max*1.2  -> partial credit (linear interpolation)
        - actual >  max*1.2  -> 0%
        Constrained types with no generated room are reported but not scored.

        Returns:
            Dict with 'score' (mean room score), 'satisfied' (rooms that
            fully meet the maximum), 'partial' (summed partial credit),
            'total' (rooms scored) and 'details'.
        """
        constraints = G_gt.get('constraints', {}).get('max_areas', {})
        if not constraints:
            return {"score": 1.0, "details": ["No max area constraints"]}

        # Build room type to nodes mapping
        gen_rooms_by_type = self._group_nodes_by_type(G_gen['nodes'])

        total_score = 0
        satisfied = 0   # rooms that fully meet the constraint
        partial = 0     # accumulated partial credit
        total = 0
        details = []

        for room_type, max_area in constraints.items():
            rooms = gen_rooms_by_type.get(room_type, [])
            if not rooms:
                details.append(f"⊘ {room_type}: not generated")
                continue

            # Check each room with tolerance
            for room in rooms:
                total += 1
                actual_area = room.get('area', 0)
                if actual_area <= max_area:
                    satisfied += 1
                    total_score += 1.0
                    details.append(f"✓ {room_type}: {actual_area:.1f}m² ≤ {max_area}m²")
                elif actual_area <= max_area * 1.2:
                    # Partial credit: interpolate between max and max*1.2
                    excess_ratio = (actual_area - max_area) / (max_area * 0.2)
                    room_score = 1.0 - excess_ratio
                    partial += room_score
                    total_score += room_score
                    details.append(f"◐ {room_type}: {actual_area:.1f}m² ≈ {max_area}m² (partial: {room_score:.0%})")
                else:
                    details.append(f"✗ {room_type}: {actual_area:.1f}m² > {max_area}m² (excess: {actual_area - max_area:.1f}m²)")

        score = total_score / total if total > 0 else 1.0
        return {
            "score": score,
            # Count of fully satisfied rooms; truncating the summed credit
            # would report partial credit as satisfied rooms.
            "satisfied": satisfied,
            "partial": partial,
            "total": total,
            "details": details
        }

    def _count_room_types(self, nodes: List[Dict]) -> Dict[str, int]:
        """Count room types

        Returns:
            A ``Counter`` mapping each node 'type' to its number of nodes.
        """
        return Counter(n['type'] for n in nodes)

    def _get_node_matching(self, G_gen: Dict, G_gt: Dict) -> Dict[str, str]:
        """Match GT and Generated nodes

        Greedy pairing by room type: each required ground-truth node takes
        the first generated node of the same type that is not already
        paired. Ground-truth nodes with no free candidate are left out.

        Returns:
            Mapping of ground-truth node id -> generated node id.
        """
        gen_by_type: Dict[str, List[str]] = {}
        for node in G_gen['nodes']:
            gen_by_type.setdefault(node['type'], []).append(node['id'])

        matching: Dict[str, str] = {}
        used = set()  # generated ids already paired; O(1) membership test
        for gt_node in G_gt['nodes']:
            if not gt_node.get('required', True):
                continue
            # Simple pairing: take first unused
            for gen_id in gen_by_type.get(gt_node['type'], []):
                if gen_id not in used:
                    matching[gt_node['id']] = gen_id
                    used.add(gen_id)
                    break
        return matching

    def _build_adjacency_dict(self, edges: List[Dict]) -> Dict[str, List[str]]:
        """Build adjacency dictionary

        Edges are treated as undirected: each {'from', 'to'} edge registers
        both endpoints as neighbours of each other.
        """
        adjacency: Dict[str, List[str]] = {}
        for edge in edges:
            from_node = edge['from']
            to_node = edge['to']
            adjacency.setdefault(from_node, []).append(to_node)
            adjacency.setdefault(to_node, []).append(from_node)
        return adjacency

    def _generate_interpretation(self, overall: float, count_score: Dict) -> str:
        """Generate score interpretation

        Args:
            overall: Overall score.
            count_score: Result dict of the room-count evaluation.

        Returns:
            A quality label with the score as a percentage, followed by a
            room-count note when the count score is below 0.8.
        """
        if overall >= 0.9:
            quality = "Excellent"
        elif overall >= 0.8:
            quality = "Good"
        elif overall >= 0.7:
            quality = "Acceptable"
        else:
            quality = "Needs improvement"

        interpretation = f"{quality} ({overall:.1%})"

        # Add specific issues
        if count_score['score'] < 0.8:
            satisfied = count_score.get('satisfied', 0)
            total = count_score.get('total', 1)
            interpretation += f" - Room count issues ({satisfied}/{total} correct)"
        return interpretation