# Copyright (c) 2026 CtrlAltWin Team
"""
Deterministic Grader — Scores packing quality from 0.0 to 1.0.

Scoring formula:
    score = 0.4 * validity + 0.3 * efficiency + 0.2 * constraints + 0.1 * neatness

Each component:
    validity    — food placed in type-compatible container?
    efficiency  — space utilization vs total capacity used
    constraints — temperature separation, fragility, flavor isolation
    neatness    — all items packed? nothing dropped?
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional

from .tasks import TaskConfig
from .simulation.engine import is_type_compatible


# Weight of each component in the final score. Single source of truth for
# both grade() and grade_detailed(); the weights sum to 1.0.
_WEIGHTS: Dict[str, float] = {
    "validity": 0.4,
    "efficiency": 0.3,
    "constraints": 0.2,
    "neatness": 0.1,
}

# Food names treated as strong-flavored by the flavor-isolation check.
# Matching is an exact, case-sensitive comparison against "food_name".
_STRONG_FLAVORS = frozenset({"pickle", "chutney"})


def grade(
    packing_log: List[Dict[str, Any]],
    task_config: TaskConfig,
) -> float:
    """
    Grade a packing episode. Returns score between 0.0 and 1.0.

    Args:
        packing_log: List of placement records from the simulation.
        task_config: The task configuration used for this episode.

    Returns:
        Final score (0.0 to 1.0), rounded to 4 decimal places.
        A task with no food items always scores 0.0.
    """
    total_items = len(task_config.food_items)
    if total_items == 0:
        return 0.0
    components = _component_scores(packing_log, task_config, total_items)
    return _weighted_total(components)


def grade_detailed(
    packing_log: List[Dict[str, Any]],
    task_config: TaskConfig,
) -> Dict[str, Any]:
    """
    Grade with full breakdown for debugging.

    Args:
        packing_log: List of placement records from the simulation.
        task_config: The task configuration used for this episode.

    Returns:
        Dict with "final_score" (same value grade() returns for the same
        inputs), each component score rounded to 4 decimal places,
        "items_packed", "total_items", and a copy of the component "weights".
    """
    total_items = len(task_config.food_items)
    components = _component_scores(packing_log, task_config, total_items)
    # Mirror grade(): a task without food items scores 0.0 regardless of the log.
    final_score = _weighted_total(components) if total_items else 0.0
    return {
        "final_score": final_score,
        "validity": round(components["validity"], 4),
        "efficiency": round(components["efficiency"], 4),
        "constraints": round(components["constraints"], 4),
        "neatness": round(components["neatness"], 4),
        "items_packed": len(packing_log),
        "total_items": total_items,
        # Copy so callers cannot mutate the module-level weights.
        "weights": dict(_WEIGHTS),
    }


def _component_scores(
    packing_log: List[Dict[str, Any]],
    task_config: TaskConfig,
    total_items: int,
) -> Dict[str, float]:
    """Compute the four unrounded component scores, keyed like _WEIGHTS."""
    return {
        "validity": _score_validity(packing_log, total_items),
        "efficiency": _score_efficiency(packing_log, task_config),
        "constraints": _score_constraints(packing_log, task_config),
        "neatness": _score_neatness(packing_log, total_items),
    }


def _weighted_total(components: Dict[str, float]) -> float:
    """Combine component scores with _WEIGHTS, clamp to [0, 1], round to 4 places."""
    # Explicit left-to-right addition (not sum()) keeps the floating-point
    # result independent of the interpreter's float summation strategy.
    raw = (
        _WEIGHTS["validity"] * components["validity"]
        + _WEIGHTS["efficiency"] * components["efficiency"]
        + _WEIGHTS["constraints"] * components["constraints"]
        + _WEIGHTS["neatness"] * components["neatness"]
    )
    return round(max(0.0, min(1.0, raw)), 4)


# -----------------------------------------------------------------------
# Component scorers
# -----------------------------------------------------------------------

def _score_validity(packing_log: List[Dict], total_items: int) -> float:
    """Score: food placed in type-compatible container? (0-1)

    Counts log entries whose "type_compatible" flag is truthy and divides by
    the number of items in the task (not by the number of log entries), so
    unpacked items lower the score.
    """
    if not packing_log:
        return 0.0
    correct = sum(1 for entry in packing_log if entry.get("type_compatible", False))
    # Clamp: a log with more entries than task items must not score above 1.
    return min(1.0, correct / max(total_items, 1))


def _score_efficiency(packing_log: List[Dict], task_config: TaskConfig) -> float:
    """Score: how well is container space utilized? (0-1)

    Utilization is the summed "food_volume" of all log entries divided by the
    summed capacity_ml of the containers referenced in the log. Each entry
    flagged "overflow" reduces the result by 20%, down to a floor of 30% of
    the raw utilization.
    """
    if not packing_log:
        return 0.0

    total_food_vol = sum(entry.get("food_volume", 0) for entry in packing_log)

    # Only containers that actually received food count toward capacity.
    used_container_ids = {entry.get("container_id") for entry in packing_log}
    total_capacity = sum(
        c.capacity_ml for c in task_config.containers if c.id in used_container_ids
    )
    if total_capacity == 0:
        return 0.0

    utilization = total_food_vol / total_capacity

    # Penalize overflow
    overflow_count = sum(1 for entry in packing_log if entry.get("overflow", False))
    if overflow_count > 0:
        utilization *= max(0.3, 1.0 - 0.2 * overflow_count)

    return min(1.0, utilization)


def _score_constraints(packing_log: List[Dict], task_config: TaskConfig) -> float:
    """Score: task-specific constraints satisfied? (0-1)

    Averages the score of every constraint named in task_config.constraints
    that this grader knows about. Returns 0.0 for an empty log and 1.0 when
    none of the known constraints is active.
    """
    if not packing_log:
        return 0.0

    scores: List[float] = []
    active = set(task_config.constraints)

    if "temperature_separation" in active:
        scores.append(_check_temperature(packing_log))

    if "fragility_ordering" in active:
        scores.append(_check_fragility(packing_log))

    if "flavor_isolation" in active:
        scores.append(_check_flavor_isolation(packing_log))

    if "no_overflow" in active:
        overflow_count = sum(1 for e in packing_log if e.get("overflow", False))
        # 1.0 with no overflow; each overflow costs 0.3, floored at 0.0.
        scores.append(max(0.0, 1.0 - 0.3 * overflow_count))

    if "type_match" in active:
        correct = sum(1 for e in packing_log if e.get("type_compatible", False))
        scores.append(correct / max(len(packing_log), 1))

    if not scores:
        return 1.0  # no constraints to violate

    return sum(scores) / len(scores)


def _check_temperature(packing_log: List[Dict]) -> float:
    """Check if hot and cold items are kept separate.

    Returns 1.0 minus the fraction of used containers that hold both a "hot"
    and a "cold" item. Entries without "food_temperature" count as "room".
    """
    # Group items by container
    container_temps: Dict[Any, List[str]] = {}
    for entry in packing_log:
        cid = entry.get("container_id")
        temp = entry.get("food_temperature", "room")
        container_temps.setdefault(cid, []).append(temp)

    total_containers = len(container_temps)
    if total_containers == 0:
        return 1.0

    violations = sum(
        1 for temps in container_temps.values() if "hot" in temps and "cold" in temps
    )
    return max(0.0, 1.0 - violations / total_containers)


def _check_fragility(packing_log: List[Dict]) -> float:
    """Check if fragile items are not crushed by heavy items placed after them.

    Only consecutive placements within the same container are compared: a
    violation is an item with fragility below 0.4 placed directly after an
    item with fragility above 0.6. Entries without "food_fragility" count as
    0.5. Returns 1.0 minus the fraction of violating consecutive pairs.
    """
    # Group by container, preserving placement order
    container_order: Dict[Any, List[float]] = {}
    for entry in packing_log:
        cid = entry.get("container_id")
        frag = entry.get("food_fragility", 0.5)
        container_order.setdefault(cid, []).append(frag)

    violations = 0
    checks = 0
    for fragilities in container_order.values():
        for earlier, later in zip(fragilities, fragilities[1:]):
            checks += 1
            # A less fragile (heavy) item placed right AFTER a more fragile one
            if later < 0.4 and earlier > 0.6:
                violations += 1

    if checks == 0:
        return 1.0
    return max(0.0, 1.0 - violations / checks)


def _check_flavor_isolation(packing_log: List[Dict]) -> float:
    """Check that strong-flavor items (pickle, chutney) are isolated.

    Among containers holding at least one strong-flavor item, returns 1.0
    minus the fraction that also hold an item that is not strong-flavored.
    Returns 1.0 when no container holds a strong-flavor item.
    """
    # Group by container
    container_contents: Dict[Any, List[str]] = {}
    for entry in packing_log:
        cid = entry.get("container_id")
        name = entry.get("food_name", "")
        container_contents.setdefault(cid, []).append(name)

    violations = 0
    total = 0
    for contents in container_contents.values():
        if not any(name in _STRONG_FLAVORS for name in contents):
            continue
        total += 1
        # Strong flavor sharing a container with anything else is a violation.
        if any(name not in _STRONG_FLAVORS for name in contents):
            violations += 1

    if total == 0:
        return 1.0
    return max(0.0, 1.0 - violations / total)


def _score_neatness(packing_log: List[Dict], total_items: int) -> float:
    """Score: fraction of items successfully packed. (0-1)"""
    if total_items == 0:
        return 0.0
    # Clamp: a log with more entries than task items must not score above 1.
    return min(1.0, len(packing_log) / total_items)