"""Deterministic grading — 5-component reward for CommitmentOS. Components: constraint_satisfaction (0.35) — binary per scenario constraint conflict_resolution (0.20) — final calendar free of overlaps commitment_coherence (0.20) — ledger violations penalised communication_quality (0.15) — keyword matching on sent emails step_efficiency (0.10) — fewer steps = higher score """ from __future__ import annotations from typing import Any, Dict, List, Tuple from server.domain import ScenarioDef from server.world import WorldState, _time_to_min WEIGHTS: Dict[str, float] = { "constraint_satisfaction": 0.35, "conflict_resolution": 0.20, "commitment_coherence": 0.20, "communication_quality": 0.15, "step_efficiency": 0.10, } def _keyword_score(text: str, keywords: List[str], min_matches: int = 2) -> Tuple[float, List[str]]: """0 hits -> 0.0, < min_matches -> 0.5 (partial), >= min_matches -> 1.0.""" text_lower = text.lower() matched = [kw for kw in keywords if kw.lower() in text_lower] if len(matched) == 0: return 0.0, matched if len(matched) < min_matches: return 0.5, matched return 1.0, matched def _check_constraint(constraint, world: WorldState) -> bool: """Evaluate a single ConstraintDef against the world state.""" ct = constraint.check_type params = constraint.check_params if ct == "calendar_no_conflict": return _calendar_has_no_overlaps(world) elif ct == "event_exists": eid = params.get("event_id", "") return eid in world.calendar elif ct == "event_cancelled": eid = params.get("event_id", "") return eid not in world.calendar elif ct == "email_sent": to = params.get("to", "").lower() keywords = params.get("keywords", []) for em in world.emails_sent: if to in em.get("to", "").lower(): if keywords: score, _ = _keyword_score(em.get("body", ""), keywords, min_matches=1) if score > 0: return True else: return True return False elif ct == "restaurant_match": name = params.get("name", "") if name: return world.booked_restaurant.lower() == name.lower() criteria = params.get("criteria", {}) if not world.booked_restaurant: return False r = world.restaurants.get(world.booked_restaurant) if r is None: return False if "dietary" in criteria and criteria["dietary"].lower() not in [d.lower() for d in r.dietary_options]: return False if "max_price" in criteria and r.price_per_person > criteria["max_price"]: return False if "max_distance" in criteria and r.distance_miles > criteria["max_distance"]: return False if "near_airport" in criteria and criteria["near_airport"] and not r.near_airport: return False return True elif ct == "priority_order": higher = params.get("higher", "").lower() lower = params.get("lower", "").lower() higher_kept = any( ev.title.lower() == higher or higher in ev.title.lower() for ev in world.calendar.values() ) lower_moved = not any( ev.title.lower() == lower or lower in ev.title.lower() for ev in world.calendar.values() ) or any( em.get("to", "").lower() == lower or lower in em.get("body", "").lower() for em in world.emails_sent ) return higher_kept and lower_moved return False def _calendar_has_no_overlaps(world: WorldState) -> bool: events = list(world.calendar.values()) for i, a in enumerate(events): for b in events[i + 1:]: if a.date != b.date: continue a_start = _time_to_min(a.time) a_end = a_start + a.duration_min b_start = _time_to_min(b.time) b_end = b_start + b.duration_min if a_start < b_end and b_start < a_end: return False return True def _score_constraint_satisfaction(scenario: ScenarioDef, world: WorldState) -> Tuple[float, str]: if not scenario.constraints: return 1.0, "No constraints defined" met = sum(1 for c in scenario.constraints if _check_constraint(c, world)) total = len(scenario.constraints) score = met / total return score, f"{met}/{total} constraints met" def _score_conflict_resolution(world: WorldState) -> Tuple[float, str]: clean = _calendar_has_no_overlaps(world) return (1.0 if clean else 0.0), ("No calendar conflicts" if clean else "Calendar has overlapping events") def _score_commitment_coherence(world: WorldState) -> Tuple[float, str]: total = len(world.commitment_ledger) if total == 0: return 1.0, "No commitments created" violations = world.get_silent_violations() silent_count = len(violations) renegotiated = sum(1 for c in world.commitment_ledger if c.renegotiated_at is not None) honored = total - silent_count - renegotiated score = (total - silent_count) / total parts = [] if honored > 0: parts.append(f"{honored} honored") if renegotiated > 0: parts.append(f"{renegotiated} renegotiated") if silent_count > 0: parts.append(f"{silent_count} SILENTLY BROKEN") return score, " | ".join(parts) if parts else "OK" def _score_communication(scenario: ScenarioDef, world: WorldState) -> Tuple[float, str]: reqs = scenario.communication_requirements if not reqs: return 1.0, "No communication requirements" total_score = 0.0 feedback_parts: List[str] = [] for req in reqs: to_lower = req.to.lower() matching_emails = [ em for em in world.emails_sent if to_lower in em.get("to", "").lower() ] if not matching_emails: feedback_parts.append(f"MISSING email to {req.to}") continue best_score = 0.0 for em in matching_emails: body = em.get("body", "") + " " + em.get("subject", "") if req.required_keywords: ks, matched = _keyword_score(body, req.required_keywords, min_matches=1) best_score = max(best_score, ks) else: best_score = 1.0 total_score += best_score if best_score >= 1.0: feedback_parts.append(f"Email to {req.to}: full credit") elif best_score > 0: feedback_parts.append(f"Email to {req.to}: partial ({best_score:.1f})") else: feedback_parts.append(f"Email to {req.to}: missing keywords") score = total_score / len(reqs) if reqs else 1.0 return score, " | ".join(feedback_parts) def _score_step_efficiency(scenario: ScenarioDef, world: WorldState) -> Tuple[float, str]: optimal = scenario.optimal_steps actual = world.step_count if actual <= optimal: return 1.0, f"{actual} steps (optimal: {optimal})" penalty = (actual - optimal) * 0.1 score = max(0.0, 1.0 - penalty) return score, f"{actual} steps (optimal: {optimal}, penalty: -{penalty:.1f})" def grade_scenario( scenario: ScenarioDef, world: WorldState, ) -> Tuple[float, Dict[str, float], str]: """Returns ``(total_reward, breakdown, feedback)``.""" breakdown: Dict[str, float] = {} feedback_parts: List[str] = [] cs_score, cs_fb = _score_constraint_satisfaction(scenario, world) breakdown["constraint_satisfaction"] = round(cs_score * WEIGHTS["constraint_satisfaction"], 4) feedback_parts.append(f"[constraints] {cs_fb}") cr_score, cr_fb = _score_conflict_resolution(world) breakdown["conflict_resolution"] = round(cr_score * WEIGHTS["conflict_resolution"], 4) feedback_parts.append(f"[conflicts] {cr_fb}") cc_score, cc_fb = _score_commitment_coherence(world) breakdown["commitment_coherence"] = round(cc_score * WEIGHTS["commitment_coherence"], 4) feedback_parts.append(f"[commitments] {cc_fb}") cq_score, cq_fb = _score_communication(scenario, world) breakdown["communication_quality"] = round(cq_score * WEIGHTS["communication_quality"], 4) feedback_parts.append(f"[communication] {cq_fb}") se_score, se_fb = _score_step_efficiency(scenario, world) breakdown["step_efficiency"] = round(se_score * WEIGHTS["step_efficiency"], 4) feedback_parts.append(f"[efficiency] {se_fb}") total_reward = round(sum(breakdown.values()), 4) total_reward = max(0.01, min(0.99, total_reward)) feedback = " | ".join(feedback_parts) return total_reward, breakdown, feedback