Spaces:

Jayant2304
/

commitment-os

Sleeping

File size: 8,771 Bytes

"""Deterministic grading — 5-component reward for CommitmentOS.

Components:
  constraint_satisfaction (0.35) — binary per scenario constraint
  conflict_resolution     (0.20) — final calendar free of overlaps
  commitment_coherence    (0.20) — ledger violations penalised
  communication_quality   (0.15) — keyword matching on sent emails
  step_efficiency         (0.10) — fewer steps = higher score
"""

from __future__ import annotations

from typing import Any, Dict, List, Tuple

from server.domain import ScenarioDef
from server.world import WorldState, _time_to_min

# Relative weight of each grading component in the final reward.
# The five weights add up to 1.0, matching the module docstring.
WEIGHTS: Dict[str, float] = dict(
    constraint_satisfaction=0.35,
    conflict_resolution=0.20,
    commitment_coherence=0.20,
    communication_quality=0.15,
    step_efficiency=0.10,
)


def _keyword_score(text: str, keywords: List[str], min_matches: int = 2) -> Tuple[float, List[str]]:
    """Grade *text* by how many of *keywords* occur in it.

    Matching is a case-insensitive substring test. The keywords that matched
    are returned in their original order and spelling.

    Returns:
        ``(score, hits)`` where score is 0.0 when nothing matched, 0.5 when
        some keywords matched but fewer than ``min_matches``, and 1.0 otherwise.
    """
    haystack = text.lower()
    hits: List[str] = []
    for keyword in keywords:
        if keyword.lower() in haystack:
            hits.append(keyword)

    if not hits:
        score = 0.0
    elif len(hits) < min_matches:
        score = 0.5
    else:
        score = 1.0
    return score, hits


def _check_constraint(constraint, world: WorldState) -> bool:
    """Evaluate a single ConstraintDef against the world state.

    The check performed is selected by ``constraint.check_type`` and configured
    by ``constraint.check_params``:

    * ``calendar_no_conflict`` -- the calendar has no overlapping events.
    * ``event_exists`` / ``event_cancelled`` -- ``event_id`` is / is not a key
      of ``world.calendar``.
    * ``email_sent`` -- some sent email has ``to`` as a case-insensitive
      substring of its recipient and, when ``keywords`` is non-empty, at least
      one keyword in its body.
    * ``restaurant_match`` -- the booked restaurant equals ``name``
      (case-insensitive) or, when no name is given, satisfies every entry
      present in ``criteria`` (``dietary``, ``max_price``, ``max_distance``,
      ``near_airport``).
    * ``priority_order`` -- an event whose title contains ``higher`` is still
      on the calendar, and either no event whose title contains ``lower``
      remains or a sent email is addressed exactly to ``lower`` or mentions it
      in its body.

    Args:
        constraint: Object exposing ``check_type`` and ``check_params``.
        world: World state to inspect; it is not modified here.

    Returns:
        True when the constraint holds; False otherwise, including for an
        unrecognised ``check_type``.
    """
    ct = constraint.check_type
    # A constraint declared without parameters must not crash the grader.
    params = constraint.check_params or {}

    if ct == "calendar_no_conflict":
        return _calendar_has_no_overlaps(world)

    if ct == "event_exists":
        return params.get("event_id", "") in world.calendar

    if ct == "event_cancelled":
        return params.get("event_id", "") not in world.calendar

    if ct == "email_sent":
        to = params.get("to", "").lower()
        keywords = params.get("keywords", [])
        for em in world.emails_sent:
            if to not in em.get("to", "").lower():
                continue
            if not keywords:
                return True
            score, _ = _keyword_score(em.get("body", ""), keywords, min_matches=1)
            if score > 0:
                return True
        return False

    if ct == "restaurant_match":
        # The criteria path treats a falsy booking as "nothing booked"; apply
        # the same rule to the name path so an unset (None) booking yields
        # False instead of raising on ``.lower()``.
        booked = world.booked_restaurant or ""
        name = params.get("name", "")
        if name:
            return booked.lower() == name.lower()
        if not booked:
            return False
        criteria = params.get("criteria", {})
        r = world.restaurants.get(booked)
        if r is None:
            return False
        if "dietary" in criteria and criteria["dietary"].lower() not in [d.lower() for d in r.dietary_options]:
            return False
        if "max_price" in criteria and r.price_per_person > criteria["max_price"]:
            return False
        if "max_distance" in criteria and r.distance_miles > criteria["max_distance"]:
            return False
        if "near_airport" in criteria and criteria["near_airport"] and not r.near_airport:
            return False
        return True

    if ct == "priority_order":
        higher = params.get("higher", "").lower()
        lower = params.get("lower", "").lower()
        # Substring containment already covers an exact title match.
        titles = [ev.title.lower() for ev in world.calendar.values()]
        higher_kept = any(higher in title for title in titles)
        lower_gone = not any(lower in title for title in titles)
        lower_notified = any(
            em.get("to", "").lower() == lower or lower in em.get("body", "").lower()
            for em in world.emails_sent
        )
        return higher_kept and (lower_gone or lower_notified)

    return False


def _calendar_has_no_overlaps(world: WorldState) -> bool:
    """Return True when no two calendar events on the same date overlap.

    Every unordered pair of events is compared once. An event spans from
    ``_time_to_min(event.time)`` up to that value plus ``duration_min``; an
    event that starts exactly when another ends does not count as an overlap.
    """
    entries = list(world.calendar.values())
    for position, first in enumerate(entries):
        for second in entries[position + 1:]:
            if first.date == second.date:
                first_start = _time_to_min(first.time)
                first_end = first_start + first.duration_min
                second_start = _time_to_min(second.time)
                second_end = second_start + second.duration_min
                # Disjoint means one event ends at or before the other starts.
                disjoint = second_end <= first_start or first_end <= second_start
                if not disjoint:
                    return False
    return True


def _score_constraint_satisfaction(scenario: ScenarioDef, world: WorldState) -> Tuple[float, str]:
    """Score the fraction of the scenario's constraints that hold in *world*.

    Returns:
        ``(score, feedback)``. A scenario without constraints scores 1.0;
        otherwise the score is ``met / total``.
    """
    if not scenario.constraints:
        return 1.0, "No constraints defined"

    met = 0
    for constraint in scenario.constraints:
        if _check_constraint(constraint, world):
            met += 1
    total = len(scenario.constraints)
    return met / total, f"{met}/{total} constraints met"


def _score_conflict_resolution(world: WorldState) -> Tuple[float, str]:
    """Score the calendar all-or-nothing: 1.0 if overlap-free, else 0.0.

    Returns:
        ``(score, feedback)`` describing whether any events overlap.
    """
    if _calendar_has_no_overlaps(world):
        return 1.0, "No calendar conflicts"
    return 0.0, "Calendar has overlapping events"


def _score_commitment_coherence(world: WorldState) -> Tuple[float, str]:
    """Score how many ledger commitments were not silently broken.

    The score is ``(total - silent_violations) / total``. Renegotiated
    commitments are reported separately in the feedback but do not lower the
    score. An empty ledger scores 1.0.

    Returns:
        ``(score, feedback)`` where feedback lists the non-zero counts of
        honored, renegotiated and silently broken commitments.
    """
    total = len(world.commitment_ledger)
    if total == 0:
        return 1.0, "No commitments created"

    broken = len(world.get_silent_violations())
    renegotiated = len(
        [entry for entry in world.commitment_ledger if entry.renegotiated_at is not None]
    )
    # Whatever was neither silently broken nor renegotiated counts as honored.
    honored = total - broken - renegotiated

    labelled_counts = (
        (honored, f"{honored} honored"),
        (renegotiated, f"{renegotiated} renegotiated"),
        (broken, f"{broken} SILENTLY BROKEN"),
    )
    summary = " | ".join(text for count, text in labelled_counts if count > 0)
    return (total - broken) / total, summary or "OK"


def _score_communication(scenario: ScenarioDef, world: WorldState) -> Tuple[float, str]:
    """Score sent emails against the scenario's communication requirements.

    For each requirement, the emails whose recipient contains ``req.to``
    (case-insensitive substring) are considered and the best one counts. An
    email is graded on its body plus subject with ``min_matches=1``; a
    requirement without keywords earns full credit for any matching email.
    A requirement with no matching email earns nothing.

    Returns:
        ``(score, feedback)`` where score is the mean per-requirement credit,
        or 1.0 when the scenario has no communication requirements.
    """
    requirements = scenario.communication_requirements
    if not requirements:
        return 1.0, "No communication requirements"

    earned = 0.0
    notes: List[str] = []
    for req in requirements:
        recipient = req.to.lower()
        candidates = []
        for mail in world.emails_sent:
            if recipient in mail.get("to", "").lower():
                candidates.append(mail)

        if not candidates:
            notes.append(f"MISSING email to {req.to}")
            continue

        best = 0.0
        for mail in candidates:
            searchable = mail.get("body", "") + " " + mail.get("subject", "")
            if not req.required_keywords:
                best = 1.0
                continue
            credit, _ = _keyword_score(searchable, req.required_keywords, min_matches=1)
            if credit > best:
                best = credit

        earned += best
        if best >= 1.0:
            notes.append(f"Email to {req.to}: full credit")
        elif best > 0:
            notes.append(f"Email to {req.to}: partial ({best:.1f})")
        else:
            notes.append(f"Email to {req.to}: missing keywords")

    return earned / len(requirements), " | ".join(notes)


def _score_step_efficiency(scenario: ScenarioDef, world: WorldState) -> Tuple[float, str]:
    """Score how close the step count is to the scenario's optimum.

    Using ``scenario.optimal_steps`` steps or fewer scores 1.0. Each step
    beyond that costs 0.1, and the score never drops below 0.0.

    Returns:
        ``(score, feedback)`` where feedback reports actual and optimal step
        counts and, when over the optimum, the penalty applied.
    """
    optimal = scenario.optimal_steps
    actual = world.step_count
    extra = actual - optimal
    if extra <= 0:
        return 1.0, f"{actual} steps (optimal: {optimal})"

    penalty = extra * 0.1
    remaining = 1.0 - penalty
    score = remaining if remaining > 0.0 else 0.0
    return score, f"{actual} steps (optimal: {optimal}, penalty: -{penalty:.1f})"


def grade_scenario(
    scenario: ScenarioDef,
    world: WorldState,
) -> Tuple[float, Dict[str, float], str]:
    """Grade a finished scenario and return ``(total_reward, breakdown, feedback)``.

    Each of the five component scorers is run, its score is multiplied by the
    matching entry of ``WEIGHTS`` and rounded to four decimals. The total is
    the rounded sum of those weighted values, clamped to the range
    [0.01, 0.99]; the per-component breakdown itself is not clamped.

    Returns:
        total_reward: Clamped weighted sum.
        breakdown: Weighted contribution per component, keyed like ``WEIGHTS``.
        feedback: Per-component messages joined with ``" | "``.
    """
    # Scorers run in the same fixed order in which they are reported.
    constraint_score, constraint_note = _score_constraint_satisfaction(scenario, world)
    conflict_score, conflict_note = _score_conflict_resolution(world)
    commitment_score, commitment_note = _score_commitment_coherence(world)
    communication_score, communication_note = _score_communication(scenario, world)
    efficiency_score, efficiency_note = _score_step_efficiency(scenario, world)

    breakdown: Dict[str, float] = {
        "constraint_satisfaction": round(constraint_score * WEIGHTS["constraint_satisfaction"], 4),
        "conflict_resolution": round(conflict_score * WEIGHTS["conflict_resolution"], 4),
        "commitment_coherence": round(commitment_score * WEIGHTS["commitment_coherence"], 4),
        "communication_quality": round(communication_score * WEIGHTS["communication_quality"], 4),
        "step_efficiency": round(efficiency_score * WEIGHTS["step_efficiency"], 4),
    }

    feedback = " | ".join(
        [
            f"[constraints] {constraint_note}",
            f"[conflicts] {conflict_note}",
            f"[commitments] {commitment_note}",
            f"[communication] {communication_note}",
            f"[efficiency] {efficiency_note}",
        ]
    )

    total_reward = round(sum(breakdown.values()), 4)
    # Keep the reward strictly inside the open interval (0, 1).
    if total_reward > 0.99:
        total_reward = 0.99
    elif total_reward < 0.01:
        total_reward = 0.01
    return total_reward, breakdown, feedback