# Spaces: 5ivatej / meta-hackathon (Sleeping)
# File size: 14,702 Bytes
# 807d5cc

"""Deterministic seeker simulator with hidden internal state.

Why rule-based / deterministic?
-------------------------------
The OpenEnv graders must be reproducible. An LLM-driven seeker would make
reward non-deterministic and fail the "score variance check" in Phase 2 of
judging. We deliberately trade some linguistic realism for full determinism
so that the same action sequence always yields the same reward — a hard
requirement of the hackathon rubric ("graders deterministic and reproducible").

Design
------
The seeker is a finite-state machine with continuous hidden variables:

    distress   ∈ [0, 1]   — how emotionally overwhelmed the seeker feels
    trust      ∈ [0, 1]   — how safe the seeker feels with the agent
    openness   ∈ [0, 1]   — willingness to reveal the *true* issue
    revealed   ∈ {0, 1}   — has the core issue surfaced yet?
    stage      ∈ enum     — opening / exploring / reflecting / planning / closing

On each turn, the environment analyses the agent's reply with a small bank of
deterministic feature detectors (keyword/regex based), then applies a
transition rule to update the hidden state and pick the seeker's next
utterance from a scripted response tree indexed by (stage, features).
"""
from __future__ import annotations

import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Tuple


class Stage(str, Enum):
    """Conversation phases of the seeker, declared in forward order.

    Because ``str`` is mixed in, each member compares equal to its plain
    string value (e.g. ``Stage.OPENING == "opening"``).
    """

    OPENING = "opening"
    EXPLORING = "exploring"
    REFLECTING = "reflecting"
    PLANNING = "planning"
    CLOSING = "closing"


# ---------------------------------------------------------------------------
# Feature detectors — deterministic text analysis of the agent's reply.
# ---------------------------------------------------------------------------

# All banks below are written in lowercase because they are matched with
# ``re.search`` against the lowercased reply. A bank's score is the number of
# its patterns that match at least once, not the number of occurrences.

# Reflective / empathic phrasing ("I hear you", "that sounds ...").
EMPATHY_PATTERNS = [
    r"\bi\s+(hear|understand|get|see)\s+(you|that|how)",
    r"\bthat\s+(sounds|must\s+be|seems)\b",
    r"\bit\s+makes\s+sense\b",
    r"\bi\s+can\s+imagine\b",
    r"\bthank\s+you\s+for\s+sharing\b",
    r"\bi'?m\s+(here|glad|sorry)\b",
]

# Explicit normalisation of the seeker's feelings.
VALIDATION_PATTERNS = [
    r"\byour\s+feelings?\s+(are|make)\s+(valid|sense)",
    r"\bit'?s\s+(okay|ok|normal|understandable)\s+to\s+feel",
    r"\banyone\s+would\s+feel\b",
    r"\bof\s+course\s+you\s+(feel|are)\b",
]

# Open-ended question stems. A trailing '?' is not required for a match.
OPEN_QUESTION_PATTERNS = [
    r"\bhow\s+(are|do|did|does)\b",
    r"\bwhat\s+(is|are|do|does|has|makes|brought|happened)\b",
    r"\bcan\s+you\s+tell\s+me\s+more\b",
    r"\bwould\s+you\s+like\s+to\s+(talk|share)\b",
]

# Directive / advice-giving phrasing.
ADVICE_PATTERNS = [
    r"\byou\s+should\b",
    r"\byou\s+(need|have|ought)\s+to\b",
    r"\btry\s+(to|doing|this)\b",
    r"\bjust\s+(do|go|try|stop|start)\b",
    r"\bwhy\s+don'?t\s+you\b",
    r"\bmy\s+advice\b",
]

# Minimising or hostile phrasing.
DISMISSIVE_PATTERNS = [
    r"\bget\s+over\s+it\b",
    r"\bstop\s+(complaining|whining|crying)\b",
    r"\byou'?re\s+overreacting\b",
    r"\bit'?s\s+not\s+a\s+big\s+deal\b",
    r"\bcalm\s+down\b",
    r"\bit\s+could\s+be\s+worse\b",
]

# Two question marks separated by nothing but optional whitespace ("??",
# "? ?"). Any match marks the reply as interrogative.
INTERROGATIVE_PATTERNS = [  # rapid-fire closed questions (trust drain when high)
    r"\?\s*\?",
]

# Safety check-ins and referrals. The resulting count is stored on
# ``Features.safety``; the transition rules in this file do not read it.
# NOTE(review): presumably consumed by a grader elsewhere; confirm.
SAFETY_PATTERNS = [
    r"\bare\s+you\s+safe\b",
    r"\bprofessional\s+help\b",
    r"\bcrisis\s+line\b",
    r"\btherapist\b",
]


def _count_matches(patterns: List[str], text: str) -> int:
    """Count how many regexes in *patterns* occur somewhere in *text*.

    The text is lowercased before matching, so patterns must be written in
    lowercase. Each pattern contributes at most 1 to the result, however
    often it occurs.
    """
    haystack = text.lower()
    hits = 0
    for pattern in patterns:
        if re.search(pattern, haystack) is not None:
            hits += 1
    return hits


@dataclass
class Features:
    """Deterministic feature vector describing one agent reply.

    The comments below describe the values as ``extract_features`` fills
    them in. Pattern-based fields hold the number of distinct patterns from
    the matching bank that were found, not the number of occurrences.
    """

    empathy: int  # EMPATHY_PATTERNS hits
    validation: int  # VALIDATION_PATTERNS hits
    open_question: int  # OPEN_QUESTION_PATTERNS hits
    advice: int  # ADVICE_PATTERNS hits
    dismissive: int  # DISMISSIVE_PATTERNS hits
    interrogative: int  # INTERROGATIVE_PATTERNS hits
    safety: int  # SAFETY_PATTERNS hits
    length: int  # character count of the whitespace-stripped reply
    closed_question: int  # count of '?' minus open_question, floored at 0
    bare: bool  # True when the stripped reply is shorter than 8 characters


def extract_features(text: str) -> Features:
    """Turn the agent's raw reply into a ``Features`` record.

    A falsy *text* (``None`` or ``""``) is treated as an empty reply. The
    reply is stripped and lowercased once, then every pattern bank is scored
    against that normalised form.
    """
    reply = (text or "").strip()
    normalised = reply.lower()

    def hits(bank: List[str]) -> int:
        return _count_matches(bank, normalised)

    open_hits = hits(OPEN_QUESTION_PATTERNS)
    # Question marks not accounted for by an open-question pattern are
    # treated as closed questions; the difference never goes below zero.
    leftover_marks = max(0, normalised.count("?") - open_hits)

    return Features(
        empathy=hits(EMPATHY_PATTERNS),
        validation=hits(VALIDATION_PATTERNS),
        open_question=open_hits,
        advice=hits(ADVICE_PATTERNS),
        dismissive=hits(DISMISSIVE_PATTERNS),
        interrogative=hits(INTERROGATIVE_PATTERNS),
        safety=hits(SAFETY_PATTERNS),
        length=len(reply),
        closed_question=leftover_marks,
        bare=len(reply) < 8,
    )


# ---------------------------------------------------------------------------
# Seeker state + scripted persona
# ---------------------------------------------------------------------------

@dataclass
class SeekerPersona:
    """Static configuration describing the seeker's initial state + script.

    The dynamics visible in this file only read these values; the mutable
    per-episode counterpart is ``SeekerState``.
    """

    task_id: str
    scenario_brief: str
    surface_concern: str  # what seeker says at turn 0
    true_issue: str  # hidden; only revealed if openness crosses threshold
    # Starting values copied into SeekerState by ``from_persona`` and used as
    # the baselines against which ``resolution_score`` measures improvement.
    initial_distress: float
    initial_trust: float
    initial_openness: float
    reveal_threshold: float  # openness value at which true_issue is revealed
    # Trust penalties for dismissive language and premature advice are scaled
    # by (1 + trust_fragility).
    trust_fragility: float  # how much a misstep drops trust (0..1)
    openness_gain_per_empathy: float  # openness added per empathy/validation hit
    distress_drop_per_validation: float  # distress removed per validation hit
    # Scripted utterances by stage when cooperative
    # (each pool is cycled in order; an empty pool yields "...").
    opening_lines: List[str]
    exploring_lines: List[str]
    reflecting_lines: List[str]
    planning_lines: List[str]
    closing_lines: List[str]
    reveal_line: str  # said the turn openness crosses reveal_threshold
    # Adverse reactions
    # (optional: when a list is empty the seeker falls back to the normal
    # reveal / stage-pool selection instead).
    dismissed_lines: List[str] = field(default_factory=list)
    advice_too_early_lines: List[str] = field(default_factory=list)


@dataclass
class SeekerState:
    """Hidden per-episode seeker variables that change from turn to turn."""

    persona: SeekerPersona
    distress: float
    trust: float
    openness: float
    revealed: bool
    stage: Stage
    # Index of the last scripted line used, keyed by stage (-1 = none yet).
    last_line_idx_by_stage: Dict[Stage, int]
    turn: int

    @classmethod
    def from_persona(cls, persona: SeekerPersona) -> "SeekerState":
        """Create the turn-0 state for *persona*.

        Continuous variables start at the persona's initial values, nothing
        is revealed, the stage is OPENING and every line cursor is -1.
        """
        fresh_cursors = dict.fromkeys(Stage, -1)
        return cls(
            persona,
            persona.initial_distress,
            persona.initial_trust,
            persona.initial_openness,
            False,
            Stage.OPENING,
            fresh_cursors,
            0,
        )

    def snapshot(self) -> "SeekerState":
        """Return an independent copy suitable for lookahead simulation.

        The persona is shared by reference, while the line-cursor dict is
        copied so that stepping the snapshot never touches the live state.
        """
        cursors_copy = {**self.last_line_idx_by_stage}
        return SeekerState(
            self.persona,
            self.distress,
            self.trust,
            self.openness,
            self.revealed,
            self.stage,
            cursors_copy,
            self.turn,
        )


def _clip(x: float) -> float:
    """Clamp *x* to the closed interval [0.0, 1.0]."""
    if x < 1.0:
        # Below the upper bound: only the lower bound can still apply.
        return x if x > 0.0 else 0.0
    return 1.0


# Forward ordering of the stages, used to derive the "progress" scalar in
# [0,1]. Enum iteration follows declaration order, so this is OPENING,
# EXPLORING, REFLECTING, PLANNING, CLOSING.
STAGE_ORDER: List[Stage] = list(Stage)


def stage_progress(stage: Stage) -> float:
    """Return the position of *stage* in ``STAGE_ORDER`` scaled to [0, 1].

    The first stage maps to 0.0 and the last one to 1.0.
    """
    position = STAGE_ORDER.index(stage)
    last_position = len(STAGE_ORDER) - 1
    return position / last_position


def resolution_score(state: SeekerState) -> float:
    """Summarise how 'resolved' the conversation is as one number in [0,1].

    The score is a weighted sum, clipped to [0,1], of:
      * 0.40 x stage progress,
      * 0.25 x trust gained, relative to the headroom above initial trust,
      * 0.25 x distress relieved, relative to the initial distress,
      * 0.10 if the true issue has been revealed.

    Trust lost or distress added relative to the persona's starting values
    contributes zero rather than a negative amount.
    """
    persona = state.persona

    # Normalisers are floored at 1e-6 so a persona that starts with full
    # trust or zero distress cannot cause a division by zero.
    trust_headroom = max(1e-6, 1.0 - persona.initial_trust)
    distress_headroom = max(1e-6, persona.initial_distress)

    gained_trust = max(0.0, state.trust - persona.initial_trust)
    relieved_distress = max(0.0, persona.initial_distress - state.distress)

    stage_term = 0.40 * stage_progress(state.stage)
    trust_term = 0.25 * gained_trust / trust_headroom
    distress_term = 0.25 * relieved_distress / distress_headroom
    reveal_term = 0.10 * (1.0 if state.revealed else 0.0)

    return _clip(stage_term + trust_term + distress_term + reveal_term)


# ---------------------------------------------------------------------------
# Transition: given current state + agent features, produce new state +
# seeker's next utterance + transition info.
# ---------------------------------------------------------------------------

@dataclass
class Transition:
    """Result of one ``step_seeker`` call."""

    # The state after the turn. step_seeker returns the same object it was
    # given, mutated in place, rather than a fresh copy.
    new_state: SeekerState
    seeker_utterance: str  # scripted line the seeker says this turn
    # Keys set by step_seeker: "dismissed", "advice_too_early", "bare_reply",
    # "empathic", "interrogated", "revealed_this_turn".
    flags: Dict[str, bool]  # e.g. {"dismissed": True, "advice_too_early": False, ...}


def _next_line(state: SeekerState, stage: Stage, pool: List[str]) -> str:
    """Return the next line of *pool*, cycling through it round-robin.

    The cursor for *stage* in ``state.last_line_idx_by_stage`` is advanced
    (wrapping at the end of the pool) and stored back on the state. An empty
    pool yields the placeholder "..." and leaves the cursor untouched.
    """
    if not pool:
        return "..."
    cursors = state.last_line_idx_by_stage
    following = (cursors[stage] + 1) % len(pool)
    cursors[stage] = following
    return pool[following]


def step_seeker(state: SeekerState, features: Features) -> Transition:
    """Apply one turn of seeker dynamics given the agent's extracted features.

    NOTE: ``state`` is mutated **in place** and that same object is returned
    as ``Transition.new_state``; no copy is made here. A caller that needs
    to keep the original (e.g. for lookahead) must pass ``state.snapshot()``.

    The numbered rules run in a fixed order and each one sees the values
    left behind by the earlier ones, so reordering them changes the result.

    Args:
        state: Hidden seeker state to advance (modified in place).
        features: Feature counts describing the agent's reply.

    Returns:
        A ``Transition`` with the mutated state, the seeker's scripted line
        for this turn, and boolean flags describing what happened.
    """
    p = state.persona
    flags: Dict[str, bool] = {
        "dismissed": False,
        "advice_too_early": False,
        "bare_reply": features.bare,
        "empathic": features.empathy + features.validation > 0,
        "interrogated": False,
        "revealed_this_turn": False,
    }

    # --- 1. Dismissive / hostile language: hard drop on trust & distress spike.
    # The penalty is flat: it does not grow with the number of matches.
    if features.dismissive > 0:
        state.trust = _clip(state.trust - 0.4 * (1.0 + p.trust_fragility))
        state.distress = _clip(state.distress + 0.15)
        state.openness = _clip(state.openness - 0.2)
        flags["dismissed"] = True

    # --- 2. Premature advice (advice while trust < 0.55): trust drop, openness drop.
    # The trust compared here already includes any penalty from rule 1.
    if features.advice > 0 and state.trust < 0.55:
        state.trust = _clip(state.trust - 0.15 * (1.0 + p.trust_fragility))
        state.openness = _clip(state.openness - 0.1)
        flags["advice_too_early"] = True

    # --- 3. Empathy & validation: trust + openness up, distress down.
    # Trust and openness scale with the combined hit count; distress falls
    # only with validation hits, so empathy alone does not lower it.
    if features.empathy > 0 or features.validation > 0:
        gain = p.openness_gain_per_empathy * (features.empathy + features.validation)
        state.trust = _clip(state.trust + 0.12 * (features.empathy + features.validation))
        state.openness = _clip(state.openness + gain)
        state.distress = _clip(state.distress - p.distress_drop_per_validation * features.validation)

    # --- 4. Open questions: small flat trust and openness gain. Any effect
    # on the stage is applied separately in rule 7.
    if features.open_question > 0:
        state.trust = _clip(state.trust + 0.05)
        state.openness = _clip(state.openness + 0.04)

    # --- 5. Interrogation (many closed questions or multiple "?"): trust drain.
    if features.closed_question >= 3 or features.interrogative > 0:
        state.trust = _clip(state.trust - 0.1)
        flags["interrogated"] = True

    # --- 6. Bare / empty reply: small trust penalty and distress bump.
    if features.bare:
        state.trust = _clip(state.trust - 0.05)
        state.distress = _clip(state.distress + 0.02)

    # --- 7. Stage progression (monotonic forward with cooperative conditions).
    # The if/elif chain allows at most one stage step per turn.
    def advance_to(s: Stage) -> None:
        # Only ever move forward in STAGE_ORDER; never regress.
        if STAGE_ORDER.index(s) > STAGE_ORDER.index(state.stage):
            state.stage = s

    if state.stage == Stage.OPENING and (
        features.empathy + features.validation + features.open_question > 0
    ):
        advance_to(Stage.EXPLORING)
    elif state.stage == Stage.EXPLORING and state.trust >= 0.5 and state.openness >= 0.5:
        advance_to(Stage.REFLECTING)
    # ``state.revealed`` is read before rule 8 updates it, so a reveal that
    # happens on this turn can unlock PLANNING only on a later turn.
    elif state.stage == Stage.REFLECTING and state.revealed and state.distress <= 0.5:
        advance_to(Stage.PLANNING)
    elif state.stage == Stage.PLANNING and features.open_question + features.empathy > 0:
        advance_to(Stage.CLOSING)

    # --- 8. Reveal check (cross threshold once).
    if not state.revealed and state.openness >= p.reveal_threshold:
        state.revealed = True
        flags["revealed_this_turn"] = True

    # --- 9. Pick seeker's next utterance.
    # Priority: dismissed > advice_too_early > reveal > cooperative stage pool.
    # The adverse pools advance the *current stage's* cursor, which they share
    # with the cooperative pool. If an adverse branch wins on the reveal turn,
    # reveal_line is not spoken even though state.revealed is already True.
    if flags["dismissed"] and p.dismissed_lines:
        utterance = _next_line(state, state.stage, p.dismissed_lines)
    elif flags["advice_too_early"] and p.advice_too_early_lines:
        utterance = _next_line(state, state.stage, p.advice_too_early_lines)
    elif flags["revealed_this_turn"]:
        utterance = p.reveal_line
    else:
        pool_by_stage = {
            Stage.OPENING: p.opening_lines,
            Stage.EXPLORING: p.exploring_lines,
            Stage.REFLECTING: p.reflecting_lines,
            Stage.PLANNING: p.planning_lines,
            Stage.CLOSING: p.closing_lines,
        }
        utterance = _next_line(state, state.stage, pool_by_stage[state.stage])

    state.turn += 1
    return Transition(new_state=state, seeker_utterance=utterance, flags=flags)


# ---------------------------------------------------------------------------
# Oracle policy for the future-oriented reward lookahead.
# ---------------------------------------------------------------------------

def oracle_features(state: SeekerState) -> Features:
    """Return the feature vector of the 'oracle' reply for the current stage.

    The oracle never dismisses, interrogates or sends a bare reply. On top
    of that neutral baseline it adds, per stage:
      - opening/exploring: empathy + open question (length 80)
      - reflecting: empathy + validation
      - planning: open question + advice (step_seeker penalises advice only
        while trust is below 0.55)
      - closing (any other stage value): empathy + safety mention
    """
    neutral = dict(
        empathy=0, validation=0, open_question=0, advice=0,
        dismissive=0, interrogative=0, safety=0, length=90,
        closed_question=0, bare=False,
    )

    stage = state.stage
    if stage in (Stage.OPENING, Stage.EXPLORING):
        ideal = dict(empathy=1, open_question=1, length=80)
    elif stage == Stage.REFLECTING:
        ideal = dict(empathy=1, validation=1)
    elif stage == Stage.PLANNING:
        ideal = dict(open_question=1, advice=1)
    else:  # CLOSING
        ideal = dict(empathy=1, safety=1)

    return Features(**{**neutral, **ideal})


def simulate_oracle_rollout(state: SeekerState, k: int) -> float:
    """Score the state reached after *k* oracle turns from *state*.

    The rollout runs on a snapshot, so the state passed in is left
    untouched. With ``k <= 0`` no step is taken and the current resolution
    score is returned. Used by the future-oriented reward.
    """
    rollout = state.snapshot()
    for _step in range(k):
        oracle_move = oracle_features(rollout)
        # step_seeker hands back the same (mutated) object as new_state.
        rollout = step_seeker(rollout, oracle_move).new_state
    return resolution_score(rollout)