Spaces:

ashutosh111
/

negotiation-arena

Sleeping

File size: 12,527 Bytes

21a8fc4

"""Free, fast Client opponent used during training. NO API calls.

The ScriptedClient implements a deterministic-given-seed negotiation policy
in three phases (open / mid / end) with three strategy variants
(aggressive / balanced / conciliatory).
"""
from __future__ import annotations

import random
from typing import Any, Dict, List, Optional

from .utility import compute_utility


class ScriptedClient:
    """Rule-based client-side negotiator that never calls an external API.

    The client reacts to each vendor action according to the current phase
    of the negotiation (``"open"``, ``"mid"`` or ``"end"``, see ``_phase``)
    and one of three strategies listed in ``STRATEGIES``.  The strategy only
    selects three numbers: the end-game accept threshold, the per-turn step
    size used when crafting counters, and the end-game walk-away threshold.

    ``brief`` is read with the keys ``private_priorities``,
    ``ideal_outcomes``, ``walkaway_thresholds``, ``value_maps`` and
    ``dealbreakers``.  ``fixture`` is read with ``max_turns`` and ``issues``
    (each issue a dict with ``name``, ``type``, ``client_opening`` and,
    optionally, ``vendor_opening``).
    """

    STRATEGIES = ("aggressive", "balanced", "conciliatory")

    # Per-strategy tuning knobs, keyed by strategy name.
    _STRATEGY_PARAMS: Dict[str, Dict[str, float]] = {
        "aggressive": {"accept": 0.65, "step": 0.10, "walk": 0.45},
        "balanced": {"accept": 0.55, "step": 0.18, "walk": 0.40},
        "conciliatory": {"accept": 0.45, "step": 0.25, "walk": 0.30},
    }

    # Issues whose private priority is at or above this value are pushed
    # toward our ideal; issues below it are moved toward our walk-away value.
    _HIGH_PRIORITY = 0.20

    def __init__(
        self,
        brief: Dict[str, Any],
        fixture: Dict[str, Any],
        strategy: str = "balanced",
        seed: Optional[int] = None,
    ):
        """Create a client for one negotiation episode.

        Args:
            brief: The client's private negotiation brief.
            fixture: Scenario definition (issues and turn limit).
            strategy: One of ``STRATEGIES``; any other value silently falls
                back to ``"balanced"``.
            seed: Seed for ``self.rng``; ``None`` is treated as ``0``.
        """
        if strategy not in self.STRATEGIES:
            strategy = "balanced"
        self.brief = brief
        self.fixture = fixture
        self.strategy = strategy
        # NOTE(review): no method of this class currently draws from
        # ``self.rng``; it is kept for callers/subclasses that may.
        self.rng = random.Random(seed if seed is not None else 0)
        self.max_turns = int(fixture.get("max_turns", 30))

        # How many times we have conceded on each issue (by issue name).
        self._concessions: Dict[str, int] = {}
        # The terms of the most recent counter-offer we sent, if any.
        self._last_counter: Optional[Dict[str, Any]] = None

        params = self._STRATEGY_PARAMS[strategy]
        self._accept_threshold = params["accept"]
        self._concession_step = params["step"]
        self._walk_threshold = params["walk"]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def respond(
        self,
        vendor_action: Dict[str, Any],
        history: List[Dict[str, Any]],
        turn: int,
    ) -> Dict[str, Any]:
        """Return a NegotiationAction-like dict representing the client's move.

        Args:
            vendor_action: The vendor's latest action; ``None`` is treated
                as an empty action.
            history: Earlier actions, oldest first; ``None`` is treated as
                an empty history.
            turn: Current turn number, used to pick the phase.

        Returns:
            A dict with ``action_type`` (``"accept_offer"``,
            ``"counter_offer"`` or ``"walk_away"``), ``agent_role`` set to
            ``"client"``, a ``reasoning`` string and, for accepts of a
            concrete offer and for counters, ``proposed_terms``.
        """
        if vendor_action is None:
            vendor_action = {}
        if history is None:
            # Mirror the vendor_action guard so a missing history cannot crash.
            history = []

        vendor_move = vendor_action.get("action_type")

        # If vendor walked away, mirror that (terminal handled by arena).
        if vendor_move == "walk_away":
            return self._build_walkaway("vendor walked first")

        # If vendor accepted: client's response is moot (arena already locks deal).
        if vendor_move == "accept_offer":
            return self._build_accept("vendor already accepted")

        # Evaluate the most current proposed terms (vendor's, falling back
        # to whatever's on the table from the offer history).
        proposed = self._extract_proposed_terms(vendor_action, history)
        client_util = self._evaluate_offer(proposed)
        phase = self._phase(turn)

        # Hard dealbreaker: walk if the proposed deal violates ours.
        if proposed and self._violates_dealbreaker(proposed):
            return self._build_walkaway("vendor proposal hits a dealbreaker")

        if phase == "end":
            # End game: accept if good enough, walk if clearly too low,
            # otherwise make one more counter.
            if client_util >= self._accept_threshold and proposed:
                return self._build_accept(
                    f"acceptable end-game utility ({client_util:.2f}) "
                    f"under {self.strategy} strategy",
                    proposed,
                )
            if client_util < self._walk_threshold:
                return self._build_walkaway(
                    f"end-game utility {client_util:.2f} below walk threshold"
                )
            return self._craft_counter_offer(proposed, turn, end_game=True)

        if phase == "open":
            # Demand near-ideal terms. If vendor opener is already great, accept.
            if client_util >= 0.85 and proposed:
                return self._build_accept(
                    "vendor opening already meets our ideal", proposed
                )
            return self._craft_counter_offer(
                proposed, turn, end_game=False, opening=True
            )

        # Mid-game: reciprocate concessions / counter
        return self._craft_counter_offer(
            proposed, turn, end_game=False, opening=False
        )

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _phase(self, turn: int) -> str:
        """Map a turn number to ``"open"``, ``"end"`` or ``"mid"``.

        Turns up to 3 are ``"open"``; turns from ``max_turns - 4`` onward
        are ``"end"``; everything in between is ``"mid"``.
        """
        # NOTE(review): "open" is tested first, so when max_turns <= 7 the
        # two ranges overlap and the overlapping turns count as "open"; with
        # max_turns <= 3 the "end" phase is never reached. Confirm intended.
        if turn <= 3:
            return "open"
        if turn >= self.max_turns - 4:
            return "end"
        return "mid"

    def _extract_proposed_terms(
        self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Work out which terms are currently on the table.

        For ``concede``/``demand`` actions the single-issue change
        (``issue_name``/``new_value``) is folded into the latest terms found
        in ``history``.  Otherwise the action's own ``proposed_terms`` are
        used, falling back to the latest terms in ``history`` when the
        action carries none.  Always returns a fresh dict (possibly empty).
        """
        if vendor_action.get("action_type") in ("concede", "demand"):
            running = self._latest_terms_from_history(history) or {}
            issue = vendor_action.get("issue_name")
            new_value = vendor_action.get("new_value")
            if issue is not None and new_value is not None:
                running[issue] = new_value
            return running

        terms = dict(vendor_action.get("proposed_terms") or {})
        if terms:
            return terms
        return self._latest_terms_from_history(history) or {}

    def _latest_terms_from_history(
        self, history: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Return a copy of the newest non-empty ``proposed_terms`` in history.

        Returns ``None`` when ``history`` is empty/``None`` or no entry
        carries terms.
        """
        for entry in reversed(history or []):
            if entry.get("proposed_terms"):
                return dict(entry["proposed_terms"])
        return None

    def _evaluate_offer(self, terms: Dict[str, Any]) -> float:
        """Compute the client's utility for ``terms`` (0.0 for empty terms)."""
        if not terms:
            return 0.0
        return compute_utility(terms, self.brief, self.fixture)

    def _violates_dealbreaker(self, terms: Dict[str, Any]) -> bool:
        """Return whether ``terms`` violate any of the brief's dealbreakers."""
        from .utility import _is_dealbreaker_violated  # local import to avoid cycles

        return _is_dealbreaker_violated(terms, self.brief.get("dealbreakers"))

    def _should_walk_away(self, turn: int, history: List[Dict[str, Any]]) -> bool:
        """Decide whether the latest terms in ``history`` warrant walking.

        True when those terms violate a dealbreaker, or when
        ``turn >= max_turns - 1`` and their utility is below the walk
        threshold.  False when history holds no terms.  Not called by any
        other method of this class.
        """
        latest = self._latest_terms_from_history(history)
        if not latest:
            return False
        util = self._evaluate_offer(latest)
        if self._violates_dealbreaker(latest):
            return True
        return turn >= self.max_turns - 1 and util < self._walk_threshold

    def _pick_concession_issue(
        self, current_terms: Dict[str, Any]
    ) -> Optional[str]:
        """Pick the lowest-priority issue we haven't already conceded much on.

        Issues are scanned in ascending priority (ties broken by name); the
        first one present in ``current_terms`` with fewer than 3 recorded
        concessions wins.  If none qualifies, the overall lowest-priority
        issue is returned, or ``None`` when the brief lists no priorities.
        Not called by any other method of this class.
        """
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})
        # Sort issues ascending by priority — concede on cheapest first.
        ordered = sorted(priorities.items(), key=lambda kv: (kv[1], kv[0]))
        for name, _ in ordered:
            if self._concessions.get(name, 0) >= 3:
                continue
            if name in current_terms:
                return name
        return ordered[0][0] if ordered else None

    def _craft_counter_offer(
        self,
        vendor_terms: Dict[str, Any],
        turn: int,
        end_game: bool,
        opening: bool = False,
    ) -> Dict[str, Any]:
        """Produce a counter offer that nudges the deal toward our ideals.

        With no vendor terms on the table, the counter is simply every
        issue's ``client_opening``.  Otherwise the vendor's terms are copied
        and each issue is adjusted by ``_nudge_terms``.  If the resulting
        counter would violate our own dealbreakers, a walk-away is returned
        instead.  ``turn`` is accepted for interface symmetry and not used.
        """
        if not vendor_terms:
            running = self._opening_terms()
            self._last_counter = dict(running)
            return {
                "action_type": "counter_offer",
                "agent_role": "client",
                "proposed_terms": running,
                "reasoning": "client opening counter-offer",
            }

        running = dict(vendor_terms)

        # NOTE(review): the counter starts from the VENDOR's terms and moves
        # ``move_fraction`` of the way toward our target, so a smaller
        # fraction leaves the counter closer to the vendor's proposal. That
        # looks at odds with "aggressive" having the smallest step and with
        # the opening phase using 0.05 -- confirm intent before retuning.
        if opening:
            move_fraction = 0.05
        else:
            move_fraction = self._concession_step * (1.4 if end_game else 1.0)

        self._nudge_terms(running, move_fraction, opening)

        # Final dealbreaker safety check: if our own counter would violate
        # our dealbreakers, refuse to send it and walk instead.
        if self._violates_dealbreaker(running):
            return self._build_walkaway("our own counter would violate our dealbreaker")

        self._last_counter = dict(running)
        if end_game:
            stage = "end-game"
        elif opening:
            stage = "opening"
        else:
            stage = "mid-game"
        return {
            "action_type": "counter_offer",
            "agent_role": "client",
            "proposed_terms": running,
            "reasoning": f"{self.strategy} {stage} counter",
        }

    def _opening_terms(self) -> Dict[str, Any]:
        """Return the client's opening value for every fixture issue."""
        return {
            issue["name"]: issue["client_opening"]
            for issue in self.fixture["issues"]
        }

    def _nudge_terms(
        self, running: Dict[str, Any], move_fraction: float, opening: bool
    ) -> Dict[str, Any]:
        """Adjust ``running`` in place, issue by issue, and return it.

        Issues missing from ``running`` are first filled with their
        ``client_opening``.  Numeric and categorical issues are then
        adjusted by the matching helper; other issue types are left alone.
        """
        for issue in self.fixture["issues"]:
            name = issue["name"]
            if name not in running:
                running[name] = issue["client_opening"]

            kind = issue["type"]
            if kind == "numeric":
                running[name] = self._nudge_numeric(
                    issue, running[name], move_fraction
                )
            elif kind == "categorical":
                running[name] = self._nudge_categorical(
                    issue, running[name], opening
                )
        return running

    def _nudge_numeric(
        self, issue: Dict[str, Any], current: Any, move_fraction: float
    ) -> Any:
        """Move a numeric value ``move_fraction`` of the way to its target.

        High-priority issues target our ideal; low-priority issues target
        our walk-away value and are recorded as a concession.  ``current``
        is returned unchanged when it, the ideal or the walk-away value is
        missing or not numeric.
        """
        name = issue["name"]
        ideal = self.brief.get("ideal_outcomes", {}).get(name)
        walk = self.brief.get("walkaway_thresholds", {}).get(name)
        try:
            cur_v = float(current)
            ideal_v = float(ideal)
            walk_v = float(walk)
        except (TypeError, ValueError):
            return current

        prio = self.brief.get("private_priorities", {}).get(name, 0.0)
        if prio >= self._HIGH_PRIORITY:
            target = ideal_v
        else:
            target = walk_v
            self._concessions[name] = self._concessions.get(name, 0) + 1

        new_v = cur_v + (target - cur_v) * move_fraction

        # Keep integer-valued issues integral; ``.get`` so a fixture that
        # omits ``vendor_opening`` degrades to 2-decimal rounding instead
        # of raising KeyError.
        openings_are_int = isinstance(
            issue.get("vendor_opening"), int
        ) and isinstance(issue.get("client_opening"), int)
        if openings_are_int:
            return int(round(new_v))
        return round(new_v, 2)

    def _nudge_categorical(
        self, issue: Dict[str, Any], current: Any, opening: bool
    ) -> Any:
        """Pick the categorical value to propose for one issue.

        Scores come from the brief's ``value_maps`` entry for the issue;
        without one, ``current`` is kept.  A high-priority issue scoring
        below 0.5 is replaced by our ideal (when one is configured).  A
        low-priority issue scoring below 0.4 is, outside the opening phase,
        replaced by the lowest-scoring option that still scores above the
        current one and below 1.0, and recorded as a concession.
        """
        name = issue["name"]
        vmap = self.brief.get("value_maps", {}).get(name, {})
        if not vmap:
            return current

        cur_score = vmap.get(str(current), 0.0)
        prio = self.brief.get("private_priorities", {}).get(name, 0.0)

        if prio >= self._HIGH_PRIORITY and cur_score < 0.5:
            ideal = self.brief.get("ideal_outcomes", {}).get(name)
            # Never put ``None`` on the table just because the brief lacks
            # an ideal for this issue; keep what is currently proposed.
            return current if ideal is None else ideal

        if prio < self._HIGH_PRIORITY and cur_score < 0.4 and not opening:
            for option, score in sorted(vmap.items(), key=lambda kv: kv[1]):
                if cur_score < score < 1.0:
                    self._concessions[name] = self._concessions.get(name, 0) + 1
                    return option

        return current

    def _build_accept(
        self, reasoning: str, terms: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Build an ``accept_offer`` action; ``terms`` are included if given."""
        action: Dict[str, Any] = {
            "action_type": "accept_offer",
            "agent_role": "client",
        }
        if terms is not None:
            action["proposed_terms"] = terms
        action["reasoning"] = reasoning
        return action

    def _build_walkaway(self, reason: str) -> Dict[str, Any]:
        """Build a ``walk_away`` action carrying ``reason`` as its reasoning."""
        return {
            "action_type": "walk_away",
            "agent_role": "client",
            "reasoning": reason,
        }