Spaces:
Sleeping
Sleeping
| """Free, fast Client opponent used during training. NO API calls. | |
| The ScriptedClient implements a deterministic-given-seed negotiation policy | |
| in three phases (open / mid / end) with three strategy variants | |
| (aggressive / balanced / conciliatory). | |
| """ | |
| from __future__ import annotations | |
| import random | |
| from typing import Any, Dict, List, Optional | |
| from .utility import compute_utility | |
class ScriptedClient:
    """Rule-based client-side negotiator that makes no API calls.

    Behaviour is split into three phases chosen from the turn number
    (``open`` / ``mid`` / ``end``, see :meth:`_phase`) and tuned by one of
    three strategies listed in :attr:`STRATEGIES`.  Every move is returned as
    a plain dict with at least ``action_type``, ``agent_role`` and
    ``reasoning`` keys.

    The ``brief`` dict is read for ``ideal_outcomes``, ``walkaway_thresholds``,
    ``value_maps``, ``private_priorities`` and ``dealbreakers``; the
    ``fixture`` dict is read for ``max_turns`` and ``issues``.
    """

    STRATEGIES = ("aggressive", "balanced", "conciliatory")

    # strategy -> (accept_threshold, concession_step, walk_threshold)
    _STRATEGY_PARAMS = {
        "aggressive": (0.65, 0.10, 0.45),
        "balanced": (0.55, 0.18, 0.40),
        "conciliatory": (0.45, 0.25, 0.30),
    }

    # Issues with a private priority at or above this are pushed toward our
    # ideal; issues below it are the ones we give ground on.
    _HIGH_PRIORITY = 0.20
    # Utility at which a vendor proposal is accepted during the opening phase.
    _OPENING_ACCEPT_UTILITY = 0.85
    # Fraction of the gap closed per counter during the opening phase.
    _OPENING_MOVE_FRACTION = 0.05
    # Multiplier applied to the concession step during the end phase.
    _END_GAME_BOOST = 1.4

    def __init__(
        self,
        brief: Dict[str, Any],
        fixture: Dict[str, Any],
        strategy: str = "balanced",
        seed: Optional[int] = None,
    ):
        """Store the scenario and derive the per-strategy thresholds.

        Args:
            brief: Client-private scenario data (ideals, walk-away values,
                priorities, value maps, dealbreakers).
            fixture: Shared scenario data; ``max_turns`` defaults to 30.
            strategy: One of :attr:`STRATEGIES`.  Any other value silently
                falls back to ``"balanced"``.
            seed: Seed for the instance RNG; ``None`` is treated as ``0``.
        """
        if strategy not in self.STRATEGIES:
            strategy = "balanced"
        self.brief = brief
        self.fixture = fixture
        self.strategy = strategy
        # NOTE: the RNG is seeded here but none of the methods in this class
        # consult it.
        self.rng = random.Random(seed if seed is not None else 0)
        self.max_turns = int(fixture.get("max_turns", 30))
        # Per-issue count of how often we have given ground on that issue.
        self._concessions: Dict[str, int] = {}
        # Terms of the most recent counter-offer we produced.
        self._last_counter: Optional[Dict[str, Any]] = None
        (
            self._accept_threshold,
            self._concession_step,
            self._walk_threshold,
        ) = self._STRATEGY_PARAMS[strategy]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def respond(
        self,
        vendor_action: Dict[str, Any],
        history: List[Dict[str, Any]],
        turn: int,
    ) -> Dict[str, Any]:
        """Return a NegotiationAction-like dict representing the client's move.

        Args:
            vendor_action: The vendor's latest action; ``None`` is treated as
                an empty action.
            history: Earlier actions, oldest first; ``None`` is treated as an
                empty history.
            turn: Current turn number, used to select the phase.

        Returns:
            A dict whose ``action_type`` is ``"walk_away"``,
            ``"accept_offer"`` or ``"counter_offer"``.
        """
        if vendor_action is None:
            vendor_action = {}
        if history is None:
            history = []

        vendor_move = vendor_action.get("action_type")
        # If vendor walked away, mirror that (terminal handled by arena).
        if vendor_move == "walk_away":
            return self._build_walkaway("vendor walked first")
        # If vendor accepted: client's response is moot (arena already locks deal).
        if vendor_move == "accept_offer":
            return {
                "action_type": "accept_offer",
                "agent_role": "client",
                "reasoning": "vendor already accepted",
            }

        # Evaluate the most current proposed terms (vendor's, falling back
        # to whatever's on the table from the offer history).
        proposed = self._extract_proposed_terms(vendor_action, history)
        client_util = self._evaluate_offer(proposed)
        phase = self._phase(turn)

        # Hard dealbreaker: walk if the proposed deal violates ours.
        if proposed and self._violates_dealbreaker(proposed):
            return self._build_walkaway("vendor proposal hits a dealbreaker")

        if phase == "end":
            # End phase: accept if good enough, walk if too low, otherwise
            # make one more counter.
            if client_util >= self._accept_threshold and proposed:
                return {
                    "action_type": "accept_offer",
                    "agent_role": "client",
                    "proposed_terms": proposed,
                    "reasoning": (
                        f"acceptable end-game utility ({client_util:.2f}) "
                        f"under {self.strategy} strategy"
                    ),
                }
            if client_util < self._walk_threshold:
                return self._build_walkaway(
                    f"end-game utility {client_util:.2f} below walk threshold"
                )
            return self._craft_counter_offer(proposed, turn, end_game=True)

        if phase == "open":
            # Demand near-ideal terms. If vendor opener is already great, accept.
            if client_util >= self._OPENING_ACCEPT_UTILITY and proposed:
                return {
                    "action_type": "accept_offer",
                    "agent_role": "client",
                    "proposed_terms": proposed,
                    "reasoning": "vendor opening already meets our ideal",
                }
            return self._craft_counter_offer(
                proposed, turn, end_game=False, opening=True
            )

        # Mid-game: always counter.
        return self._craft_counter_offer(
            proposed, turn, end_game=False, opening=False
        )

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _phase(self, turn: int) -> str:
        """Map a turn number to ``"open"``, ``"mid"`` or ``"end"``.

        Turns up to 3 are ``open``; turns from ``max_turns - 4`` onward are
        ``end``.  The ``open`` test runs first, so it wins when ``max_turns``
        is small enough for the two windows to overlap.
        """
        if turn <= 3:
            return "open"
        if turn >= self.max_turns - 4:
            return "end"
        return "mid"

    def _extract_proposed_terms(
        self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Work out the full set of terms currently on the table.

        For ``concede`` / ``demand`` actions the single ``issue_name`` /
        ``new_value`` change is folded into the latest terms found in
        ``history``.  Otherwise the action's own ``proposed_terms`` are used,
        falling back to the latest terms in ``history`` when they are empty.
        Always returns a fresh dict (possibly empty).
        """
        if vendor_action.get("action_type") in ("concede", "demand"):
            running = self._latest_terms_from_history(history) or {}
            issue = vendor_action.get("issue_name")
            new_value = vendor_action.get("new_value")
            if issue is not None and new_value is not None:
                running[issue] = new_value
            return running

        terms = dict(vendor_action.get("proposed_terms") or {})
        if terms:
            return terms
        return self._latest_terms_from_history(history) or {}

    def _latest_terms_from_history(
        self, history: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Return a copy of the newest non-empty ``proposed_terms`` in history.

        Returns ``None`` when ``history`` is empty or ``None``, or when no
        entry carries terms.
        """
        for entry in reversed(history or []):
            if entry.get("proposed_terms"):
                return dict(entry["proposed_terms"])
        return None

    def _evaluate_offer(self, terms: Dict[str, Any]) -> float:
        """Compute the client's utility for ``terms``; empty terms score 0.0."""
        if not terms:
            return 0.0
        return compute_utility(terms, self.brief, self.fixture)

    def _violates_dealbreaker(self, terms: Dict[str, Any]) -> bool:
        """Check ``terms`` against the brief's ``dealbreakers`` entry."""
        from .utility import _is_dealbreaker_violated  # local import to avoid cycles

        return _is_dealbreaker_violated(terms, self.brief.get("dealbreakers"))

    def _should_walk_away(self, turn: int, history: List[Dict[str, Any]]) -> bool:
        """Decide whether the latest terms in ``history`` justify walking.

        True when those terms violate a dealbreaker, or when
        ``turn >= max_turns - 1`` and their utility is below the strategy's
        walk threshold.  False when history carries no terms.
        """
        latest = self._latest_terms_from_history(history)
        if not latest:
            return False
        if self._violates_dealbreaker(latest):
            return True
        # Utility only matters once we are at the very end of the negotiation.
        if turn < self.max_turns - 1:
            return False
        return self._evaluate_offer(latest) < self._walk_threshold

    def _pick_concession_issue(
        self, current_terms: Dict[str, Any]
    ) -> Optional[str]:
        """Pick the lowest-priority issue we haven't already conceded much on.

        Issues are scanned in ascending ``private_priorities`` order (ties
        broken by name).  The first one present in ``current_terms`` with
        fewer than three recorded concessions is returned.  If none
        qualifies, the overall lowest-priority issue is returned, or ``None``
        when the brief lists no priorities.
        """
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})
        ordered = sorted(priorities.items(), key=lambda kv: (kv[1], kv[0]))
        for name, _ in ordered:
            if self._concessions.get(name, 0) >= 3:
                continue
            if name in current_terms:
                return name
        return ordered[0][0] if ordered else None

    def _craft_counter_offer(
        self,
        vendor_terms: Dict[str, Any],
        turn: int,
        end_game: bool,
        opening: bool = False,
    ) -> Dict[str, Any]:
        """Produce a counter offer that nudges the deal toward our ideals.

        With no vendor terms on the table, the counter is simply every
        issue's ``client_opening``.  Otherwise each fixture issue is adjusted
        starting from the vendor's value; the size of the adjustment depends
        on the strategy and phase.  If the resulting terms would violate our
        own dealbreakers a ``walk_away`` action is returned instead.

        Args:
            vendor_terms: Terms currently proposed (may be empty).
            turn: Current turn number (not used in the computation).
            end_game: True in the end phase; enlarges the step.
            opening: True in the opening phase; shrinks the step and
                disables giving ground on categorical issues.
        """
        issues = self.fixture["issues"]

        # Nothing on the table yet: put forward our opening position.
        if not vendor_terms:
            running = {issue["name"]: issue["client_opening"] for issue in issues}
            self._last_counter = dict(running)
            return {
                "action_type": "counter_offer",
                "agent_role": "client",
                "proposed_terms": running,
                "reasoning": "client opening counter-offer",
            }

        ideals: Dict[str, Any] = self.brief.get("ideal_outcomes", {})
        walkaways: Dict[str, Any] = self.brief.get("walkaway_thresholds", {})
        value_maps: Dict[str, Dict[str, float]] = self.brief.get("value_maps", {})
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})

        # Fraction of the gap between the current value and the target that
        # a numeric issue moves in this counter.
        if opening:
            move_fraction = self._OPENING_MOVE_FRACTION
        elif end_game:
            move_fraction = self._concession_step * self._END_GAME_BOOST
        else:
            move_fraction = self._concession_step * 1.0

        running = dict(vendor_terms)
        for issue in issues:
            name = issue["name"]
            # Issues the vendor did not mention start from our opening value.
            if name not in running:
                running[name] = issue["client_opening"]
            prio = priorities.get(name, 0.0)

            if issue["type"] == "numeric":
                running[name] = self._nudge_numeric(
                    issue,
                    running[name],
                    ideals.get(name),
                    walkaways.get(name),
                    prio,
                    move_fraction,
                )
            elif issue["type"] == "categorical":
                running[name] = self._nudge_categorical(
                    name,
                    running[name],
                    ideals.get(name),
                    prio,
                    value_maps.get(name, {}),
                    opening,
                )

        # Final dealbreaker safety check: if our own counter would violate
        # our dealbreakers, refuse to send it and walk instead.
        if self._violates_dealbreaker(running):
            return self._build_walkaway("our own counter would violate our dealbreaker")

        self._last_counter = dict(running)
        if end_game:
            phase_label = "end-game"
        elif opening:
            phase_label = "opening"
        else:
            phase_label = "mid-game"
        return {
            "action_type": "counter_offer",
            "agent_role": "client",
            "proposed_terms": running,
            "reasoning": f"{self.strategy} {phase_label} counter",
        }

    def _nudge_numeric(
        self,
        issue: Dict[str, Any],
        current: Any,
        ideal: Any,
        walk: Any,
        prio: float,
        move_fraction: float,
    ) -> Any:
        """Move one numeric issue ``move_fraction`` of the way to its target.

        High-priority issues target our ideal; the rest target our walk-away
        value and are recorded in ``self._concessions``.  If the current,
        ideal or walk-away value cannot be converted to ``float`` the value
        is returned unchanged.
        """
        try:
            cur_v = float(current)
            ideal_v = float(ideal)
            walk_v = float(walk)
        except (TypeError, ValueError):
            return current

        if prio >= self._HIGH_PRIORITY:
            target = ideal_v
        else:
            target = walk_v
            name = issue["name"]
            self._concessions[name] = self._concessions.get(name, 0) + 1

        new_v = cur_v + (target - cur_v) * move_fraction
        # Keep integer-valued issues integral; otherwise round to 2 decimals.
        both_int = isinstance(issue["vendor_opening"], int) and isinstance(
            issue["client_opening"], int
        )
        return int(round(new_v)) if both_int else round(new_v, 2)

    def _nudge_categorical(
        self,
        name: str,
        current: Any,
        ideal: Any,
        prio: float,
        vmap: Dict[str, float],
        opening: bool,
    ) -> Any:
        """Choose the option to propose for one categorical issue.

        ``vmap`` maps option -> our score for it; unknown options score 0.0.
        Returns ``current`` unchanged when there is no map or when the
        current option already scores well enough for us.
        """
        if not vmap:
            return current
        cur_score = vmap.get(str(current), 0.0)

        if prio >= self._HIGH_PRIORITY:
            # High priority and poorly scored: insist on our ideal.  Without
            # a known ideal there is nothing to insist on, so leave the value
            # alone rather than proposing None.
            if cur_score < 0.5 and ideal is not None:
                return ideal
            return current

        if cur_score < 0.4 and not opening:
            # Low priority: settle for the lowest-scoring option that still
            # beats the current one and scores below 1.0.
            for opt, sc in sorted(vmap.items(), key=lambda kv: kv[1]):
                if cur_score < sc < 1.0:
                    self._concessions[name] = self._concessions.get(name, 0) + 1
                    return opt
        return current

    def _build_walkaway(self, reason: str) -> Dict[str, Any]:
        """Build a client ``walk_away`` action carrying ``reason``."""
        return {
            "action_type": "walk_away",
            "agent_role": "client",
            "reasoning": reason,
        }