"""Free, fast Client opponent used during training. NO API calls. The ScriptedClient implements a deterministic-given-seed negotiation policy in three phases (open / mid / end) with three strategy variants (aggressive / balanced / conciliatory). """ from __future__ import annotations import random from typing import Any, Dict, List, Optional from .utility import compute_utility class ScriptedClient: STRATEGIES = ("aggressive", "balanced", "conciliatory") def __init__( self, brief: Dict[str, Any], fixture: Dict[str, Any], strategy: str = "balanced", seed: Optional[int] = None, ): if strategy not in self.STRATEGIES: strategy = "balanced" self.brief = brief self.fixture = fixture self.strategy = strategy self.rng = random.Random(seed if seed is not None else 0) self.max_turns = int(fixture.get("max_turns", 30)) # Track what we've conceded on so we don't keep folding on the same issue. self._concessions: Dict[str, int] = {} # Track our most recent counter so we can be consistent. self._last_counter: Optional[Dict[str, Any]] = None # Per-strategy parameters if strategy == "aggressive": self._accept_threshold = 0.65 self._concession_step = 0.10 self._walk_threshold = 0.45 elif strategy == "conciliatory": self._accept_threshold = 0.45 self._concession_step = 0.25 self._walk_threshold = 0.30 else: # balanced self._accept_threshold = 0.55 self._concession_step = 0.18 self._walk_threshold = 0.40 # ------------------------------------------------------------------ # Public API # ------------------------------------------------------------------ def respond( self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]], turn: int, ) -> Dict[str, Any]: """Return a NegotiationAction-like dict representing the client's move.""" if vendor_action is None: vendor_action = {} # If vendor walked away, mirror that (terminal handled by arena). if vendor_action.get("action_type") == "walk_away": return self._build_walkaway("vendor walked first") # If vendor accepted: client's response is moot (arena already locks deal). if vendor_action.get("action_type") == "accept_offer": return { "action_type": "accept_offer", "agent_role": "client", "reasoning": "vendor already accepted", } # Evaluate the most current proposed terms (vendor's, falling back # to whatever's on the table from the offer history). proposed = self._extract_proposed_terms(vendor_action, history) client_util = self._evaluate_offer(proposed) phase = self._phase(turn) # Hard dealbreaker: walk if the proposed deal violates ours. if proposed and self._violates_dealbreaker(proposed): return self._build_walkaway("vendor proposal hits a dealbreaker") if phase == "end": # Last 5 turns: if acceptable, accept; else walk if too low. if client_util >= self._accept_threshold and proposed: return { "action_type": "accept_offer", "agent_role": "client", "proposed_terms": proposed, "reasoning": ( f"acceptable end-game utility ({client_util:.2f}) " f"under {self.strategy} strategy" ), } if client_util < self._walk_threshold: return self._build_walkaway( f"end-game utility {client_util:.2f} below walk threshold" ) # Otherwise make one more counter return self._craft_counter_offer(proposed, turn, end_game=True) if phase == "open": # Demand near-ideal terms. If vendor opener is already great, accept. if client_util >= 0.85 and proposed: return { "action_type": "accept_offer", "agent_role": "client", "proposed_terms": proposed, "reasoning": "vendor opening already meets our ideal", } return self._craft_counter_offer(proposed, turn, end_game=False, opening=True) # Mid-game: reciprocate concessions / counter return self._craft_counter_offer(proposed, turn, end_game=False, opening=False) # ------------------------------------------------------------------ # Helpers # ------------------------------------------------------------------ def _phase(self, turn: int) -> str: if turn <= 3: return "open" if turn >= self.max_turns - 4: return "end" return "mid" def _extract_proposed_terms( self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]] ) -> Dict[str, Any]: terms = dict(vendor_action.get("proposed_terms") or {}) # If vendor used concede/demand, fold the single-issue change # into the running terms from history. if vendor_action.get("action_type") in ("concede", "demand"): running = self._latest_terms_from_history(history) or {} running = dict(running) issue = vendor_action.get("issue_name") new_value = vendor_action.get("new_value") if issue is not None and new_value is not None: running[issue] = new_value return running if not terms: return self._latest_terms_from_history(history) or {} return terms def _latest_terms_from_history( self, history: List[Dict[str, Any]] ) -> Optional[Dict[str, Any]]: for entry in reversed(history): if entry.get("proposed_terms"): return dict(entry["proposed_terms"]) return None def _evaluate_offer(self, terms: Dict[str, Any]) -> float: """compute client's utility for the proposed terms.""" if not terms: return 0.0 return compute_utility(terms, self.brief, self.fixture) def _violates_dealbreaker(self, terms: Dict[str, Any]) -> bool: from .utility import _is_dealbreaker_violated # local import to avoid cycles return _is_dealbreaker_violated(terms, self.brief.get("dealbreakers")) def _should_walk_away(self, turn: int, history: List[Dict[str, Any]]) -> bool: latest = self._latest_terms_from_history(history) if not latest: return False util = self._evaluate_offer(latest) if self._violates_dealbreaker(latest): return True if turn >= self.max_turns - 1 and util < self._walk_threshold: return True return False def _pick_concession_issue( self, current_terms: Dict[str, Any] ) -> Optional[str]: """Pick the lowest-priority issue we haven't already conceded much on.""" priorities: Dict[str, float] = self.brief.get("private_priorities", {}) # Sort issues ascending by priority — concede on cheapest first. ordered = sorted(priorities.items(), key=lambda kv: (kv[1], kv[0])) for name, _ in ordered: if self._concessions.get(name, 0) >= 3: continue if name in current_terms: return name return ordered[0][0] if ordered else None def _craft_counter_offer( self, vendor_terms: Dict[str, Any], turn: int, end_game: bool, opening: bool = False, ) -> Dict[str, Any]: """Produce a counter offer that nudges the deal toward our ideals. The strength of the nudge depends on the strategy and phase. """ ideals: Dict[str, Any] = self.brief.get("ideal_outcomes", {}) walkaways: Dict[str, Any] = self.brief.get("walkaway_thresholds", {}) value_maps: Dict[str, Dict[str, float]] = self.brief.get("value_maps", {}) priorities: Dict[str, float] = self.brief.get("private_priorities", {}) # Start from whatever's currently proposed; if empty, start from our opening. if vendor_terms: running = dict(vendor_terms) else: running = { issue["name"]: issue["client_opening"] for issue in self.fixture["issues"] } self._last_counter = dict(running) return { "action_type": "counter_offer", "agent_role": "client", "proposed_terms": running, "reasoning": "client opening counter-offer", } # Pick how aggressively we move toward the vendor's terms. if opening: # Stick near our opening; only cosmetic concessions. move_fraction = 0.05 else: move_fraction = self._concession_step * (1.4 if end_game else 1.0) for issue in self.fixture["issues"]: name = issue["name"] if name not in running: running[name] = issue["client_opening"] current = running[name] ideal = ideals.get(name) walk = walkaways.get(name) # On HIGH-priority issues, demand harder (push toward ideal). # On LOW-priority issues, concede toward the vendor's value. prio = priorities.get(name, 0.0) if issue["type"] == "numeric": try: cur_v = float(current) ideal_v = float(ideal) walk_v = float(walk) except (TypeError, ValueError): continue if prio >= 0.20: # demand toward our ideal target = ideal_v else: # concede toward vendor's last value (if any) but not past walkaway target = walk_v self._concessions[name] = self._concessions.get(name, 0) + 1 new_v = cur_v + (target - cur_v) * move_fraction # Round numerics sensibly if isinstance(issue["vendor_opening"], int) and isinstance( issue["client_opening"], int ): new_v = int(round(new_v)) else: new_v = round(new_v, 2) running[name] = new_v elif issue["type"] == "categorical": vmap = value_maps.get(name, {}) if not vmap: continue # If vendor proposal is already 'good enough' for us, accept it. cur_score = vmap.get(str(current), 0.0) if prio >= 0.20 and cur_score < 0.5: # demand our ideal running[name] = ideal elif prio < 0.20 and cur_score < 0.4 and not opening: # concede: pick the next-best value above current sorted_vals = sorted(vmap.items(), key=lambda kv: kv[1]) # Find a value strictly worse for us than ideal but better than current for opt, sc in sorted_vals: if sc > cur_score and sc < 1.0: running[name] = opt self._concessions[name] = self._concessions.get(name, 0) + 1 break # Final dealbreaker safety check: if our own counter would violate # our dealbreakers, refuse to send it and walk instead. if self._violates_dealbreaker(running): return self._build_walkaway("our own counter would violate our dealbreaker") self._last_counter = dict(running) return { "action_type": "counter_offer", "agent_role": "client", "proposed_terms": running, "reasoning": ( f"{self.strategy} {'end-game' if end_game else ('opening' if opening else 'mid-game')} counter" ), } def _build_walkaway(self, reason: str) -> Dict[str, Any]: return { "action_type": "walk_away", "agent_role": "client", "reasoning": reason, }