negotiation-arena / server /scripted_client.py
Ashutosh Kumar
Negotiation Arena V2: full OpenEnv environment
21a8fc4
"""Free, fast Client opponent used during training. NO API calls.
The ScriptedClient implements a deterministic-given-seed negotiation policy
in three phases (open / mid / end) with three strategy variants
(aggressive / balanced / conciliatory).
"""
from __future__ import annotations
import random
from typing import Any, Dict, List, Optional
from .utility import compute_utility
class ScriptedClient:
    """Rule-based client-side negotiator that needs no external API calls.

    The policy is split into three phases chosen from the turn number
    (``open`` / ``mid`` / ``end``) and is parameterised by one of three
    strategies (see ``STRATEGIES``).  Every move is returned as a plain dict
    with at least ``action_type``, ``agent_role`` and ``reasoning`` keys.
    """

    STRATEGIES = ("aggressive", "balanced", "conciliatory")

    # strategy -> (accept_threshold, concession_step, walk_threshold)
    _STRATEGY_PARAMS = {
        "aggressive": (0.65, 0.10, 0.45),
        "balanced": (0.55, 0.18, 0.40),
        "conciliatory": (0.45, 0.25, 0.30),
    }

    # Issues whose private priority is at or above this value are pushed
    # toward our ideal; issues below it are moved toward our walk-away value.
    _HIGH_PRIORITY = 0.20

    def __init__(
        self,
        brief: Dict[str, Any],
        fixture: Dict[str, Any],
        strategy: str = "balanced",
        seed: Optional[int] = None,
    ):
        """Create a scripted client.

        Args:
            brief: The client's private brief.  Keys read by this class:
                ``ideal_outcomes``, ``walkaway_thresholds``, ``value_maps``,
                ``private_priorities`` and ``dealbreakers``.
            fixture: Scenario description.  ``max_turns`` (default 30) and
                ``issues`` (list of dicts with ``name``, ``type``,
                ``client_opening`` and ``vendor_opening``) are read.
            strategy: One of ``STRATEGIES``; any other value silently falls
                back to ``"balanced"``.
            seed: Seed for ``self.rng``; ``None`` is treated as ``0``.
        """
        if strategy not in self.STRATEGIES:
            strategy = "balanced"
        self.brief = brief
        self.fixture = fixture
        self.strategy = strategy
        # Seeded RNG kept on the instance; none of the methods in this class
        # currently draws from it.
        self.rng = random.Random(seed if seed is not None else 0)
        self.max_turns = int(fixture.get("max_turns", 30))

        # Per-issue count of concessions made so far.
        self._concessions: Dict[str, int] = {}
        # Copy of the most recent counter-offer we sent.
        self._last_counter: Optional[Dict[str, Any]] = None

        (
            self._accept_threshold,
            self._concession_step,
            self._walk_threshold,
        ) = self._STRATEGY_PARAMS[strategy]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def respond(
        self,
        vendor_action: Optional[Dict[str, Any]],
        history: Optional[List[Dict[str, Any]]],
        turn: int,
    ) -> Dict[str, Any]:
        """Return a NegotiationAction-like dict representing the client's move.

        Args:
            vendor_action: The vendor's latest action; ``None`` is treated as
                an empty action.
            history: Earlier actions, oldest first; ``None`` is treated as an
                empty history.
            turn: Current turn number, used to pick the phase.

        Returns:
            A dict whose ``action_type`` is ``"accept_offer"``,
            ``"counter_offer"`` or ``"walk_away"``.
        """
        if vendor_action is None:
            vendor_action = {}
        if history is None:
            history = []

        action_type = vendor_action.get("action_type")
        # If vendor walked away, mirror that.
        if action_type == "walk_away":
            return self._build_walkaway("vendor walked first")
        # If vendor accepted, our reply carries no terms of its own.
        if action_type == "accept_offer":
            return self._build_accept("vendor already accepted")

        # Evaluate the terms currently on the table (the vendor's, falling
        # back to the latest terms found in the history).
        proposed = self._extract_proposed_terms(vendor_action, history)
        client_util = self._evaluate_offer(proposed)
        phase = self._phase(turn)

        # Hard dealbreaker: walk if the proposed deal violates ours.
        if proposed and self._violates_dealbreaker(proposed):
            return self._build_walkaway("vendor proposal hits a dealbreaker")

        if phase == "end":
            # End game: accept if good enough, walk if too low, else counter.
            if client_util >= self._accept_threshold and proposed:
                return self._build_accept(
                    f"acceptable end-game utility ({client_util:.2f}) "
                    f"under {self.strategy} strategy",
                    proposed,
                )
            if client_util < self._walk_threshold:
                return self._build_walkaway(
                    f"end-game utility {client_util:.2f} below walk threshold"
                )
            return self._craft_counter_offer(proposed, turn, end_game=True)

        if phase == "open":
            # Only an already excellent opener is accepted outright.
            if client_util >= 0.85 and proposed:
                return self._build_accept(
                    "vendor opening already meets our ideal", proposed
                )
            return self._craft_counter_offer(
                proposed, turn, end_game=False, opening=True
            )

        # Mid-game: always counter.
        return self._craft_counter_offer(
            proposed, turn, end_game=False, opening=False
        )

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _phase(self, turn: int) -> str:
        """Map a turn number to ``"open"``, ``"end"`` or ``"mid"``.

        Turns 1-3 are the opening; the last five turns (``turn >=
        max_turns - 4``) are the end game.  The opening check runs first, so
        it wins if the two ranges overlap for a very small ``max_turns``.
        """
        if turn <= 3:
            return "open"
        if turn >= self.max_turns - 4:
            return "end"
        return "mid"

    def _extract_proposed_terms(
        self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Return a fresh dict with the terms currently on the table.

        For ``concede`` / ``demand`` actions the single-issue change
        (``issue_name`` / ``new_value``) is folded into the latest terms from
        ``history``.  Otherwise the action's own ``proposed_terms`` are used,
        falling back to the latest terms from ``history`` when it has none.
        """
        terms = dict(vendor_action.get("proposed_terms") or {})
        if vendor_action.get("action_type") in ("concede", "demand"):
            running = self._latest_terms_from_history(history) or {}
            issue = vendor_action.get("issue_name")
            new_value = vendor_action.get("new_value")
            if issue is not None and new_value is not None:
                running[issue] = new_value
            return running
        if not terms:
            return self._latest_terms_from_history(history) or {}
        return terms

    def _latest_terms_from_history(
        self, history: Optional[List[Dict[str, Any]]]
    ) -> Optional[Dict[str, Any]]:
        """Return a copy of the newest non-empty ``proposed_terms``, or None.

        A ``None`` history is treated as empty.
        """
        for entry in reversed(history or []):
            if entry.get("proposed_terms"):
                return dict(entry["proposed_terms"])
        return None

    def _evaluate_offer(self, terms: Dict[str, Any]) -> float:
        """Compute the client's utility for ``terms`` (0.0 when empty)."""
        if not terms:
            return 0.0
        return compute_utility(terms, self.brief, self.fixture)

    def _violates_dealbreaker(self, terms: Dict[str, Any]) -> bool:
        """Return whether ``terms`` violate any of the brief's dealbreakers."""
        from .utility import _is_dealbreaker_violated  # local import to avoid cycles

        return _is_dealbreaker_violated(terms, self.brief.get("dealbreakers"))

    def _should_walk_away(self, turn: int, history: List[Dict[str, Any]]) -> bool:
        """Decide from the history alone whether the client should walk.

        Walks when the latest terms hit a dealbreaker, or when it is the last
        or second-to-last turn and their utility is below the walk threshold.
        Returns False when the history holds no terms.
        """
        latest = self._latest_terms_from_history(history)
        if not latest:
            return False
        util = self._evaluate_offer(latest)
        if self._violates_dealbreaker(latest):
            return True
        if turn >= self.max_turns - 1 and util < self._walk_threshold:
            return True
        return False

    def _pick_concession_issue(
        self, current_terms: Dict[str, Any]
    ) -> Optional[str]:
        """Pick the lowest-priority issue we haven't already conceded much on.

        Issues are tried in ascending priority (ties broken by name); an issue
        is skipped once it has three recorded concessions or when it is absent
        from ``current_terms``.  If nothing qualifies, the lowest-priority
        issue is returned anyway; None only when no priorities are configured.
        """
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})
        ordered = sorted(priorities.items(), key=lambda kv: (kv[1], kv[0]))
        for name, _ in ordered:
            if self._concessions.get(name, 0) >= 3:
                continue
            if name in current_terms:
                return name
        return ordered[0][0] if ordered else None

    def _craft_counter_offer(
        self,
        vendor_terms: Dict[str, Any],
        turn: int,
        end_game: bool,
        opening: bool = False,
    ) -> Dict[str, Any]:
        """Build a counter-offer from the terms currently on the table.

        With no terms on the table the client simply proposes its
        ``client_opening`` value for every issue.  Otherwise each issue starts
        at the vendor's value and is adjusted:

        * numeric, high priority: moved ``move_fraction`` of the way toward
          our ideal;
        * numeric, low priority: moved ``move_fraction`` of the way toward our
          walk-away value;
        * categorical, high priority and scoring below 0.5: replaced by our
          ideal (left alone if the brief has no ideal for the issue);
        * categorical, low priority and scoring below 0.4 (not in the
          opening): replaced by the lowest-scoring option that still scores
          above the current one and below 1.0.

        ``move_fraction`` is 0.05 in the opening, otherwise the strategy's
        concession step (scaled by 1.4 in the end game).

        Returns a ``counter_offer`` action, or a ``walk_away`` action if the
        resulting terms would violate our own dealbreakers.
        """
        ideals: Dict[str, Any] = self.brief.get("ideal_outcomes", {})
        walkaways: Dict[str, Any] = self.brief.get("walkaway_thresholds", {})
        value_maps: Dict[str, Dict[str, float]] = self.brief.get("value_maps", {})
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})

        # Nothing proposed yet: put our opening position on the table.
        if not vendor_terms:
            running = {
                issue["name"]: issue["client_opening"]
                for issue in self.fixture["issues"]
            }
            self._last_counter = dict(running)
            return {
                "action_type": "counter_offer",
                "agent_role": "client",
                "proposed_terms": running,
                "reasoning": "client opening counter-offer",
            }

        running = dict(vendor_terms)

        # NOTE(review): the value starts at the vendor's number and moves
        # `move_fraction` of the way toward OUR target, so a larger
        # `_concession_step` yields a counter further from the vendor's
        # proposal.  That looks at odds with the "concession step" naming and
        # the strategy labels -- confirm the intended direction before tuning.
        if opening:
            move_fraction = 0.05
        else:
            move_fraction = self._concession_step * (1.4 if end_game else 1.0)

        for issue in self.fixture["issues"]:
            name = issue["name"]
            if name not in running:
                running[name] = issue["client_opening"]
            current = running[name]
            ideal = ideals.get(name)
            walk = walkaways.get(name)
            prio = priorities.get(name, 0.0)
            high_priority = prio >= self._HIGH_PRIORITY

            if issue["type"] == "numeric":
                try:
                    cur_v = float(current)
                    ideal_v = float(ideal)
                    walk_v = float(walk)
                except (TypeError, ValueError):
                    # Missing or non-numeric data: leave the value untouched.
                    continue
                target = ideal_v if high_priority else walk_v
                new_v = cur_v + (target - cur_v) * move_fraction
                # Keep integer-valued issues integral; otherwise 2 decimals.
                if isinstance(issue["vendor_opening"], int) and isinstance(
                    issue["client_opening"], int
                ):
                    new_v = int(round(new_v))
                else:
                    new_v = round(new_v, 2)
                # Record a concession only when the value really moved, so
                # no-op turns do not inflate the per-issue counter.
                if not high_priority and new_v != cur_v:
                    self._concessions[name] = self._concessions.get(name, 0) + 1
                running[name] = new_v

            elif issue["type"] == "categorical":
                vmap = value_maps.get(name, {})
                if not vmap:
                    continue
                cur_score = vmap.get(str(current), 0.0)
                if high_priority and cur_score < 0.5:
                    # Demand our ideal, but never inject None into the terms
                    # when the brief has no ideal for this issue.
                    if ideal is not None:
                        running[name] = ideal
                elif not high_priority and cur_score < 0.4 and not opening:
                    # Smallest step up: first option (ascending by score)
                    # that beats the current score but is below 1.0.
                    for opt, score in sorted(vmap.items(), key=lambda kv: kv[1]):
                        if cur_score < score < 1.0:
                            running[name] = opt
                            self._concessions[name] = (
                                self._concessions.get(name, 0) + 1
                            )
                            break

        # Final safety check: never send a counter that violates our own
        # dealbreakers; walk instead.
        if self._violates_dealbreaker(running):
            return self._build_walkaway("our own counter would violate our dealbreaker")

        self._last_counter = dict(running)
        return {
            "action_type": "counter_offer",
            "agent_role": "client",
            "proposed_terms": running,
            "reasoning": (
                f"{self.strategy} {'end-game' if end_game else ('opening' if opening else 'mid-game')} counter"
            ),
        }

    def _build_accept(
        self, reason: str, terms: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Build an ``accept_offer`` action; ``proposed_terms`` only if given."""
        action: Dict[str, Any] = {
            "action_type": "accept_offer",
            "agent_role": "client",
        }
        if terms is not None:
            action["proposed_terms"] = terms
        action["reasoning"] = reason
        return action

    def _build_walkaway(self, reason: str) -> Dict[str, Any]:
        """Build a ``walk_away`` action carrying ``reason``."""
        return {
            "action_type": "walk_away",
            "agent_role": "client",
            "reasoning": reason,
        }