Spaces:

ashutosh111
/

negotiation-arena

Sleeping

negotiation-arena / server /scripted_client.py

Ashutosh Kumar

Negotiation Arena V2: full OpenEnv environment

21a8fc4 about 1 month ago

12.5 kB

	"""Free, fast Client opponent used during training. NO API calls.

	The ScriptedClient implements a deterministic-given-seed negotiation policy
	in three phases (open / mid / end) with three strategy variants
	(aggressive / balanced / conciliatory).
	"""
	from __future__ import annotations

	import random
	from typing import Any, Dict, List, Optional

	from .utility import compute_utility


	class ScriptedClient:
	    """Rule-based client-side negotiator that makes no API calls.

	    The policy is split into three phases chosen from the turn number
	    (``open`` / ``mid`` / ``end``) and is parameterised by one of three
	    strategies (see ``STRATEGIES``), each of which sets an accept threshold,
	    a concession step and a walk-away threshold.

	    Every action returned is a plain dict with at least ``action_type``,
	    ``agent_role`` (always ``"client"``) and ``reasoning``; offers and
	    acceptances of concrete terms also carry ``proposed_terms``.

	    NOTE(review): the thresholds below assume ``compute_utility`` returns a
	    score on a 0..1 scale -- confirm against the ``utility`` module.
	    """

	    STRATEGIES = ("aggressive", "balanced", "conciliatory")

	    # strategy -> (accept_threshold, concession_step, walk_threshold)
	    _STRATEGY_PARAMS = {
	        "aggressive": (0.65, 0.10, 0.45),
	        "balanced": (0.55, 0.18, 0.40),
	        "conciliatory": (0.45, 0.25, 0.30),
	    }

	    # Turns 1.._OPENING_LAST_TURN are the opening phase.
	    _OPENING_LAST_TURN = 3
	    # The end phase starts at ``max_turns - _ENDGAME_LEAD``.
	    _ENDGAME_LEAD = 4
	    # Utility at or above which an opening-phase proposal is accepted outright.
	    _OPENING_ACCEPT_UTILITY = 0.85
	    # Fraction of the gap closed per counter during the opening phase.
	    _OPENING_MOVE_FRACTION = 0.05
	    # End-game counters move this much further than mid-game ones.
	    _ENDGAME_STEP_MULTIPLIER = 1.4
	    # Issues with a priority at or above this are pushed toward our ideal.
	    _HIGH_PRIORITY = 0.20
	    # Categorical score below which a high-priority issue is re-demanded.
	    _CATEGORICAL_DEMAND_BELOW = 0.5
	    # Categorical score below which a low-priority issue is adjusted.
	    _CATEGORICAL_ADJUST_BELOW = 0.4
	    # _pick_concession_issue skips issues already conceded this many times.
	    _MAX_CONCESSIONS_PER_ISSUE = 3

	    def __init__(
	        self,
	        brief: Dict[str, Any],
	        fixture: Dict[str, Any],
	        strategy: str = "balanced",
	        seed: Optional[int] = None,
	    ):
	        """Store the scenario and derive the per-strategy parameters.

	        Args:
	            brief: Client-private data. The keys read by this class are
	                ``ideal_outcomes``, ``walkaway_thresholds``, ``value_maps``,
	                ``private_priorities`` and ``dealbreakers``.
	            fixture: Scenario description. ``max_turns`` (default 30) and
	                ``issues`` are read; each issue is indexed by ``name``,
	                ``type``, ``client_opening`` and ``vendor_opening``.
	            strategy: One of ``STRATEGIES``. Any other value silently falls
	                back to ``"balanced"``.
	            seed: Seed for ``self.rng``; ``None`` is treated as ``0`` so the
	                generator is always seeded.
	        """
	        if strategy not in self.STRATEGIES:
	            strategy = "balanced"
	        self.brief = brief
	        self.fixture = fixture
	        self.strategy = strategy
	        self.rng = random.Random(seed if seed is not None else 0)
	        self.max_turns = int(fixture.get("max_turns", 30))

	        # Number of times each issue has been moved in the vendor's favour.
	        self._concessions: Dict[str, int] = {}
	        # Copy of the most recent counter-offer this client produced.
	        self._last_counter: Optional[Dict[str, Any]] = None

	        (
	            self._accept_threshold,
	            self._concession_step,
	            self._walk_threshold,
	        ) = self._STRATEGY_PARAMS[strategy]

	    # ------------------------------------------------------------------
	    # Public API
	    # ------------------------------------------------------------------
	    def respond(
	        self,
	        vendor_action: Dict[str, Any],
	        history: List[Dict[str, Any]],
	        turn: int,
	    ) -> Dict[str, Any]:
	        """Return a NegotiationAction-like dict representing the client's move.

	        Args:
	            vendor_action: The vendor's latest action; ``None`` is treated as
	                an empty action.
	            history: Earlier actions, oldest first; ``None`` is treated as an
	                empty history.
	            turn: Current turn number, used only to select the phase.

	        Returns:
	            A ``walk_away``, ``accept_offer`` or ``counter_offer`` action dict.
	        """
	        if vendor_action is None:
	            vendor_action = {}
	        if history is None:
	            history = []

	        vendor_move = vendor_action.get("action_type")

	        # If vendor walked away, mirror that (terminal handled by arena).
	        if vendor_move == "walk_away":
	            return self._build_walkaway("vendor walked first")

	        # If vendor accepted: client's response is moot (arena already locks deal).
	        if vendor_move == "accept_offer":
	            return self._build_accept("vendor already accepted")

	        # Evaluate the most current proposed terms (vendor's, falling back
	        # to whatever's on the table from the offer history).
	        proposed = self._extract_proposed_terms(vendor_action, history)
	        client_util = self._evaluate_offer(proposed)
	        phase = self._phase(turn)

	        # Hard dealbreaker: walk if the proposed deal violates ours.
	        if proposed and self._violates_dealbreaker(proposed):
	            return self._build_walkaway("vendor proposal hits a dealbreaker")

	        if phase == "end":
	            # Closing turns: accept if good enough, walk if too low,
	            # otherwise make one more (larger) counter.
	            if client_util >= self._accept_threshold and proposed:
	                return self._build_accept(
	                    f"acceptable end-game utility ({client_util:.2f}) "
	                    f"under {self.strategy} strategy",
	                    proposed,
	                )
	            if client_util < self._walk_threshold:
	                return self._build_walkaway(
	                    f"end-game utility {client_util:.2f} below walk threshold"
	                )
	            return self._craft_counter_offer(proposed, turn, end_game=True)

	        if phase == "open":
	            # If the vendor's opener is already great, take it.
	            if client_util >= self._OPENING_ACCEPT_UTILITY and proposed:
	                return self._build_accept(
	                    "vendor opening already meets our ideal", proposed
	                )
	            return self._craft_counter_offer(
	                proposed, turn, end_game=False, opening=True
	            )

	        # Mid-game: always counter.
	        return self._craft_counter_offer(
	            proposed, turn, end_game=False, opening=False
	        )

	    # ------------------------------------------------------------------
	    # Helpers
	    # ------------------------------------------------------------------
	    def _phase(self, turn: int) -> str:
	        """Map a turn number to ``"open"``, ``"end"`` or ``"mid"``.

	        The opening test runs first, so with a very small ``max_turns`` the
	        early turns are still reported as ``"open"``.
	        """
	        if turn <= self._OPENING_LAST_TURN:
	            return "open"
	        if turn >= self.max_turns - self._ENDGAME_LEAD:
	            return "end"
	        return "mid"

	    def _extract_proposed_terms(
	        self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]]
	    ) -> Dict[str, Any]:
	        """Work out the full set of terms currently on the table.

	        * ``concede`` / ``demand`` actions change a single issue: the latest
	          terms from ``history`` are copied and ``issue_name`` is set to
	          ``new_value`` (when both are present). Any ``proposed_terms`` on
	          such an action are ignored.
	        * Otherwise the action's own ``proposed_terms`` are used, falling
	          back to the latest terms in ``history`` and finally to ``{}``.

	        The returned dict is always a fresh copy.
	        """
	        if vendor_action.get("action_type") in ("concede", "demand"):
	            running = dict(self._latest_terms_from_history(history) or {})
	            issue = vendor_action.get("issue_name")
	            new_value = vendor_action.get("new_value")
	            if issue is not None and new_value is not None:
	                running[issue] = new_value
	            return running

	        terms = dict(vendor_action.get("proposed_terms") or {})
	        if terms:
	            return terms
	        return self._latest_terms_from_history(history) or {}

	    def _latest_terms_from_history(
	        self, history: List[Dict[str, Any]]
	    ) -> Optional[Dict[str, Any]]:
	        """Return a copy of the newest non-empty ``proposed_terms``, if any.

	        ``history`` may be ``None`` or empty, in which case ``None`` is
	        returned.
	        """
	        for entry in reversed(history or []):
	            if entry.get("proposed_terms"):
	                return dict(entry["proposed_terms"])
	        return None

	    def _evaluate_offer(self, terms: Dict[str, Any]) -> float:
	        """Compute the client's utility for ``terms`` (``0.0`` when empty)."""
	        if not terms:
	            return 0.0
	        return compute_utility(terms, self.brief, self.fixture)

	    def _violates_dealbreaker(self, terms: Dict[str, Any]) -> bool:
	        """Check ``terms`` against ``brief["dealbreakers"]``."""
	        from .utility import _is_dealbreaker_violated  # local import to avoid cycles

	        return _is_dealbreaker_violated(terms, self.brief.get("dealbreakers"))

	    def _should_walk_away(self, turn: int, history: List[Dict[str, Any]]) -> bool:
	        """Decide whether the latest terms in ``history`` justify walking.

	        True when those terms hit a dealbreaker, or when it is the last turn
	        or later (``turn >= max_turns - 1``) and their utility is below the
	        walk threshold. False when the history holds no terms at all.
	        """
	        latest = self._latest_terms_from_history(history)
	        if not latest:
	            return False
	        if self._violates_dealbreaker(latest):
	            return True
	        # Utility is only needed for the last-turn test, so compute it lazily.
	        return (
	            turn >= self.max_turns - 1
	            and self._evaluate_offer(latest) < self._walk_threshold
	        )

	    def _pick_concession_issue(
	        self, current_terms: Dict[str, Any]
	    ) -> Optional[str]:
	        """Pick the lowest-priority issue we haven't already conceded much on.

	        Issues are tried in ascending priority (ties broken by name); the
	        first one that is present in ``current_terms`` and has fewer than
	        ``_MAX_CONCESSIONS_PER_ISSUE`` recorded concessions wins. If none
	        qualifies the overall lowest-priority issue is returned, or ``None``
	        when the brief lists no priorities.
	        """
	        priorities: Dict[str, float] = self.brief.get("private_priorities", {})
	        ordered = sorted(priorities.items(), key=lambda kv: (kv[1], kv[0]))
	        for name, _ in ordered:
	            if self._concessions.get(name, 0) >= self._MAX_CONCESSIONS_PER_ISSUE:
	                continue
	            if name in current_terms:
	                return name
	        return ordered[0][0] if ordered else None

	    def _craft_counter_offer(
	        self,
	        vendor_terms: Dict[str, Any],
	        turn: int,
	        end_game: bool,
	        opening: bool = False,
	    ) -> Dict[str, Any]:
	        """Produce a counter offer that nudges the deal toward our ideals.

	        With no terms on the table the client simply proposes every issue's
	        ``client_opening``. Otherwise the vendor's terms are copied and each
	        fixture issue is adjusted in place; the size of a numeric adjustment
	        depends on the strategy and the phase.

	        Args:
	            vendor_terms: Terms currently proposed (may be empty).
	            turn: Current turn; accepted for interface symmetry, not read.
	            end_game: Scale the concession step by
	                ``_ENDGAME_STEP_MULTIPLIER``.
	            opening: Use the small ``_OPENING_MOVE_FRACTION`` instead of the
	                strategy's concession step, and leave low-priority
	                categorical issues untouched.

	        Returns:
	            A ``counter_offer`` action, or a ``walk_away`` action if the
	            adjusted terms would violate the client's own dealbreakers.
	        """
	        if not vendor_terms:
	            opening_terms = {
	                issue["name"]: issue["client_opening"]
	                for issue in self.fixture["issues"]
	            }
	            self._last_counter = dict(opening_terms)
	            return {
	                "action_type": "counter_offer",
	                "agent_role": "client",
	                "proposed_terms": opening_terms,
	                "reasoning": "client opening counter-offer",
	            }

	        ideals: Dict[str, Any] = self.brief.get("ideal_outcomes", {})
	        walkaways: Dict[str, Any] = self.brief.get("walkaway_thresholds", {})
	        value_maps: Dict[str, Dict[str, float]] = self.brief.get("value_maps", {})
	        priorities: Dict[str, float] = self.brief.get("private_priorities", {})

	        running = dict(vendor_terms)

	        if opening:
	            move_fraction = self._OPENING_MOVE_FRACTION
	        else:
	            move_fraction = self._concession_step * (
	                self._ENDGAME_STEP_MULTIPLIER if end_game else 1.0
	            )

	        for issue in self.fixture["issues"]:
	            name = issue["name"]
	            # Issues the vendor left out start from our own opening value.
	            if name not in running:
	                running[name] = issue["client_opening"]

	            prio = priorities.get(name, 0.0)
	            if issue["type"] == "numeric":
	                self._nudge_numeric(
	                    issue,
	                    running,
	                    ideals.get(name),
	                    walkaways.get(name),
	                    prio,
	                    move_fraction,
	                )
	            elif issue["type"] == "categorical":
	                self._nudge_categorical(
	                    name,
	                    running,
	                    ideals.get(name),
	                    value_maps.get(name, {}),
	                    prio,
	                    opening,
	                )

	        # Final dealbreaker safety check: if our own counter would violate
	        # our dealbreakers, refuse to send it and walk instead.
	        if self._violates_dealbreaker(running):
	            return self._build_walkaway(
	                "our own counter would violate our dealbreaker"
	            )

	        if end_game:
	            phase_label = "end-game"
	        elif opening:
	            phase_label = "opening"
	        else:
	            phase_label = "mid-game"

	        self._last_counter = dict(running)
	        return {
	            "action_type": "counter_offer",
	            "agent_role": "client",
	            "proposed_terms": running,
	            "reasoning": f"{self.strategy} {phase_label} counter",
	        }

	    def _nudge_numeric(
	        self,
	        issue: Dict[str, Any],
	        running: Dict[str, Any],
	        ideal: Any,
	        walk: Any,
	        prio: float,
	        move_fraction: float,
	    ) -> None:
	        """Move one numeric issue in ``running`` a fraction toward its target.

	        High-priority issues move toward ``ideal``; the rest move toward the
	        ``walk`` threshold and are counted as a concession. The issue is left
	        untouched when the current value, ideal or walk threshold cannot be
	        converted to ``float``.
	        """
	        name = issue["name"]
	        try:
	            cur_v = float(running[name])
	            ideal_v = float(ideal)
	            walk_v = float(walk)
	        except (TypeError, ValueError):
	            return

	        if prio >= self._HIGH_PRIORITY:
	            target = ideal_v
	        else:
	            target = walk_v
	            self._concessions[name] = self._concessions.get(name, 0) + 1

	        new_v = cur_v + (target - cur_v) * move_fraction

	        # Keep integer-valued issues integral; otherwise round to 2 decimals.
	        both_int = isinstance(issue["vendor_opening"], int) and isinstance(
	            issue["client_opening"], int
	        )
	        running[name] = int(round(new_v)) if both_int else round(new_v, 2)

	    def _nudge_categorical(
	        self,
	        name: str,
	        running: Dict[str, Any],
	        ideal: Any,
	        vmap: Dict[str, float],
	        prio: float,
	        opening: bool,
	    ) -> None:
	        """Adjust one categorical issue in ``running`` using its value map.

	        ``vmap`` maps option name to the client's score for it; an unknown
	        current option scores ``0.0``. Nothing happens without a value map.

	        * High priority and current score below
	          ``_CATEGORICAL_DEMAND_BELOW``: switch to ``ideal``. Skipped when no
	          ideal is configured, so ``None`` never ends up in the offer.
	        * Low priority, current score below ``_CATEGORICAL_ADJUST_BELOW`` and
	          not the opening phase: switch to the lowest-scoring option that
	          still scores strictly above the current one and below ``1.0``, and
	          record it in ``_concessions``.
	        """
	        if not vmap:
	            return

	        cur_score = vmap.get(str(running[name]), 0.0)
	        high_priority = prio >= self._HIGH_PRIORITY

	        if high_priority:
	            if cur_score < self._CATEGORICAL_DEMAND_BELOW and ideal is not None:
	                running[name] = ideal
	            return

	        if cur_score >= self._CATEGORICAL_ADJUST_BELOW or opening:
	            return

	        for option, score in sorted(vmap.items(), key=lambda kv: kv[1]):
	            if cur_score < score < 1.0:
	                running[name] = option
	                self._concessions[name] = self._concessions.get(name, 0) + 1
	                break

	    def _build_accept(
	        self, reason: str, terms: Optional[Dict[str, Any]] = None
	    ) -> Dict[str, Any]:
	        """Build an ``accept_offer`` action; ``terms`` is attached when given."""
	        action: Dict[str, Any] = {
	            "action_type": "accept_offer",
	            "agent_role": "client",
	        }
	        if terms is not None:
	            action["proposed_terms"] = terms
	        action["reasoning"] = reason
	        return action

	    def _build_walkaway(self, reason: str) -> Dict[str, Any]:
	        """Build a ``walk_away`` action carrying ``reason``."""
	        return {
	            "action_type": "walk_away",
	            "agent_role": "client",
	            "reasoning": reason,
	        }