File size: 12,527 Bytes
21a8fc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
"""Free, fast Client opponent used during training. NO API calls.

The ScriptedClient implements a deterministic-given-seed negotiation policy
in three phases (open / mid / end) with three strategy variants
(aggressive / balanced / conciliatory).
"""
from __future__ import annotations

import random
from typing import Any, Dict, List, Optional

from .utility import compute_utility


class ScriptedClient:
    STRATEGIES = ("aggressive", "balanced", "conciliatory")

    def __init__(
        self,
        brief: Dict[str, Any],
        fixture: Dict[str, Any],
        strategy: str = "balanced",
        seed: Optional[int] = None,
    ):
        if strategy not in self.STRATEGIES:
            strategy = "balanced"
        self.brief = brief
        self.fixture = fixture
        self.strategy = strategy
        self.rng = random.Random(seed if seed is not None else 0)
        self.max_turns = int(fixture.get("max_turns", 30))

        # Track what we've conceded on so we don't keep folding on the same issue.
        self._concessions: Dict[str, int] = {}
        # Track our most recent counter so we can be consistent.
        self._last_counter: Optional[Dict[str, Any]] = None

        # Per-strategy parameters
        if strategy == "aggressive":
            self._accept_threshold = 0.65
            self._concession_step = 0.10
            self._walk_threshold = 0.45
        elif strategy == "conciliatory":
            self._accept_threshold = 0.45
            self._concession_step = 0.25
            self._walk_threshold = 0.30
        else:  # balanced
            self._accept_threshold = 0.55
            self._concession_step = 0.18
            self._walk_threshold = 0.40

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def respond(
        self,
        vendor_action: Dict[str, Any],
        history: List[Dict[str, Any]],
        turn: int,
    ) -> Dict[str, Any]:
        """Return a NegotiationAction-like dict representing the client's move."""
        if vendor_action is None:
            vendor_action = {}

        # If vendor walked away, mirror that (terminal handled by arena).
        if vendor_action.get("action_type") == "walk_away":
            return self._build_walkaway("vendor walked first")

        # If vendor accepted: client's response is moot (arena already locks deal).
        if vendor_action.get("action_type") == "accept_offer":
            return {
                "action_type": "accept_offer",
                "agent_role": "client",
                "reasoning": "vendor already accepted",
            }

        # Evaluate the most current proposed terms (vendor's, falling back
        # to whatever's on the table from the offer history).
        proposed = self._extract_proposed_terms(vendor_action, history)
        client_util = self._evaluate_offer(proposed)

        phase = self._phase(turn)

        # Hard dealbreaker: walk if the proposed deal violates ours.
        if proposed and self._violates_dealbreaker(proposed):
            return self._build_walkaway("vendor proposal hits a dealbreaker")

        if phase == "end":
            # Last 5 turns: if acceptable, accept; else walk if too low.
            if client_util >= self._accept_threshold and proposed:
                return {
                    "action_type": "accept_offer",
                    "agent_role": "client",
                    "proposed_terms": proposed,
                    "reasoning": (
                        f"acceptable end-game utility ({client_util:.2f}) "
                        f"under {self.strategy} strategy"
                    ),
                }
            if client_util < self._walk_threshold:
                return self._build_walkaway(
                    f"end-game utility {client_util:.2f} below walk threshold"
                )
            # Otherwise make one more counter
            return self._craft_counter_offer(proposed, turn, end_game=True)

        if phase == "open":
            # Demand near-ideal terms. If vendor opener is already great, accept.
            if client_util >= 0.85 and proposed:
                return {
                    "action_type": "accept_offer",
                    "agent_role": "client",
                    "proposed_terms": proposed,
                    "reasoning": "vendor opening already meets our ideal",
                }
            return self._craft_counter_offer(proposed, turn, end_game=False, opening=True)

        # Mid-game: reciprocate concessions / counter
        return self._craft_counter_offer(proposed, turn, end_game=False, opening=False)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _phase(self, turn: int) -> str:
        if turn <= 3:
            return "open"
        if turn >= self.max_turns - 4:
            return "end"
        return "mid"

    def _extract_proposed_terms(
        self, vendor_action: Dict[str, Any], history: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        terms = dict(vendor_action.get("proposed_terms") or {})
        # If vendor used concede/demand, fold the single-issue change
        # into the running terms from history.
        if vendor_action.get("action_type") in ("concede", "demand"):
            running = self._latest_terms_from_history(history) or {}
            running = dict(running)
            issue = vendor_action.get("issue_name")
            new_value = vendor_action.get("new_value")
            if issue is not None and new_value is not None:
                running[issue] = new_value
            return running
        if not terms:
            return self._latest_terms_from_history(history) or {}
        return terms

    def _latest_terms_from_history(
        self, history: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        for entry in reversed(history):
            if entry.get("proposed_terms"):
                return dict(entry["proposed_terms"])
        return None

    def _evaluate_offer(self, terms: Dict[str, Any]) -> float:
        """compute client's utility for the proposed terms."""
        if not terms:
            return 0.0
        return compute_utility(terms, self.brief, self.fixture)

    def _violates_dealbreaker(self, terms: Dict[str, Any]) -> bool:
        """Report whether ``terms`` break any of the brief's ``dealbreakers``.

        The check itself is delegated to ``utility._is_dealbreaker_violated``.
        """
        # Imported lazily to avoid an import cycle with the utility module.
        from .utility import _is_dealbreaker_violated as is_violated

        dealbreakers = self.brief.get("dealbreakers")
        return is_violated(terms, dealbreakers)

    def _should_walk_away(self, turn: int, history: List[Dict[str, Any]]) -> bool:
        latest = self._latest_terms_from_history(history)
        if not latest:
            return False
        util = self._evaluate_offer(latest)
        if self._violates_dealbreaker(latest):
            return True
        if turn >= self.max_turns - 1 and util < self._walk_threshold:
            return True
        return False

    def _pick_concession_issue(
        self, current_terms: Dict[str, Any]
    ) -> Optional[str]:
        """Pick the lowest-priority issue we haven't already conceded much on."""
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})
        # Sort issues ascending by priority — concede on cheapest first.
        ordered = sorted(priorities.items(), key=lambda kv: (kv[1], kv[0]))
        for name, _ in ordered:
            if self._concessions.get(name, 0) >= 3:
                continue
            if name in current_terms:
                return name
        return ordered[0][0] if ordered else None

    def _craft_counter_offer(
        self,
        vendor_terms: Dict[str, Any],
        turn: int,
        end_game: bool,
        opening: bool = False,
    ) -> Dict[str, Any]:
        """Produce a counter offer that nudges the deal toward our ideals.

        The strength of the nudge depends on the strategy and phase.
        """
        ideals: Dict[str, Any] = self.brief.get("ideal_outcomes", {})
        walkaways: Dict[str, Any] = self.brief.get("walkaway_thresholds", {})
        value_maps: Dict[str, Dict[str, float]] = self.brief.get("value_maps", {})
        priorities: Dict[str, float] = self.brief.get("private_priorities", {})

        # Start from whatever's currently proposed; if empty, start from our opening.
        if vendor_terms:
            running = dict(vendor_terms)
        else:
            running = {
                issue["name"]: issue["client_opening"]
                for issue in self.fixture["issues"]
            }
            self._last_counter = dict(running)
            return {
                "action_type": "counter_offer",
                "agent_role": "client",
                "proposed_terms": running,
                "reasoning": "client opening counter-offer",
            }

        # Pick how aggressively we move toward the vendor's terms.
        if opening:
            # Stick near our opening; only cosmetic concessions.
            move_fraction = 0.05
        else:
            move_fraction = self._concession_step * (1.4 if end_game else 1.0)

        for issue in self.fixture["issues"]:
            name = issue["name"]
            if name not in running:
                running[name] = issue["client_opening"]

            current = running[name]
            ideal = ideals.get(name)
            walk = walkaways.get(name)

            # On HIGH-priority issues, demand harder (push toward ideal).
            # On LOW-priority issues, concede toward the vendor's value.
            prio = priorities.get(name, 0.0)

            if issue["type"] == "numeric":
                try:
                    cur_v = float(current)
                    ideal_v = float(ideal)
                    walk_v = float(walk)
                except (TypeError, ValueError):
                    continue

                if prio >= 0.20:
                    # demand toward our ideal
                    target = ideal_v
                else:
                    # concede toward vendor's last value (if any) but not past walkaway
                    target = walk_v
                    self._concessions[name] = self._concessions.get(name, 0) + 1

                new_v = cur_v + (target - cur_v) * move_fraction
                # Round numerics sensibly
                if isinstance(issue["vendor_opening"], int) and isinstance(
                    issue["client_opening"], int
                ):
                    new_v = int(round(new_v))
                else:
                    new_v = round(new_v, 2)
                running[name] = new_v

            elif issue["type"] == "categorical":
                vmap = value_maps.get(name, {})
                if not vmap:
                    continue
                # If vendor proposal is already 'good enough' for us, accept it.
                cur_score = vmap.get(str(current), 0.0)
                if prio >= 0.20 and cur_score < 0.5:
                    # demand our ideal
                    running[name] = ideal
                elif prio < 0.20 and cur_score < 0.4 and not opening:
                    # concede: pick the next-best value above current
                    sorted_vals = sorted(vmap.items(), key=lambda kv: kv[1])
                    # Find a value strictly worse for us than ideal but better than current
                    for opt, sc in sorted_vals:
                        if sc > cur_score and sc < 1.0:
                            running[name] = opt
                            self._concessions[name] = self._concessions.get(name, 0) + 1
                            break

        # Final dealbreaker safety check: if our own counter would violate
        # our dealbreakers, refuse to send it and walk instead.
        if self._violates_dealbreaker(running):
            return self._build_walkaway("our own counter would violate our dealbreaker")

        self._last_counter = dict(running)
        return {
            "action_type": "counter_offer",
            "agent_role": "client",
            "proposed_terms": running,
            "reasoning": (
                f"{self.strategy} {'end-game' if end_game else ('opening' if opening else 'mid-game')} counter"
            ),
        }

    def _build_walkaway(self, reason: str) -> Dict[str, Any]:
        return {
            "action_type": "walk_away",
            "agent_role": "client",
            "reasoning": reason,
        }