File size: 16,201 Bytes
89e2588
 
 
81ed7f7
89e2588
 
 
231a0fb
 
 
 
 
89e2588
 
 
 
 
 
 
231a0fb
 
89e2588
 
 
 
 
 
 
 
 
 
231a0fb
 
89e2588
 
 
 
 
 
 
 
231a0fb
89e2588
 
 
231a0fb
89e2588
 
 
 
 
 
 
 
 
 
231a0fb
89e2588
4a67ae9
231a0fb
 
 
89e2588
 
 
231a0fb
 
89e2588
231a0fb
 
 
89e2588
231a0fb
89e2588
231a0fb
 
89e2588
4a67ae9
231a0fb
 
4a67ae9
 
231a0fb
4a67ae9
231a0fb
 
 
 
 
89e2588
 
 
 
231a0fb
 
 
 
89e2588
 
231a0fb
 
89e2588
231a0fb
 
89e2588
 
 
231a0fb
89e2588
231a0fb
 
89e2588
231a0fb
 
89e2588
950537f
 
 
 
 
 
231a0fb
950537f
 
 
 
231a0fb
950537f
 
 
 
 
 
 
 
 
 
 
 
 
 
c5cd6a7
 
950537f
 
 
 
 
 
 
 
231a0fb
950537f
 
 
 
 
 
231a0fb
950537f
 
 
 
 
c5cd6a7
950537f
 
 
 
 
 
 
 
 
 
 
 
 
 
c5cd6a7
950537f
 
 
 
 
231a0fb
 
950537f
231a0fb
950537f
 
c5cd6a7
231a0fb
950537f
 
 
 
 
 
 
 
 
 
 
 
 
81ed7f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b234c35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5cd6a7
b234c35
 
 
 
 
 
 
 
 
 
 
 
c5cd6a7
b234c35
 
 
 
 
 
 
f98fb0d
 
 
 
 
 
 
b234c35
 
 
 
 
 
 
 
 
 
c5cd6a7
b234c35
 
 
 
 
 
 
 
 
 
 
 
c5cd6a7
b234c35
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# squid_game.py

from functools import lru_cache
import math

def parse_tier_map(tier_str: str):
    """
    Example tier_str:
      \"1-4:1\n5-6:3\"
    means:
      if a player has 1..4 squids => each worth x1 => total = k*1
      if a player has 5..6 squids => each worth x3 => total = k*3
    """
    lines = tier_str.strip().splitlines()
    tier_map = []
    for line in lines:
        range_part, mult_part = line.split(":")
        per_squid_mult = float(mult_part.strip())

        if "-" in range_part:
            low_str, high_str = range_part.split("-")
            low, high = int(low_str), int(high_str)
        else:
            low = high = int(range_part.strip())

        tier_map.append((low, high, per_squid_mult))
    tier_map.sort(key=lambda x: x[0])
    return tier_map

def tierValue(k: int, tier_map) -> float:
    """
    For k squids, find bracket => return k * bracket_multiplier.
    If k <= 0 => 0.
    """
    if k <= 0:
        return 0.0
    for (low, high, mult) in tier_map:
        if low <= k <= high:
            return k * mult
    if tier_map and k > tier_map[-1][1]:
        return k * tier_map[-1][2]
    return 0.0  # fallback if no bracket matches

def is_terminal(distribution, remaining) -> bool:
    """
    Game ends if exactly one zero-squid player or no squids remain.
    """
    zero_count = sum(1 for x in distribution if x == 0)
    if zero_count == 1:
        return True
    if remaining == 0:
        return True
    return False

def compute_final_payout(distribution, tier_map):
    """
    distribution: e.g. (0,0,4)
    
    RULES:
      - If exactly 1 zero-squid => that one pays sum_of_winners_tier
      - If multiple zeros => each zero-squid pays sum_of_winners_tier individually
        => each winner gets (number_of_zero_squids * winner_tier_value).
    """
    n = len(distribution)
    zero_indices = [i for i, x in enumerate(distribution) if x == 0]
    m = len(zero_indices)  # number of zero-squid players
    winner_indices = [i for i, x in enumerate(distribution) if x > 0]

    # sum of each winner's bracketed total
    winner_values = [tierValue(distribution[w], tier_map) for w in winner_indices]
    sum_winner_values = sum(winner_values)

    payoffs = [0.0]*n
    if m == 0:
        # No zeros => no payment => everyone gets 0
        return payoffs
    else:
        # Each zero pays sum_winner_values
        for z in zero_indices:
            payoffs[z] = -sum_winner_values
        # Each winner gets their tierValue (if single loser)
        # or m * their tierValue (if multiple losers)
        for i, w in enumerate(winner_indices):
            payoffs[w] = winner_values[i] if m == 1 else m * winner_values[i]
        return payoffs

def format_state(distribution, remaining):
    """Format a game state for display"""
    return f"({','.join(map(str, distribution))}, {remaining})"

@lru_cache(None)
def get_expected_value(distribution, remaining, tier_map_tuple):
    """
    Memoized DP: returns an N-tuple of payoffs from state=(distribution, remaining).
    distribution: tuple of ints
    remaining: int (squids left)
    tier_map_tuple: bracket info (like ( (1,4,1.0), (5,6,3.0) ))
    """
    if is_terminal(distribution, remaining):
        final_pay = compute_final_payout(distribution, tier_map_tuple)
        return tuple(final_pay)
    
    n = len(distribution)
    accumulated = [0.0]*n
    for winner in range(n):
        new_dist = list(distribution)
        new_dist[winner] += 1
        sub_ev = get_expected_value(tuple(new_dist), remaining-1, tier_map_tuple)
        for i in range(n):
            accumulated[i] += sub_ev[i]
    # average
    for i in range(n):
        accumulated[i] /= n
    return tuple(accumulated)

def get_expected_value_forced_win(
    i, 
    distribution, 
    leftover, 
    tier_map_tuple
):
    """
    假设下一只乌贼 100% 给玩家 i。
    则先把 distribution[i] += 1, leftover -=1,
    然后对 (distribution', leftover') 做完全随机的 get_expected_value(...)。
    返回:一个长度 N 的 tuple,表示每个玩家在这种强制赢前提下的期望最终收益。
    """
    if leftover <= 0:
        # 没乌贼剩了,也可能是某些奇怪边界,直接算终局:
        return get_expected_value(distribution, leftover, tier_map_tuple)

    dist_forced = list(distribution)
    dist_forced[i] += 1
    new_dist = tuple(dist_forced)
    return get_expected_value(new_dist, leftover - 1, tier_map_tuple)


def get_expected_value_forced_lose(
    i, 
    distribution, 
    leftover, 
    tier_map_tuple,
    folded_players
):
    """
    假设下一只乌贼 100% 不会给玩家 i,
    即本轮发乌贼只在其余 (n-1) 人中随机选 winner,
    然后后续 (leftover-1) 轮恢复正常 n 人随机。
    做法:遍历所有 winner != i (prob=1/(n-1)),发给那个 winner,
    然后 leftover-1 的状态再用 get_expected_value 完全随机。
    返回:一个长度 N 的 tuple (每个玩家最终EV)
    """
    n = len(distribution)
    if leftover <= 0:
        return get_expected_value(distribution, leftover, tier_map_tuple)

    # 如果 n=1,那就无可比了……(此处不太可能)
    # 一般 n>=2, leftover>=1
    
    # 假设我们这里显式地做一次 "下一只的发放" 的平均
    # winner只能在 [0..n-1] - {i} 之中。
    # Probability = 1/(n-1)

    accumulated = [0.0]*n
    valid_winners = [w for w in range(n) if (w != i and not folded_players[w])]

    for w in valid_winners:
        dist_next = list(distribution)
        dist_next[w] += 1
        sub_ev = get_expected_value(tuple(dist_next), leftover-1, tier_map_tuple)
        for p in range(n):
            accumulated[p] += sub_ev[p]

    # 做平均
    for p in range(n):
        accumulated[p] /= len(valid_winners)  # == (n-1)

    return tuple(accumulated)

def compute_ev_win_lose_two_extremes(distribution, leftover, tier_map_tuple,folded_players):
    """
    返回一个数据结构,记录每个玩家 i 在:
      - forced_win    时的期望收益
      - forced_lose   时的期望收益
      - difference    = forced_win - forced_lose
    """
    n = len(distribution)
    results = []

    for i in range(n):
        forced_win_vec = get_expected_value_forced_win(i, distribution, leftover, tier_map_tuple)
        forced_lose_vec = get_expected_value_forced_lose(i, distribution, leftover, tier_map_tuple,folded_players)

        # 我们可能只关心玩家 i 本人的比较, 也可以把全部人都算,
        # 这里演示只关心 i
        forced_win_i = forced_win_vec[i]
        forced_lose_i = forced_lose_vec[i]
        diff_i = forced_win_i - forced_lose_i

        results.append({
            'player': i,
            'forcedWinEV': forced_win_i,
            'forcedLoseEV': forced_lose_i,
            'difference': diff_i
        })
    return results

# Infinite Squid Game Implementation
def infinite_squid_game_expected_counts_partial(distribution):
    """
    For the "infinite squid + cumulative score, stop when only 1 player has 0" game,
    given the current distribution (list/tuple, representing each player's current count),
    returns a list representing the expected number of squids each player will have at the end of the game.
    
    Rule summary:
      - If the current number of players with 0 squids = z:
        * z=1 => Game stops immediately, no more squids distributed => final value = current holdings
        * z=0 => "Exactly 1 player with 0" can never occur, infinite loop => return inf
        * z>=2 => Expected to distribute n*(H_z -1) more times (H_z=1+1/2+...+1/z),
                  Expected number of times each player is selected = (H_z-1),
                  Therefore final = distribution[i] + (H_z-1).
    """
    n = len(distribution)
    zero_count = sum(1 for x in distribution if x == 0)
    
    # Already satisfied or exceeded "end/cannot end" condition
    if zero_count == 1:
        # Game ends immediately => maintain current state
        return list(distribution)
    if zero_count == 0:
        # "Exactly 1 player with 0" can never occur => game never ends => infinite score
        return [math.inf]*n
    
    # zero_count >= 2 => continue distributing squids until z=1
    H_z = sum(1.0 / k for k in range(1, zero_count+1))  # Harmonic number H_z
    increment = (H_z - 1.0)
    return [x + increment for x in distribution]

# Finite Squid Game Implementation
def get_bitmask(n):
    """
    Returns a list [0,1,2,...,(1<<n)-1], purely for iteration reference
    """
    return list(range(1 << n))

def count_ones(x):
    """Count how many 1s are in the binary representation of x"""
    return bin(x).count("1")

@lru_cache(None)
def dp_ev(n, bitmask_u, leftover):
    """
    Returns a length-n tuple (prob0, prob1, ..., prob_{n-1}),
    representing the "final probability" of each player getting a squid in state (U=bitmask_u, leftover).
    
    - n: total number of players
    - bitmask_u: which players haven't received a squid yet (binary mask)
    - leftover: remaining squids that can be distributed
    """
    # 1) Terminal condition
    num_u = count_ones(bitmask_u)  # Number of players who haven't received a squid yet
    if num_u <= 1 or leftover == 0:
        # Game ends immediately
        # For players who already have a squid, their probability=1
        # For those who don't, probability=0
        ret = []
        for i in range(n):
            # If i is in bitmask_u => probability=0, otherwise =1
            if (bitmask_u & (1 << i)) != 0:
                ret.append(0.0)
            else:
                ret.append(1.0)
        return tuple(ret)
    
    # 2) Not terminated => randomly select 1 person from U to give a squid
    ret_accum = [0.0]*n
    # Randomly select w from U with equal probability
    for w in range(n):
        bit_w = (1 << w)
        if (bitmask_u & bit_w) != 0:
            # w is in U
            # Next state: after w gets a squid, remove w from bitmask
            next_u = bitmask_u ^ bit_w  # Set to 0
            sub_ev = dp_ev(n, next_u, leftover - 1)
            
            for i in range(n):
                ret_accum[i] += sub_ev[i] / num_u
    
    # 3) Return accumulated result
    return tuple(ret_accum)

def finite_squid_game_probabilities(n, r):
    """
    Returns a length-n probability vector: prob[i] = probability of player i getting a squid
    Based on dp_ev().
    
    - n: number of players
    - r: number of squids to distribute
    """
    # Initial U = (1<<n)-1, meaning no one has a squid yet
    bitmask_u = (1 << n) - 1
    return dp_ev(n, bitmask_u, r)


def _compute_final_payout_finite_tuple(distribution):
    """
    根据玩家鱿鱼分布元组,为有限游戏计算终局收益。
    规则:输家(0个鱿鱼)支付所有赢家(1个鱿鱼)的鱿鱼价值总和。
    """
    n = len(distribution)
    winners = [i for i, count in enumerate(distribution) if count == 1]
    losers = [i for i, count in enumerate(distribution) if count == 0]
    
    num_winners = len(winners)
    num_losers = len(losers)
    
    payoffs = [0.0] * n
    
    if num_winners == 0 or num_losers == 0:
        return tuple(payoffs)
        
    sum_winner_values = float(num_winners) # 每个赢家价值为1
    
    for i in losers:
        payoffs[i] = -sum_winner_values
    
    for i in winners:
        payoffs[i] = 1.0 * num_losers
        
    return tuple(payoffs)

def _is_terminal_finite(distribution, remaining):
    """
    判断有限游戏是否结束。
    结束条件:1. 没鱿鱼了。 2. 最多只剩一个玩家还没有鱿鱼。
    """
    if remaining == 0:
        return True
    
    players_without_squid = sum(1 for count in distribution if count == 0)
    if players_without_squid <= 1:
        return True
        
    return False

@lru_cache(None)
def get_expected_value_finite(distribution, remaining):
    """
    (模仿 get_expected_value)
    使用元组作为状态,计算有限游戏的期望收益 (EV) 向量。
    """
    if _is_terminal_finite(distribution, remaining):
        return _compute_final_payout_finite_tuple(distribution)
    
    n = len(distribution)
    accumulated_ev = [0.0] * n
    
    # 找出所有还没有鱿鱼的合格候选人
    eligible_players = [i for i, count in enumerate(distribution) if count == 0]
    num_eligible = len(eligible_players)
    
    if num_eligible == 0: # 理论上不应该发生,除非游戏已结束
        return _compute_final_payout_finite_tuple(distribution)

    # 遍历每一个合格的候选人,模拟他获得下一个鱿鱼
    for winner_idx in eligible_players:
        new_dist_list = list(distribution)
        new_dist_list[winner_idx] += 1
        
        # 递归调用
        sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining - 1)
        
        for i in range(n):
            accumulated_ev[i] += sub_ev[i]
            
    # 对所有可能的结果求平均
    for i in range(n):
        accumulated_ev[i] /= num_eligible
        
    return tuple(accumulated_ev)

def get_expected_value_finite_forced_win(i, distribution, remaining):
    """
    (模仿 get_expected_value_forced_win)
    假设下一个鱿鱼 100% 给玩家 i (前提是i合格)。
    """
    if remaining <= 0 or distribution[i] == 1:
        # 无法强制 (没鱿鱼了,或者i已经有鱿鱼),返回常规EV
        return get_expected_value_finite(distribution, remaining)
        
    # 强制给 i
    new_dist_list = list(distribution)
    new_dist_list[i] += 1
    
    # 在新状态上进行完全随机的EV计算
    return get_expected_value_finite(tuple(new_dist_list), remaining - 1)

def get_expected_value_finite_forced_lose(i, distribution, remaining,folded_players):
    """
    (模仿 get_expected_value_forced_lose)
    假设下一个鱿鱼 100% 不会给玩家 i。
    """
    if remaining <= 0 or distribution[i] == 1:
         # i 已经有鱿鱼,下一个本来就不会给他,等同于常规EV
         return get_expected_value_finite(distribution, remaining)
    
    n = len(distribution)
    accumulated_ev = [0.0] * n
    
    # 找出除了i之外所有合格的候选人
    eligible_players = [p for p, count in enumerate(distribution) if (p != i and not folded_players[p])]
    num_eligible = len(eligible_players)
    
    if num_eligible == 0: # 如果除了i没有别人可选了,游戏结束
        return _compute_final_payout_finite_tuple(distribution)
        
    # 遍历每一个合格的候选人,模拟他获得下一个鱿鱼
    for winner_idx in eligible_players:
        if distribution[winner_idx] == 0:
            new_dist_list = list(distribution)
            new_dist_list[winner_idx] += 1
            sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining - 1)
        else:
            new_dist_list = list(distribution)
            sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining)
        
        for p in range(n):
            accumulated_ev[p] += sub_ev[p]
            
    # 对所有可能的结果求平均
    for p in range(n):
        accumulated_ev[p] /= num_eligible
        
    return tuple(accumulated_ev)

def compute_ev_win_lose_finite_tuple(distribution, remaining,folded_players):
    """
    包装函数,为每个合格玩家计算 forced_win 和 forced_lose 的EV。
    """
    n = len(distribution)
    results = []
    
    base_ev = get_expected_value_finite(distribution, remaining)
    
    eligible_players = [i for i, count in enumerate(distribution) if count == 0]

    for i in eligible_players:
        win_ev_vec = get_expected_value_finite_forced_win(i, distribution, remaining)
        lose_ev_vec = get_expected_value_finite_forced_lose(i, distribution, remaining,folded_players)
        
        forced_win_i = win_ev_vec[i]
        forced_lose_i = lose_ev_vec[i]
        diff_i = forced_win_i - forced_lose_i
        
        results.append({
            'player': i,
            'forcedWinEV': forced_win_i,
            'forcedLoseEV': forced_lose_i,
            'difference': diff_i
        })
        
    return base_ev, results