|
|
|
|
|
|
|
|
from functools import lru_cache |
|
|
import math |
|
|
|
|
|
def parse_tier_map(tier_str: str): |
|
|
""" |
|
|
Example tier_str: |
|
|
\"1-4:1\n5-6:3\" |
|
|
means: |
|
|
if a player has 1..4 squids => each worth x1 => total = k*1 |
|
|
if a player has 5..6 squids => each worth x3 => total = k*3 |
|
|
""" |
|
|
lines = tier_str.strip().splitlines() |
|
|
tier_map = [] |
|
|
for line in lines: |
|
|
range_part, mult_part = line.split(":") |
|
|
per_squid_mult = float(mult_part.strip()) |
|
|
|
|
|
if "-" in range_part: |
|
|
low_str, high_str = range_part.split("-") |
|
|
low, high = int(low_str), int(high_str) |
|
|
else: |
|
|
low = high = int(range_part.strip()) |
|
|
|
|
|
tier_map.append((low, high, per_squid_mult)) |
|
|
tier_map.sort(key=lambda x: x[0]) |
|
|
return tier_map |
|
|
|
|
|
def tierValue(k: int, tier_map) -> float: |
|
|
""" |
|
|
For k squids, find bracket => return k * bracket_multiplier. |
|
|
If k <= 0 => 0. |
|
|
""" |
|
|
if k <= 0: |
|
|
return 0.0 |
|
|
for (low, high, mult) in tier_map: |
|
|
if low <= k <= high: |
|
|
return k * mult |
|
|
if tier_map and k > tier_map[-1][1]: |
|
|
return k * tier_map[-1][2] |
|
|
return 0.0 |
|
|
|
|
|
def is_terminal(distribution, remaining) -> bool: |
|
|
""" |
|
|
Game ends if exactly one zero-squid player or no squids remain. |
|
|
""" |
|
|
zero_count = sum(1 for x in distribution if x == 0) |
|
|
if zero_count == 1: |
|
|
return True |
|
|
if remaining == 0: |
|
|
return True |
|
|
return False |
|
|
|
|
|
def compute_final_payout(distribution, tier_map): |
|
|
""" |
|
|
distribution: e.g. (0,0,4) |
|
|
|
|
|
RULES: |
|
|
- If exactly 1 zero-squid => that one pays sum_of_winners_tier |
|
|
- If multiple zeros => each zero-squid pays sum_of_winners_tier individually |
|
|
=> each winner gets (number_of_zero_squids * winner_tier_value). |
|
|
""" |
|
|
n = len(distribution) |
|
|
zero_indices = [i for i, x in enumerate(distribution) if x == 0] |
|
|
m = len(zero_indices) |
|
|
winner_indices = [i for i, x in enumerate(distribution) if x > 0] |
|
|
|
|
|
|
|
|
winner_values = [tierValue(distribution[w], tier_map) for w in winner_indices] |
|
|
sum_winner_values = sum(winner_values) |
|
|
|
|
|
payoffs = [0.0]*n |
|
|
if m == 0: |
|
|
|
|
|
return payoffs |
|
|
else: |
|
|
|
|
|
for z in zero_indices: |
|
|
payoffs[z] = -sum_winner_values |
|
|
|
|
|
|
|
|
for i, w in enumerate(winner_indices): |
|
|
payoffs[w] = winner_values[i] if m == 1 else m * winner_values[i] |
|
|
return payoffs |
|
|
|
|
|
def format_state(distribution, remaining): |
|
|
"""Format a game state for display""" |
|
|
return f"({','.join(map(str, distribution))}, {remaining})" |
|
|
|
|
|
@lru_cache(None) |
|
|
def get_expected_value(distribution, remaining, tier_map_tuple): |
|
|
""" |
|
|
Memoized DP: returns an N-tuple of payoffs from state=(distribution, remaining). |
|
|
distribution: tuple of ints |
|
|
remaining: int (squids left) |
|
|
tier_map_tuple: bracket info (like ( (1,4,1.0), (5,6,3.0) )) |
|
|
""" |
|
|
if is_terminal(distribution, remaining): |
|
|
final_pay = compute_final_payout(distribution, tier_map_tuple) |
|
|
return tuple(final_pay) |
|
|
|
|
|
n = len(distribution) |
|
|
accumulated = [0.0]*n |
|
|
for winner in range(n): |
|
|
new_dist = list(distribution) |
|
|
new_dist[winner] += 1 |
|
|
sub_ev = get_expected_value(tuple(new_dist), remaining-1, tier_map_tuple) |
|
|
for i in range(n): |
|
|
accumulated[i] += sub_ev[i] |
|
|
|
|
|
for i in range(n): |
|
|
accumulated[i] /= n |
|
|
return tuple(accumulated) |
|
|
|
|
|
def get_expected_value_forced_win( |
|
|
i, |
|
|
distribution, |
|
|
leftover, |
|
|
tier_map_tuple |
|
|
): |
|
|
""" |
|
|
假设下一只乌贼 100% 给玩家 i。 |
|
|
则先把 distribution[i] += 1, leftover -=1, |
|
|
然后对 (distribution', leftover') 做完全随机的 get_expected_value(...)。 |
|
|
返回:一个长度 N 的 tuple,表示每个玩家在这种强制赢前提下的期望最终收益。 |
|
|
""" |
|
|
if leftover <= 0: |
|
|
|
|
|
return get_expected_value(distribution, leftover, tier_map_tuple) |
|
|
|
|
|
dist_forced = list(distribution) |
|
|
dist_forced[i] += 1 |
|
|
new_dist = tuple(dist_forced) |
|
|
return get_expected_value(new_dist, leftover - 1, tier_map_tuple) |
|
|
|
|
|
|
|
|
def get_expected_value_forced_lose( |
|
|
i, |
|
|
distribution, |
|
|
leftover, |
|
|
tier_map_tuple |
|
|
): |
|
|
""" |
|
|
假设下一只乌贼 100% 不会给玩家 i, |
|
|
即本轮发乌贼只在其余 (n-1) 人中随机选 winner, |
|
|
然后后续 (leftover-1) 轮恢复正常 n 人随机。 |
|
|
|
|
|
做法:遍历所有 winner != i (prob=1/(n-1)),发给那个 winner, |
|
|
然后 leftover-1 的状态再用 get_expected_value 完全随机。 |
|
|
|
|
|
返回:一个长度 N 的 tuple (每个玩家最终EV) |
|
|
""" |
|
|
n = len(distribution) |
|
|
if leftover <= 0: |
|
|
return get_expected_value(distribution, leftover, tier_map_tuple) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
accumulated = [0.0]*n |
|
|
valid_winners = [w for w in range(n) if w != i] |
|
|
|
|
|
for w in valid_winners: |
|
|
dist_next = list(distribution) |
|
|
dist_next[w] += 1 |
|
|
sub_ev = get_expected_value(tuple(dist_next), leftover-1, tier_map_tuple) |
|
|
for p in range(n): |
|
|
accumulated[p] += sub_ev[p] |
|
|
|
|
|
|
|
|
for p in range(n): |
|
|
accumulated[p] /= len(valid_winners) |
|
|
|
|
|
return tuple(accumulated) |
|
|
|
|
|
def compute_ev_win_lose_two_extremes(distribution, leftover, tier_map_tuple): |
|
|
""" |
|
|
返回一个数据结构,记录每个玩家 i 在: |
|
|
- forced_win 时的期望收益 |
|
|
- forced_lose 时的期望收益 |
|
|
- difference = forced_win - forced_lose |
|
|
""" |
|
|
n = len(distribution) |
|
|
results = [] |
|
|
|
|
|
for i in range(n): |
|
|
forced_win_vec = get_expected_value_forced_win(i, distribution, leftover, tier_map_tuple) |
|
|
forced_lose_vec = get_expected_value_forced_lose(i, distribution, leftover, tier_map_tuple) |
|
|
|
|
|
|
|
|
|
|
|
forced_win_i = forced_win_vec[i] |
|
|
forced_lose_i = forced_lose_vec[i] |
|
|
diff_i = forced_win_i - forced_lose_i |
|
|
|
|
|
results.append({ |
|
|
'player': i, |
|
|
'forcedWinEV': forced_win_i, |
|
|
'forcedLoseEV': forced_lose_i, |
|
|
'difference': diff_i |
|
|
}) |
|
|
return results |
|
|
|
|
|
|
|
|
def infinite_squid_game_expected_counts_partial(distribution): |
|
|
""" |
|
|
For the "infinite squid + cumulative score, stop when only 1 player has 0" game, |
|
|
given the current distribution (list/tuple, representing each player's current count), |
|
|
returns a list representing the expected number of squids each player will have at the end of the game. |
|
|
|
|
|
Rule summary: |
|
|
- If the current number of players with 0 squids = z: |
|
|
* z=1 => Game stops immediately, no more squids distributed => final value = current holdings |
|
|
* z=0 => "Exactly 1 player with 0" can never occur, infinite loop => return inf |
|
|
* z>=2 => Expected to distribute n*(H_z -1) more times (H_z=1+1/2+...+1/z), |
|
|
Expected number of times each player is selected = (H_z-1), |
|
|
Therefore final = distribution[i] + (H_z-1). |
|
|
""" |
|
|
n = len(distribution) |
|
|
zero_count = sum(1 for x in distribution if x == 0) |
|
|
|
|
|
|
|
|
if zero_count == 1: |
|
|
|
|
|
return list(distribution) |
|
|
if zero_count == 0: |
|
|
|
|
|
return [math.inf]*n |
|
|
|
|
|
|
|
|
H_z = sum(1.0 / k for k in range(1, zero_count+1)) |
|
|
increment = (H_z - 1.0) |
|
|
return [x + increment for x in distribution] |
|
|
|
|
|
|
|
|
def get_bitmask(n): |
|
|
""" |
|
|
Returns a list [0,1,2,...,(1<<n)-1], purely for iteration reference |
|
|
""" |
|
|
return list(range(1 << n)) |
|
|
|
|
|
def count_ones(x): |
|
|
"""Count how many 1s are in the binary representation of x""" |
|
|
return bin(x).count("1") |
|
|
|
|
|
@lru_cache(None) |
|
|
def dp_ev(n, bitmask_u, leftover): |
|
|
""" |
|
|
Returns a length-n tuple (prob0, prob1, ..., prob_{n-1}), |
|
|
representing the "final probability" of each player getting a squid in state (U=bitmask_u, leftover). |
|
|
|
|
|
- n: total number of players |
|
|
- bitmask_u: which players haven't received a squid yet (binary mask) |
|
|
- leftover: remaining squids that can be distributed |
|
|
""" |
|
|
|
|
|
num_u = count_ones(bitmask_u) |
|
|
if num_u <= 1 or leftover == 0: |
|
|
|
|
|
|
|
|
|
|
|
ret = [] |
|
|
for i in range(n): |
|
|
|
|
|
if (bitmask_u & (1 << i)) != 0: |
|
|
ret.append(0.0) |
|
|
else: |
|
|
ret.append(1.0) |
|
|
return tuple(ret) |
|
|
|
|
|
|
|
|
ret_accum = [0.0]*n |
|
|
|
|
|
for w in range(n): |
|
|
bit_w = (1 << w) |
|
|
if (bitmask_u & bit_w) != 0: |
|
|
|
|
|
|
|
|
next_u = bitmask_u ^ bit_w |
|
|
sub_ev = dp_ev(n, next_u, leftover - 1) |
|
|
|
|
|
for i in range(n): |
|
|
ret_accum[i] += sub_ev[i] / num_u |
|
|
|
|
|
|
|
|
return tuple(ret_accum) |
|
|
|
|
|
def finite_squid_game_probabilities(n, r): |
|
|
""" |
|
|
Returns a length-n probability vector: prob[i] = probability of player i getting a squid |
|
|
Based on dp_ev(). |
|
|
|
|
|
- n: number of players |
|
|
- r: number of squids to distribute |
|
|
""" |
|
|
|
|
|
bitmask_u = (1 << n) - 1 |
|
|
return dp_ev(n, bitmask_u, r) |
|
|
|