Spaces:
Sleeping
Sleeping
| # squid_game.py | |
| from functools import lru_cache | |
| import math | |
| def parse_tier_map(tier_str: str): | |
| """ | |
| Example tier_str: | |
| \"1-4:1\n5-6:3\" | |
| means: | |
| if a player has 1..4 squids => each worth x1 => total = k*1 | |
| if a player has 5..6 squids => each worth x3 => total = k*3 | |
| """ | |
| lines = tier_str.strip().splitlines() | |
| tier_map = [] | |
| for line in lines: | |
| range_part, mult_part = line.split(":") | |
| per_squid_mult = float(mult_part.strip()) | |
| if "-" in range_part: | |
| low_str, high_str = range_part.split("-") | |
| low, high = int(low_str), int(high_str) | |
| else: | |
| low = high = int(range_part.strip()) | |
| tier_map.append((low, high, per_squid_mult)) | |
| tier_map.sort(key=lambda x: x[0]) | |
| return tier_map | |
| def tierValue(k: int, tier_map) -> float: | |
| """ | |
| For k squids, find bracket => return k * bracket_multiplier. | |
| If k <= 0 => 0. | |
| """ | |
| if k <= 0: | |
| return 0.0 | |
| for (low, high, mult) in tier_map: | |
| if low <= k <= high: | |
| return k * mult | |
| if tier_map and k > tier_map[-1][1]: | |
| return k * tier_map[-1][2] | |
| return 0.0 # fallback if no bracket matches | |
| def is_terminal(distribution, remaining) -> bool: | |
| """ | |
| Game ends if exactly one zero-squid player or no squids remain. | |
| """ | |
| zero_count = sum(1 for x in distribution if x == 0) | |
| if zero_count == 1: | |
| return True | |
| if remaining == 0: | |
| return True | |
| return False | |
| def compute_final_payout(distribution, tier_map): | |
| """ | |
| distribution: e.g. (0,0,4) | |
| RULES: | |
| - If exactly 1 zero-squid => that one pays sum_of_winners_tier | |
| - If multiple zeros => each zero-squid pays sum_of_winners_tier individually | |
| => each winner gets (number_of_zero_squids * winner_tier_value). | |
| """ | |
| n = len(distribution) | |
| zero_indices = [i for i, x in enumerate(distribution) if x == 0] | |
| m = len(zero_indices) # number of zero-squid players | |
| winner_indices = [i for i, x in enumerate(distribution) if x > 0] | |
| # sum of each winner's bracketed total | |
| winner_values = [tierValue(distribution[w], tier_map) for w in winner_indices] | |
| sum_winner_values = sum(winner_values) | |
| payoffs = [0.0]*n | |
| if m == 0: | |
| # No zeros => no payment => everyone gets 0 | |
| return payoffs | |
| else: | |
| # Each zero pays sum_winner_values | |
| for z in zero_indices: | |
| payoffs[z] = -sum_winner_values | |
| # Each winner gets their tierValue (if single loser) | |
| # or m * their tierValue (if multiple losers) | |
| for i, w in enumerate(winner_indices): | |
| payoffs[w] = winner_values[i] if m == 1 else m * winner_values[i] | |
| return payoffs | |
| def format_state(distribution, remaining): | |
| """Format a game state for display""" | |
| return f"({','.join(map(str, distribution))}, {remaining})" | |
| def get_expected_value(distribution, remaining, tier_map_tuple): | |
| """ | |
| Memoized DP: returns an N-tuple of payoffs from state=(distribution, remaining). | |
| distribution: tuple of ints | |
| remaining: int (squids left) | |
| tier_map_tuple: bracket info (like ( (1,4,1.0), (5,6,3.0) )) | |
| """ | |
| if is_terminal(distribution, remaining): | |
| final_pay = compute_final_payout(distribution, tier_map_tuple) | |
| return tuple(final_pay) | |
| n = len(distribution) | |
| accumulated = [0.0]*n | |
| for winner in range(n): | |
| new_dist = list(distribution) | |
| new_dist[winner] += 1 | |
| sub_ev = get_expected_value(tuple(new_dist), remaining-1, tier_map_tuple) | |
| for i in range(n): | |
| accumulated[i] += sub_ev[i] | |
| # average | |
| for i in range(n): | |
| accumulated[i] /= n | |
| return tuple(accumulated) | |
| def get_expected_value_forced_win( | |
| i, | |
| distribution, | |
| leftover, | |
| tier_map_tuple | |
| ): | |
| """ | |
| 假设下一只乌贼 100% 给玩家 i。 | |
| 则先把 distribution[i] += 1, leftover -=1, | |
| 然后对 (distribution', leftover') 做完全随机的 get_expected_value(...)。 | |
| 返回:一个长度 N 的 tuple,表示每个玩家在这种强制赢前提下的期望最终收益。 | |
| """ | |
| if leftover <= 0: | |
| # 没乌贼剩了,也可能是某些奇怪边界,直接算终局: | |
| return get_expected_value(distribution, leftover, tier_map_tuple) | |
| dist_forced = list(distribution) | |
| dist_forced[i] += 1 | |
| new_dist = tuple(dist_forced) | |
| return get_expected_value(new_dist, leftover - 1, tier_map_tuple) | |
| def get_expected_value_forced_lose( | |
| i, | |
| distribution, | |
| leftover, | |
| tier_map_tuple, | |
| folded_players | |
| ): | |
| """ | |
| 假设下一只乌贼 100% 不会给玩家 i, | |
| 即本轮发乌贼只在其余 (n-1) 人中随机选 winner, | |
| 然后后续 (leftover-1) 轮恢复正常 n 人随机。 | |
| 做法:遍历所有 winner != i (prob=1/(n-1)),发给那个 winner, | |
| 然后 leftover-1 的状态再用 get_expected_value 完全随机。 | |
| 返回:一个长度 N 的 tuple (每个玩家最终EV) | |
| """ | |
| n = len(distribution) | |
| if leftover <= 0: | |
| return get_expected_value(distribution, leftover, tier_map_tuple) | |
| # 如果 n=1,那就无可比了……(此处不太可能) | |
| # 一般 n>=2, leftover>=1 | |
| # 假设我们这里显式地做一次 "下一只的发放" 的平均 | |
| # winner只能在 [0..n-1] - {i} 之中。 | |
| # Probability = 1/(n-1) | |
| accumulated = [0.0]*n | |
| valid_winners = [w for w in range(n) if (w != i and not folded_players[w])] | |
| for w in valid_winners: | |
| dist_next = list(distribution) | |
| dist_next[w] += 1 | |
| sub_ev = get_expected_value(tuple(dist_next), leftover-1, tier_map_tuple) | |
| for p in range(n): | |
| accumulated[p] += sub_ev[p] | |
| # 做平均 | |
| for p in range(n): | |
| accumulated[p] /= len(valid_winners) # == (n-1) | |
| return tuple(accumulated) | |
| def compute_ev_win_lose_two_extremes(distribution, leftover, tier_map_tuple,folded_players): | |
| """ | |
| 返回一个数据结构,记录每个玩家 i 在: | |
| - forced_win 时的期望收益 | |
| - forced_lose 时的期望收益 | |
| - difference = forced_win - forced_lose | |
| """ | |
| n = len(distribution) | |
| results = [] | |
| for i in range(n): | |
| forced_win_vec = get_expected_value_forced_win(i, distribution, leftover, tier_map_tuple) | |
| forced_lose_vec = get_expected_value_forced_lose(i, distribution, leftover, tier_map_tuple,folded_players) | |
| # 我们可能只关心玩家 i 本人的比较, 也可以把全部人都算, | |
| # 这里演示只关心 i | |
| forced_win_i = forced_win_vec[i] | |
| forced_lose_i = forced_lose_vec[i] | |
| diff_i = forced_win_i - forced_lose_i | |
| results.append({ | |
| 'player': i, | |
| 'forcedWinEV': forced_win_i, | |
| 'forcedLoseEV': forced_lose_i, | |
| 'difference': diff_i | |
| }) | |
| return results | |
| # Infinite Squid Game Implementation | |
| def infinite_squid_game_expected_counts_partial(distribution): | |
| """ | |
| For the "infinite squid + cumulative score, stop when only 1 player has 0" game, | |
| given the current distribution (list/tuple, representing each player's current count), | |
| returns a list representing the expected number of squids each player will have at the end of the game. | |
| Rule summary: | |
| - If the current number of players with 0 squids = z: | |
| * z=1 => Game stops immediately, no more squids distributed => final value = current holdings | |
| * z=0 => "Exactly 1 player with 0" can never occur, infinite loop => return inf | |
| * z>=2 => Expected to distribute n*(H_z -1) more times (H_z=1+1/2+...+1/z), | |
| Expected number of times each player is selected = (H_z-1), | |
| Therefore final = distribution[i] + (H_z-1). | |
| """ | |
| n = len(distribution) | |
| zero_count = sum(1 for x in distribution if x == 0) | |
| # Already satisfied or exceeded "end/cannot end" condition | |
| if zero_count == 1: | |
| # Game ends immediately => maintain current state | |
| return list(distribution) | |
| if zero_count == 0: | |
| # "Exactly 1 player with 0" can never occur => game never ends => infinite score | |
| return [math.inf]*n | |
| # zero_count >= 2 => continue distributing squids until z=1 | |
| H_z = sum(1.0 / k for k in range(1, zero_count+1)) # Harmonic number H_z | |
| increment = (H_z - 1.0) | |
| return [x + increment for x in distribution] | |
| # Finite Squid Game Implementation | |
| def get_bitmask(n): | |
| """ | |
| Returns a list [0,1,2,...,(1<<n)-1], purely for iteration reference | |
| """ | |
| return list(range(1 << n)) | |
| def count_ones(x): | |
| """Count how many 1s are in the binary representation of x""" | |
| return bin(x).count("1") | |
| def dp_ev(n, bitmask_u, leftover): | |
| """ | |
| Returns a length-n tuple (prob0, prob1, ..., prob_{n-1}), | |
| representing the "final probability" of each player getting a squid in state (U=bitmask_u, leftover). | |
| - n: total number of players | |
| - bitmask_u: which players haven't received a squid yet (binary mask) | |
| - leftover: remaining squids that can be distributed | |
| """ | |
| # 1) Terminal condition | |
| num_u = count_ones(bitmask_u) # Number of players who haven't received a squid yet | |
| if num_u <= 1 or leftover == 0: | |
| # Game ends immediately | |
| # For players who already have a squid, their probability=1 | |
| # For those who don't, probability=0 | |
| ret = [] | |
| for i in range(n): | |
| # If i is in bitmask_u => probability=0, otherwise =1 | |
| if (bitmask_u & (1 << i)) != 0: | |
| ret.append(0.0) | |
| else: | |
| ret.append(1.0) | |
| return tuple(ret) | |
| # 2) Not terminated => randomly select 1 person from U to give a squid | |
| ret_accum = [0.0]*n | |
| # Randomly select w from U with equal probability | |
| for w in range(n): | |
| bit_w = (1 << w) | |
| if (bitmask_u & bit_w) != 0: | |
| # w is in U | |
| # Next state: after w gets a squid, remove w from bitmask | |
| next_u = bitmask_u ^ bit_w # Set to 0 | |
| sub_ev = dp_ev(n, next_u, leftover - 1) | |
| for i in range(n): | |
| ret_accum[i] += sub_ev[i] / num_u | |
| # 3) Return accumulated result | |
| return tuple(ret_accum) | |
| def finite_squid_game_probabilities(n, r): | |
| """ | |
| Returns a length-n probability vector: prob[i] = probability of player i getting a squid | |
| Based on dp_ev(). | |
| - n: number of players | |
| - r: number of squids to distribute | |
| """ | |
| # Initial U = (1<<n)-1, meaning no one has a squid yet | |
| bitmask_u = (1 << n) - 1 | |
| return dp_ev(n, bitmask_u, r) | |
| def _compute_final_payout_finite_tuple(distribution): | |
| """ | |
| 根据玩家鱿鱼分布元组,为有限游戏计算终局收益。 | |
| 规则:输家(0个鱿鱼)支付所有赢家(1个鱿鱼)的鱿鱼价值总和。 | |
| """ | |
| n = len(distribution) | |
| winners = [i for i, count in enumerate(distribution) if count == 1] | |
| losers = [i for i, count in enumerate(distribution) if count == 0] | |
| num_winners = len(winners) | |
| num_losers = len(losers) | |
| payoffs = [0.0] * n | |
| if num_winners == 0 or num_losers == 0: | |
| return tuple(payoffs) | |
| sum_winner_values = float(num_winners) # 每个赢家价值为1 | |
| for i in losers: | |
| payoffs[i] = -sum_winner_values | |
| for i in winners: | |
| payoffs[i] = 1.0 * num_losers | |
| return tuple(payoffs) | |
| def _is_terminal_finite(distribution, remaining): | |
| """ | |
| 判断有限游戏是否结束。 | |
| 结束条件:1. 没鱿鱼了。 2. 最多只剩一个玩家还没有鱿鱼。 | |
| """ | |
| if remaining == 0: | |
| return True | |
| players_without_squid = sum(1 for count in distribution if count == 0) | |
| if players_without_squid <= 1: | |
| return True | |
| return False | |
| def get_expected_value_finite(distribution, remaining): | |
| """ | |
| (模仿 get_expected_value) | |
| 使用元组作为状态,计算有限游戏的期望收益 (EV) 向量。 | |
| """ | |
| if _is_terminal_finite(distribution, remaining): | |
| return _compute_final_payout_finite_tuple(distribution) | |
| n = len(distribution) | |
| accumulated_ev = [0.0] * n | |
| # 找出所有还没有鱿鱼的合格候选人 | |
| eligible_players = [i for i, count in enumerate(distribution) if count == 0] | |
| num_eligible = len(eligible_players) | |
| if num_eligible == 0: # 理论上不应该发生,除非游戏已结束 | |
| return _compute_final_payout_finite_tuple(distribution) | |
| # 遍历每一个合格的候选人,模拟他获得下一个鱿鱼 | |
| for winner_idx in eligible_players: | |
| new_dist_list = list(distribution) | |
| new_dist_list[winner_idx] += 1 | |
| # 递归调用 | |
| sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining - 1) | |
| for i in range(n): | |
| accumulated_ev[i] += sub_ev[i] | |
| # 对所有可能的结果求平均 | |
| for i in range(n): | |
| accumulated_ev[i] /= num_eligible | |
| return tuple(accumulated_ev) | |
| def get_expected_value_finite_forced_win(i, distribution, remaining): | |
| """ | |
| (模仿 get_expected_value_forced_win) | |
| 假设下一个鱿鱼 100% 给玩家 i (前提是i合格)。 | |
| """ | |
| if remaining <= 0 or distribution[i] == 1: | |
| # 无法强制 (没鱿鱼了,或者i已经有鱿鱼),返回常规EV | |
| return get_expected_value_finite(distribution, remaining) | |
| # 强制给 i | |
| new_dist_list = list(distribution) | |
| new_dist_list[i] += 1 | |
| # 在新状态上进行完全随机的EV计算 | |
| return get_expected_value_finite(tuple(new_dist_list), remaining - 1) | |
| def get_expected_value_finite_forced_lose(i, distribution, remaining,folded_players): | |
| """ | |
| (模仿 get_expected_value_forced_lose) | |
| 假设下一个鱿鱼 100% 不会给玩家 i。 | |
| """ | |
| if remaining <= 0 or distribution[i] == 1: | |
| # i 已经有鱿鱼,下一个本来就不会给他,等同于常规EV | |
| return get_expected_value_finite(distribution, remaining) | |
| n = len(distribution) | |
| accumulated_ev = [0.0] * n | |
| # 找出除了i之外所有合格的候选人 | |
| eligible_players = [p for p, count in enumerate(distribution) if (p != i and not folded_players[p])] | |
| num_eligible = len(eligible_players) | |
| if num_eligible == 0: # 如果除了i没有别人可选了,游戏结束 | |
| return _compute_final_payout_finite_tuple(distribution) | |
| # 遍历每一个合格的候选人,模拟他获得下一个鱿鱼 | |
| for winner_idx in eligible_players: | |
| if distribution[winner_idx] == 0: | |
| new_dist_list = list(distribution) | |
| new_dist_list[winner_idx] += 1 | |
| sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining - 1) | |
| else: | |
| new_dist_list = list(distribution) | |
| sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining) | |
| for p in range(n): | |
| accumulated_ev[p] += sub_ev[p] | |
| # 对所有可能的结果求平均 | |
| for p in range(n): | |
| accumulated_ev[p] /= num_eligible | |
| return tuple(accumulated_ev) | |
| def compute_ev_win_lose_finite_tuple(distribution, remaining,folded_players): | |
| """ | |
| 包装函数,为每个合格玩家计算 forced_win 和 forced_lose 的EV。 | |
| """ | |
| n = len(distribution) | |
| results = [] | |
| base_ev = get_expected_value_finite(distribution, remaining) | |
| eligible_players = [i for i, count in enumerate(distribution) if count == 0] | |
| for i in eligible_players: | |
| win_ev_vec = get_expected_value_finite_forced_win(i, distribution, remaining) | |
| lose_ev_vec = get_expected_value_finite_forced_lose(i, distribution, remaining,folded_players) | |
| forced_win_i = win_ev_vec[i] | |
| forced_lose_i = lose_ev_vec[i] | |
| diff_i = forced_win_i - forced_lose_i | |
| results.append({ | |
| 'player': i, | |
| 'forcedWinEV': forced_win_i, | |
| 'forcedLoseEV': forced_lose_i, | |
| 'difference': diff_i | |
| }) | |
| return base_ev, results |