# squid_game.py from functools import lru_cache import math def parse_tier_map(tier_str: str): """ Example tier_str: \"1-4:1\n5-6:3\" means: if a player has 1..4 squids => each worth x1 => total = k*1 if a player has 5..6 squids => each worth x3 => total = k*3 """ lines = tier_str.strip().splitlines() tier_map = [] for line in lines: range_part, mult_part = line.split(":") per_squid_mult = float(mult_part.strip()) if "-" in range_part: low_str, high_str = range_part.split("-") low, high = int(low_str), int(high_str) else: low = high = int(range_part.strip()) tier_map.append((low, high, per_squid_mult)) tier_map.sort(key=lambda x: x[0]) return tier_map def tierValue(k: int, tier_map) -> float: """ For k squids, find bracket => return k * bracket_multiplier. If k <= 0 => 0. """ if k <= 0: return 0.0 for (low, high, mult) in tier_map: if low <= k <= high: return k * mult if tier_map and k > tier_map[-1][1]: return k * tier_map[-1][2] return 0.0 # fallback if no bracket matches def is_terminal(distribution, remaining) -> bool: """ Game ends if exactly one zero-squid player or no squids remain. """ zero_count = sum(1 for x in distribution if x == 0) if zero_count == 1: return True if remaining == 0: return True return False def compute_final_payout(distribution, tier_map): """ distribution: e.g. (0,0,4) RULES: - If exactly 1 zero-squid => that one pays sum_of_winners_tier - If multiple zeros => each zero-squid pays sum_of_winners_tier individually => each winner gets (number_of_zero_squids * winner_tier_value). """ n = len(distribution) zero_indices = [i for i, x in enumerate(distribution) if x == 0] m = len(zero_indices) # number of zero-squid players winner_indices = [i for i, x in enumerate(distribution) if x > 0] # sum of each winner's bracketed total winner_values = [tierValue(distribution[w], tier_map) for w in winner_indices] sum_winner_values = sum(winner_values) payoffs = [0.0]*n if m == 0: # No zeros => no payment => everyone gets 0 return payoffs else: # Each zero pays sum_winner_values for z in zero_indices: payoffs[z] = -sum_winner_values # Each winner gets their tierValue (if single loser) # or m * their tierValue (if multiple losers) for i, w in enumerate(winner_indices): payoffs[w] = winner_values[i] if m == 1 else m * winner_values[i] return payoffs def format_state(distribution, remaining): """Format a game state for display""" return f"({','.join(map(str, distribution))}, {remaining})" @lru_cache(None) def get_expected_value(distribution, remaining, tier_map_tuple): """ Memoized DP: returns an N-tuple of payoffs from state=(distribution, remaining). distribution: tuple of ints remaining: int (squids left) tier_map_tuple: bracket info (like ( (1,4,1.0), (5,6,3.0) )) """ if is_terminal(distribution, remaining): final_pay = compute_final_payout(distribution, tier_map_tuple) return tuple(final_pay) n = len(distribution) accumulated = [0.0]*n for winner in range(n): new_dist = list(distribution) new_dist[winner] += 1 sub_ev = get_expected_value(tuple(new_dist), remaining-1, tier_map_tuple) for i in range(n): accumulated[i] += sub_ev[i] # average for i in range(n): accumulated[i] /= n return tuple(accumulated) def get_expected_value_forced_win( i, distribution, leftover, tier_map_tuple ): """ 假设下一只乌贼 100% 给玩家 i。 则先把 distribution[i] += 1, leftover -=1, 然后对 (distribution', leftover') 做完全随机的 get_expected_value(...)。 返回:一个长度 N 的 tuple,表示每个玩家在这种强制赢前提下的期望最终收益。 """ if leftover <= 0: # 没乌贼剩了,也可能是某些奇怪边界,直接算终局: return get_expected_value(distribution, leftover, tier_map_tuple) dist_forced = list(distribution) dist_forced[i] += 1 new_dist = tuple(dist_forced) return get_expected_value(new_dist, leftover - 1, tier_map_tuple) def get_expected_value_forced_lose( i, distribution, leftover, tier_map_tuple, folded_players ): """ 假设下一只乌贼 100% 不会给玩家 i, 即本轮发乌贼只在其余 (n-1) 人中随机选 winner, 然后后续 (leftover-1) 轮恢复正常 n 人随机。 做法:遍历所有 winner != i (prob=1/(n-1)),发给那个 winner, 然后 leftover-1 的状态再用 get_expected_value 完全随机。 返回:一个长度 N 的 tuple (每个玩家最终EV) """ n = len(distribution) if leftover <= 0: return get_expected_value(distribution, leftover, tier_map_tuple) # 如果 n=1,那就无可比了……(此处不太可能) # 一般 n>=2, leftover>=1 # 假设我们这里显式地做一次 "下一只的发放" 的平均 # winner只能在 [0..n-1] - {i} 之中。 # Probability = 1/(n-1) accumulated = [0.0]*n valid_winners = [w for w in range(n) if (w != i and not folded_players[w])] for w in valid_winners: dist_next = list(distribution) dist_next[w] += 1 sub_ev = get_expected_value(tuple(dist_next), leftover-1, tier_map_tuple) for p in range(n): accumulated[p] += sub_ev[p] # 做平均 for p in range(n): accumulated[p] /= len(valid_winners) # == (n-1) return tuple(accumulated) def compute_ev_win_lose_two_extremes(distribution, leftover, tier_map_tuple,folded_players): """ 返回一个数据结构,记录每个玩家 i 在: - forced_win 时的期望收益 - forced_lose 时的期望收益 - difference = forced_win - forced_lose """ n = len(distribution) results = [] for i in range(n): forced_win_vec = get_expected_value_forced_win(i, distribution, leftover, tier_map_tuple) forced_lose_vec = get_expected_value_forced_lose(i, distribution, leftover, tier_map_tuple,folded_players) # 我们可能只关心玩家 i 本人的比较, 也可以把全部人都算, # 这里演示只关心 i forced_win_i = forced_win_vec[i] forced_lose_i = forced_lose_vec[i] diff_i = forced_win_i - forced_lose_i results.append({ 'player': i, 'forcedWinEV': forced_win_i, 'forcedLoseEV': forced_lose_i, 'difference': diff_i }) return results # Infinite Squid Game Implementation def infinite_squid_game_expected_counts_partial(distribution): """ For the "infinite squid + cumulative score, stop when only 1 player has 0" game, given the current distribution (list/tuple, representing each player's current count), returns a list representing the expected number of squids each player will have at the end of the game. Rule summary: - If the current number of players with 0 squids = z: * z=1 => Game stops immediately, no more squids distributed => final value = current holdings * z=0 => "Exactly 1 player with 0" can never occur, infinite loop => return inf * z>=2 => Expected to distribute n*(H_z -1) more times (H_z=1+1/2+...+1/z), Expected number of times each player is selected = (H_z-1), Therefore final = distribution[i] + (H_z-1). """ n = len(distribution) zero_count = sum(1 for x in distribution if x == 0) # Already satisfied or exceeded "end/cannot end" condition if zero_count == 1: # Game ends immediately => maintain current state return list(distribution) if zero_count == 0: # "Exactly 1 player with 0" can never occur => game never ends => infinite score return [math.inf]*n # zero_count >= 2 => continue distributing squids until z=1 H_z = sum(1.0 / k for k in range(1, zero_count+1)) # Harmonic number H_z increment = (H_z - 1.0) return [x + increment for x in distribution] # Finite Squid Game Implementation def get_bitmask(n): """ Returns a list [0,1,2,...,(1< probability=0, otherwise =1 if (bitmask_u & (1 << i)) != 0: ret.append(0.0) else: ret.append(1.0) return tuple(ret) # 2) Not terminated => randomly select 1 person from U to give a squid ret_accum = [0.0]*n # Randomly select w from U with equal probability for w in range(n): bit_w = (1 << w) if (bitmask_u & bit_w) != 0: # w is in U # Next state: after w gets a squid, remove w from bitmask next_u = bitmask_u ^ bit_w # Set to 0 sub_ev = dp_ev(n, next_u, leftover - 1) for i in range(n): ret_accum[i] += sub_ev[i] / num_u # 3) Return accumulated result return tuple(ret_accum) def finite_squid_game_probabilities(n, r): """ Returns a length-n probability vector: prob[i] = probability of player i getting a squid Based on dp_ev(). - n: number of players - r: number of squids to distribute """ # Initial U = (1<