squidgame / squid_game_core.py
bupticybee
:new: add stuff
c5cd6a7
# squid_game.py
from functools import lru_cache
import math
def parse_tier_map(tier_str: str):
"""
Example tier_str:
\"1-4:1\n5-6:3\"
means:
if a player has 1..4 squids => each worth x1 => total = k*1
if a player has 5..6 squids => each worth x3 => total = k*3
"""
lines = tier_str.strip().splitlines()
tier_map = []
for line in lines:
range_part, mult_part = line.split(":")
per_squid_mult = float(mult_part.strip())
if "-" in range_part:
low_str, high_str = range_part.split("-")
low, high = int(low_str), int(high_str)
else:
low = high = int(range_part.strip())
tier_map.append((low, high, per_squid_mult))
tier_map.sort(key=lambda x: x[0])
return tier_map
def tierValue(k: int, tier_map) -> float:
"""
For k squids, find bracket => return k * bracket_multiplier.
If k <= 0 => 0.
"""
if k <= 0:
return 0.0
for (low, high, mult) in tier_map:
if low <= k <= high:
return k * mult
if tier_map and k > tier_map[-1][1]:
return k * tier_map[-1][2]
return 0.0 # fallback if no bracket matches
def is_terminal(distribution, remaining) -> bool:
"""
Game ends if exactly one zero-squid player or no squids remain.
"""
zero_count = sum(1 for x in distribution if x == 0)
if zero_count == 1:
return True
if remaining == 0:
return True
return False
def compute_final_payout(distribution, tier_map):
"""
distribution: e.g. (0,0,4)
RULES:
- If exactly 1 zero-squid => that one pays sum_of_winners_tier
- If multiple zeros => each zero-squid pays sum_of_winners_tier individually
=> each winner gets (number_of_zero_squids * winner_tier_value).
"""
n = len(distribution)
zero_indices = [i for i, x in enumerate(distribution) if x == 0]
m = len(zero_indices) # number of zero-squid players
winner_indices = [i for i, x in enumerate(distribution) if x > 0]
# sum of each winner's bracketed total
winner_values = [tierValue(distribution[w], tier_map) for w in winner_indices]
sum_winner_values = sum(winner_values)
payoffs = [0.0]*n
if m == 0:
# No zeros => no payment => everyone gets 0
return payoffs
else:
# Each zero pays sum_winner_values
for z in zero_indices:
payoffs[z] = -sum_winner_values
# Each winner gets their tierValue (if single loser)
# or m * their tierValue (if multiple losers)
for i, w in enumerate(winner_indices):
payoffs[w] = winner_values[i] if m == 1 else m * winner_values[i]
return payoffs
def format_state(distribution, remaining):
"""Format a game state for display"""
return f"({','.join(map(str, distribution))}, {remaining})"
@lru_cache(None)
def get_expected_value(distribution, remaining, tier_map_tuple):
"""
Memoized DP: returns an N-tuple of payoffs from state=(distribution, remaining).
distribution: tuple of ints
remaining: int (squids left)
tier_map_tuple: bracket info (like ( (1,4,1.0), (5,6,3.0) ))
"""
if is_terminal(distribution, remaining):
final_pay = compute_final_payout(distribution, tier_map_tuple)
return tuple(final_pay)
n = len(distribution)
accumulated = [0.0]*n
for winner in range(n):
new_dist = list(distribution)
new_dist[winner] += 1
sub_ev = get_expected_value(tuple(new_dist), remaining-1, tier_map_tuple)
for i in range(n):
accumulated[i] += sub_ev[i]
# average
for i in range(n):
accumulated[i] /= n
return tuple(accumulated)
def get_expected_value_forced_win(
i,
distribution,
leftover,
tier_map_tuple
):
"""
假设下一只乌贼 100% 给玩家 i。
则先把 distribution[i] += 1, leftover -=1,
然后对 (distribution', leftover') 做完全随机的 get_expected_value(...)。
返回:一个长度 N 的 tuple,表示每个玩家在这种强制赢前提下的期望最终收益。
"""
if leftover <= 0:
# 没乌贼剩了,也可能是某些奇怪边界,直接算终局:
return get_expected_value(distribution, leftover, tier_map_tuple)
dist_forced = list(distribution)
dist_forced[i] += 1
new_dist = tuple(dist_forced)
return get_expected_value(new_dist, leftover - 1, tier_map_tuple)
def get_expected_value_forced_lose(
i,
distribution,
leftover,
tier_map_tuple,
folded_players
):
"""
假设下一只乌贼 100% 不会给玩家 i,
即本轮发乌贼只在其余 (n-1) 人中随机选 winner,
然后后续 (leftover-1) 轮恢复正常 n 人随机。
做法:遍历所有 winner != i (prob=1/(n-1)),发给那个 winner,
然后 leftover-1 的状态再用 get_expected_value 完全随机。
返回:一个长度 N 的 tuple (每个玩家最终EV)
"""
n = len(distribution)
if leftover <= 0:
return get_expected_value(distribution, leftover, tier_map_tuple)
# 如果 n=1,那就无可比了……(此处不太可能)
# 一般 n>=2, leftover>=1
# 假设我们这里显式地做一次 "下一只的发放" 的平均
# winner只能在 [0..n-1] - {i} 之中。
# Probability = 1/(n-1)
accumulated = [0.0]*n
valid_winners = [w for w in range(n) if (w != i and not folded_players[w])]
for w in valid_winners:
dist_next = list(distribution)
dist_next[w] += 1
sub_ev = get_expected_value(tuple(dist_next), leftover-1, tier_map_tuple)
for p in range(n):
accumulated[p] += sub_ev[p]
# 做平均
for p in range(n):
accumulated[p] /= len(valid_winners) # == (n-1)
return tuple(accumulated)
def compute_ev_win_lose_two_extremes(distribution, leftover, tier_map_tuple,folded_players):
"""
返回一个数据结构,记录每个玩家 i 在:
- forced_win 时的期望收益
- forced_lose 时的期望收益
- difference = forced_win - forced_lose
"""
n = len(distribution)
results = []
for i in range(n):
forced_win_vec = get_expected_value_forced_win(i, distribution, leftover, tier_map_tuple)
forced_lose_vec = get_expected_value_forced_lose(i, distribution, leftover, tier_map_tuple,folded_players)
# 我们可能只关心玩家 i 本人的比较, 也可以把全部人都算,
# 这里演示只关心 i
forced_win_i = forced_win_vec[i]
forced_lose_i = forced_lose_vec[i]
diff_i = forced_win_i - forced_lose_i
results.append({
'player': i,
'forcedWinEV': forced_win_i,
'forcedLoseEV': forced_lose_i,
'difference': diff_i
})
return results
# Infinite Squid Game Implementation
def infinite_squid_game_expected_counts_partial(distribution):
"""
For the "infinite squid + cumulative score, stop when only 1 player has 0" game,
given the current distribution (list/tuple, representing each player's current count),
returns a list representing the expected number of squids each player will have at the end of the game.
Rule summary:
- If the current number of players with 0 squids = z:
* z=1 => Game stops immediately, no more squids distributed => final value = current holdings
* z=0 => "Exactly 1 player with 0" can never occur, infinite loop => return inf
* z>=2 => Expected to distribute n*(H_z -1) more times (H_z=1+1/2+...+1/z),
Expected number of times each player is selected = (H_z-1),
Therefore final = distribution[i] + (H_z-1).
"""
n = len(distribution)
zero_count = sum(1 for x in distribution if x == 0)
# Already satisfied or exceeded "end/cannot end" condition
if zero_count == 1:
# Game ends immediately => maintain current state
return list(distribution)
if zero_count == 0:
# "Exactly 1 player with 0" can never occur => game never ends => infinite score
return [math.inf]*n
# zero_count >= 2 => continue distributing squids until z=1
H_z = sum(1.0 / k for k in range(1, zero_count+1)) # Harmonic number H_z
increment = (H_z - 1.0)
return [x + increment for x in distribution]
# Finite Squid Game Implementation
def get_bitmask(n):
"""
Returns a list [0,1,2,...,(1<<n)-1], purely for iteration reference
"""
return list(range(1 << n))
def count_ones(x):
"""Count how many 1s are in the binary representation of x"""
return bin(x).count("1")
@lru_cache(None)
def dp_ev(n, bitmask_u, leftover):
"""
Returns a length-n tuple (prob0, prob1, ..., prob_{n-1}),
representing the "final probability" of each player getting a squid in state (U=bitmask_u, leftover).
- n: total number of players
- bitmask_u: which players haven't received a squid yet (binary mask)
- leftover: remaining squids that can be distributed
"""
# 1) Terminal condition
num_u = count_ones(bitmask_u) # Number of players who haven't received a squid yet
if num_u <= 1 or leftover == 0:
# Game ends immediately
# For players who already have a squid, their probability=1
# For those who don't, probability=0
ret = []
for i in range(n):
# If i is in bitmask_u => probability=0, otherwise =1
if (bitmask_u & (1 << i)) != 0:
ret.append(0.0)
else:
ret.append(1.0)
return tuple(ret)
# 2) Not terminated => randomly select 1 person from U to give a squid
ret_accum = [0.0]*n
# Randomly select w from U with equal probability
for w in range(n):
bit_w = (1 << w)
if (bitmask_u & bit_w) != 0:
# w is in U
# Next state: after w gets a squid, remove w from bitmask
next_u = bitmask_u ^ bit_w # Set to 0
sub_ev = dp_ev(n, next_u, leftover - 1)
for i in range(n):
ret_accum[i] += sub_ev[i] / num_u
# 3) Return accumulated result
return tuple(ret_accum)
def finite_squid_game_probabilities(n, r):
"""
Returns a length-n probability vector: prob[i] = probability of player i getting a squid
Based on dp_ev().
- n: number of players
- r: number of squids to distribute
"""
# Initial U = (1<<n)-1, meaning no one has a squid yet
bitmask_u = (1 << n) - 1
return dp_ev(n, bitmask_u, r)
def _compute_final_payout_finite_tuple(distribution):
"""
根据玩家鱿鱼分布元组,为有限游戏计算终局收益。
规则:输家(0个鱿鱼)支付所有赢家(1个鱿鱼)的鱿鱼价值总和。
"""
n = len(distribution)
winners = [i for i, count in enumerate(distribution) if count == 1]
losers = [i for i, count in enumerate(distribution) if count == 0]
num_winners = len(winners)
num_losers = len(losers)
payoffs = [0.0] * n
if num_winners == 0 or num_losers == 0:
return tuple(payoffs)
sum_winner_values = float(num_winners) # 每个赢家价值为1
for i in losers:
payoffs[i] = -sum_winner_values
for i in winners:
payoffs[i] = 1.0 * num_losers
return tuple(payoffs)
def _is_terminal_finite(distribution, remaining):
"""
判断有限游戏是否结束。
结束条件:1. 没鱿鱼了。 2. 最多只剩一个玩家还没有鱿鱼。
"""
if remaining == 0:
return True
players_without_squid = sum(1 for count in distribution if count == 0)
if players_without_squid <= 1:
return True
return False
@lru_cache(None)
def get_expected_value_finite(distribution, remaining):
"""
(模仿 get_expected_value)
使用元组作为状态,计算有限游戏的期望收益 (EV) 向量。
"""
if _is_terminal_finite(distribution, remaining):
return _compute_final_payout_finite_tuple(distribution)
n = len(distribution)
accumulated_ev = [0.0] * n
# 找出所有还没有鱿鱼的合格候选人
eligible_players = [i for i, count in enumerate(distribution) if count == 0]
num_eligible = len(eligible_players)
if num_eligible == 0: # 理论上不应该发生,除非游戏已结束
return _compute_final_payout_finite_tuple(distribution)
# 遍历每一个合格的候选人,模拟他获得下一个鱿鱼
for winner_idx in eligible_players:
new_dist_list = list(distribution)
new_dist_list[winner_idx] += 1
# 递归调用
sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining - 1)
for i in range(n):
accumulated_ev[i] += sub_ev[i]
# 对所有可能的结果求平均
for i in range(n):
accumulated_ev[i] /= num_eligible
return tuple(accumulated_ev)
def get_expected_value_finite_forced_win(i, distribution, remaining):
"""
(模仿 get_expected_value_forced_win)
假设下一个鱿鱼 100% 给玩家 i (前提是i合格)。
"""
if remaining <= 0 or distribution[i] == 1:
# 无法强制 (没鱿鱼了,或者i已经有鱿鱼),返回常规EV
return get_expected_value_finite(distribution, remaining)
# 强制给 i
new_dist_list = list(distribution)
new_dist_list[i] += 1
# 在新状态上进行完全随机的EV计算
return get_expected_value_finite(tuple(new_dist_list), remaining - 1)
def get_expected_value_finite_forced_lose(i, distribution, remaining,folded_players):
"""
(模仿 get_expected_value_forced_lose)
假设下一个鱿鱼 100% 不会给玩家 i。
"""
if remaining <= 0 or distribution[i] == 1:
# i 已经有鱿鱼,下一个本来就不会给他,等同于常规EV
return get_expected_value_finite(distribution, remaining)
n = len(distribution)
accumulated_ev = [0.0] * n
# 找出除了i之外所有合格的候选人
eligible_players = [p for p, count in enumerate(distribution) if (p != i and not folded_players[p])]
num_eligible = len(eligible_players)
if num_eligible == 0: # 如果除了i没有别人可选了,游戏结束
return _compute_final_payout_finite_tuple(distribution)
# 遍历每一个合格的候选人,模拟他获得下一个鱿鱼
for winner_idx in eligible_players:
if distribution[winner_idx] == 0:
new_dist_list = list(distribution)
new_dist_list[winner_idx] += 1
sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining - 1)
else:
new_dist_list = list(distribution)
sub_ev = get_expected_value_finite(tuple(new_dist_list), remaining)
for p in range(n):
accumulated_ev[p] += sub_ev[p]
# 对所有可能的结果求平均
for p in range(n):
accumulated_ev[p] /= num_eligible
return tuple(accumulated_ev)
def compute_ev_win_lose_finite_tuple(distribution, remaining,folded_players):
"""
包装函数,为每个合格玩家计算 forced_win 和 forced_lose 的EV。
"""
n = len(distribution)
results = []
base_ev = get_expected_value_finite(distribution, remaining)
eligible_players = [i for i, count in enumerate(distribution) if count == 0]
for i in eligible_players:
win_ev_vec = get_expected_value_finite_forced_win(i, distribution, remaining)
lose_ev_vec = get_expected_value_finite_forced_lose(i, distribution, remaining,folded_players)
forced_win_i = win_ev_vec[i]
forced_lose_i = lose_ev_vec[i]
diff_i = forced_win_i - forced_lose_i
results.append({
'player': i,
'forcedWinEV': forced_win_i,
'forcedLoseEV': forced_lose_i,
'difference': diff_i
})
return base_ev, results