File size: 7,934 Bytes
d7aca66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957fb5d
 
 
 
cfe8c32
d7aca66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c97d315
 
d7aca66
 
 
 
 
 
 
c97d315
d7aca66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801ea96
d7aca66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c97d315
d7aca66
 
c97d315
d7aca66
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
Python implementation and Gradio app for "An Optimal Strategy for [Solitaire] Yahtzee" (James Glenn, 2006)
Source: https://gunpowder.cs.loyola.edu/~jglenn/research/optimal_yahtzee.pdf

At each turn, the optimal strategy depends only on:
- The available scoring categories (2**13 possibilities)
- The total score in the upper section, capped at 63 (2**6 possibilities)

For each of the 2**19 possible states, we compute the expected optimal score for the rest of the game.
This implementation does not include Yahtzee bonuses or jokers.

The Gradio app then allows you to input your current scorecard and dice roll to get the optimal action.
"""

import numpy as np
import gradio as gr
from tqdm import tqdm

FOUR_OF_A_KIND_IS_40_POINTS = False  # common variant used in France

# Compute Rk and R5 (unique dice combinations for up to 5 dice / exactly 5 dice)
Rk = np.indices((6,) * 6).reshape(6, -1).T
Rk = Rk[Rk.sum(axis=1) <= 5]  # size 462
R5 = Rk[Rk.sum(axis=1) == 5]  # size 252

# Calculate probabilities (P) of each R5 outcome from each Rk roll using multinomial weights
fact = np.array([1, 1, 2, 6, 24, 120])
Δ = R5[None, :] - Rk[:, None]  # Δ[rk, r5, i]: number of die #i missing to go from Rk[rk] to R5[r5]
P  = np.where((Δ >= 0).all(axis=2), fact[Δ.sum(axis=2)] // np.prod(fact[Δ.clip(min=0)], axis=2), 0)
P = P / P.sum(axis=1, keepdims=True)
M = (P > 0).T  # Mask for valid transitions

# Calculate category scores for all R5 outcomes
R5_scores = np.zeros((len(R5), 13), dtype=int)
R5_scores[:, :6] = R5 * np.arange(1, 7)
upper_score = R5_scores[:, :6].sum(axis=1)
R5_scores[:, 6] = upper_score
m = R5.max(axis=1)
R5_scores[:, 7] = upper_score * (m >= 3)
R5_scores[:, 8] = (40 if FOUR_OF_A_KIND_IS_40_POINTS else upper_score) * (m >= 4)
R5_scores[:, 9] = 25 * np.array([set(r) == {0, 2, 3} for r in R5])
R5_scores[:, 10] = 30 * ((R5[:, 0:4] > 0).all(axis=1)| (R5[:, 1:5] > 0).all(axis=1)| (R5[:, 2:6] > 0).all(axis=1))
R5_scores[:, 11] = 40 * ((R5[:, 0:5] > 0).all(axis=1) | (R5[:, 1:6] > 0).all(axis=1))
R5_scores[:, 12] = 50 * (m == 5)

# Initialize state scores:
# - Bits 0-5: upper section score (capped at 63 for bonus)
# - Bits 6-11: upper categories (Aces, Twos, Threes, Fours, Fives, Sixes)
# - Bits 12-18: lower categories (Chance, Three-of-a-kind, Four-of-a-kind, Full House, Small Straight, Large Straight, Yahtzee)
UPPER_TOTAL_BITS = 2**6 - 1
UPPER_TOTAL_AND_CATEGORIES_BITS = 2**12 - 1
CATEGORIES_BITS = 2**19 - 2**6

states = np.arange(2**19, dtype=int)
scores = np.zeros(2**19, dtype=float)

def get_score(state):
    # Get children states
    available = np.array([1 - (state >> (6 + i)) & 1 for i in range(13)])
    upper_total_states = np.clip((state & UPPER_TOTAL_BITS) + R5_scores * (np.arange(13) < 6), 0, 63)
    categories_states = (state & CATEGORIES_BITS) + 2 ** np.arange(6, 19)
    children = available * (upper_total_states + categories_states)

    # Backpropagate scores from children to root of the widget (as per the paper description)
    s3 = available * (R5_scores + scores[children]) # Roll 3
    s2 = P @ s3.max(axis=1) # Roll 2
    s1 = P @ np.where(M, s2, 0).max(axis=1)  # Roll 1
    s0 = P[0] @ np.where(M, s1, 0).max(axis=1)  # Root state

    return s0, s1, s2, s3

# Identify reachable states (e.g., an upper total of 1 is impossible if only "Twos" was played)
reachable_states = np.zeros(2**12, dtype=bool)
U = np.indices((7,) * 6).reshape(6, -1).T  # 6 means unused category
reachable_states[np.clip((U % 6) @ np.arange(1, 7), 0, 63) + (U < 6) @ (2 ** np.arange(6, 12))] = True

# Compute scores sorted by the number of categories already played
# The score for the 64 states with all categories played is 0 except if the upper total is 63
order = np.argsort([bin(state)[:-6].count("1") for state in states])[::-1]
scores[-1] = 35
for idx in tqdm(order[63:]):
    if reachable_states[states[idx] & UPPER_TOTAL_AND_CATEGORIES_BITS]:
        scores[idx] = get_score(states[idx])[0]
print(f"Optimal score at the beginning of the game: {scores[0]:.2f}")  # 245.87 as per the paper (section 6)

# Gradio app
EMPTY = ""
score_options = {
    "Ones": [EMPTY, 0, 1, 2, 3, 4, 5],
    "Twos": [EMPTY, 0, 2, 4, 6, 8, 10],
    "Threes": [EMPTY, 0, 3, 6, 9, 12, 15],
    "Fours": [EMPTY, 0, 4, 8, 12, 16, 20],
    "Fives": [EMPTY, 0, 5, 10, 15, 20, 25],
    "Sixes": [EMPTY, 0, 6, 12, 18, 24, 30],
    "Chance": [EMPTY] + list(range(0, 31)),
    "Three of a kind": [EMPTY] + list(range(0, 31)),
    "Four of a kind": ([EMPTY, 0, 40] if FOUR_OF_A_KIND_IS_40_POINTS else ([EMPTY] + list(range(0, 31)))),
    "Full House": [EMPTY, 0, 25],
    "Small Straight": [EMPTY, 0, 30],
    "Large Straight": [EMPTY, 0, 40],
    "Yahtzee": [EMPTY, 0, 50],
}

categories = list(score_options.keys())
upper_categories, lower_categories = categories[:6], categories[6:]

def update_components(*inputs):
    # Parse inputs
    scorecard = {c: inputs[i] for i, c in enumerate(categories)}
    roll, dice = inputs[len(categories):]
    dice = np.array([int(d) for d in str(dice) if d in "123456"])

    # Compute totals
    scorecard_values = [0 if v == EMPTY else v for v in scorecard.values()]
    upper, lower = sum(scorecard_values[:6]), sum(scorecard_values[6:])
    bonus = 35 if upper > 62 else 0

    # Compute optimal action
    if (len(dice) == 5) and (EMPTY in scorecard.values()):
        state = np.clip(upper, 0, 63) + sum(2 ** (6 + i) for i, c in enumerate(categories) if scorecard[c] != EMPTY)
        s = get_score(state)[roll]
        r5 = R5_indices[tuple(np.bincount(dice, minlength=7)[1:])]

        if roll == 3:
            idx = np.argmax(s[r5])
            new_score = R5_scores[r5, idx]
            action = f"Play {categories[idx]} ({new_score} pts)\nExpected score: {upper + lower + new_score + s[r5, idx]:.2f}"
        else:
            rk = Rk[np.argmax(np.where(M, s, 0)[r5])]
            keep = sum([[i + 1] * rk[i] for i in range(6)], [])
            action = f"Keep {keep}"
    else:
        action = ""

    return upper, bonus, lower, lower + bonus + upper, action


with gr.Blocks() as app:

    gr.Markdown("""# 🎲 Optimal Yahtzee
This app recommends the optimal strategy for playing solitaire Yahtzee, based on [James Glenn's 2006 research](http://gunpowder.cs.loyola.edu/~jglenn/research/optimal_yahtzee.pdf).
### How to use:
- Enter your current scores in the upper and lower sections.
- Select the roll number (1, 2, or 3) and input your dice values (e.g. 61542).
- The app will display the optimal dice to keep (rolls 1 or 2) or the best scoring category to select (roll 3).
""" + FOUR_OF_A_KIND_IS_40_POINTS * "\n**Note:** In this variant, Four of a Kind is worth 40 points.")

    score_inputs = {}

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Upper Section")
            for c in upper_categories:
                score_inputs[c] = gr.Dropdown(score_options[c], label=c, value=EMPTY)
            upper_score = gr.Number(label="Upper Total", interactive=False)
            bonus_score = gr.Number(label="Bonus (if >62)", interactive=False)

        with gr.Column():
            gr.Markdown("### Lower Section")
            for c in lower_categories:
                score_inputs[c] = gr.Dropdown(score_options[c], label=c, value=EMPTY)
            lower_score = gr.Number(label="Lower Total", interactive=False)
            total_score = gr.Number(label="Total", interactive=False)

        with gr.Column():
            gr.Markdown("### Current Roll")
            roll_input = gr.Dropdown([1, 2, 3], label="Roll number", value=1)
            dice_input = gr.Textbox(label="Dice values", lines=1)
            action_box = gr.Textbox(label="Optimal action", lines=2)

    inputs = list(score_inputs.values()) + [roll_input, dice_input]
    outputs = [upper_score, bonus_score, lower_score, total_score, action_box]

    for inp in inputs:
        inp.change(update_components, inputs=inputs, outputs=outputs)

app.launch()