Kaushik Rajan committed on
Commit
85310d8
·
1 Parent(s): c59d6c7

Add robust error handling and fallback interface for HF Spaces compatibility

Browse files
Files changed (1) hide show
  1. app.py +303 -260
app.py CHANGED
@@ -9,153 +9,43 @@ import numpy as np
9
  import random
10
  import os
11
  import sys
 
12
 
13
  # Add src to path for imports
14
  sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
15
 
16
- from games import TicTacToeEnv, KuhnPokerEnv
17
- from games.game_utils import get_available_games, get_game_info
 
 
 
 
 
 
 
18
 
19
 
20
class GameInterface:
    """Drive one TicTacToe and one Kuhn Poker session for the web UI.

    Every ``play_*`` and ``reset_*`` method returns a 3-tuple of strings
    ``(state_text, status_message, reasoning_text)`` so it can be wired
    directly to three output text boxes.

    The human is the player whose ``winner`` code is ``1``; in TicTacToe the
    code ``-1`` is reported as the AI and anything else as "No one".
    """

    # Display names, indexed by the integer codes exchanged with the Kuhn env.
    _CARD_NAMES = ("J", "Q", "K")
    _ACTION_NAMES = ("Check/Call", "Bet", "Fold")
    # Reverse lookup: UI label -> action code passed to ``kuhn_env.step``.
    _ACTION_CODES = {"Check/Call": 0, "Bet": 1, "Fold": 2}

    def __init__(self):
        """Create both environments and reset them to a fresh game."""
        self.tictactoe_env = None
        self.kuhn_env = None
        self.reset_games()

    def reset_games(self):
        """Replace both game environments with new, freshly reset instances."""
        self.tictactoe_env = TicTacToeEnv()
        self.kuhn_env = KuhnPokerEnv()
        self.tictactoe_env.reset()
        self.kuhn_env.reset()

    # ------------------------------------------------------------------
    # TicTacToe
    # ------------------------------------------------------------------

    def _tictactoe_winner_label(self):
        """Return "You", "AI" or "No one" from the env's ``winner`` code."""
        winner = self.tictactoe_env.winner
        if winner == 1:
            return "You"
        if winner == -1:
            return "AI"
        return "No one"

    def play_tictactoe(self, position):
        """Play the human move at ``position`` and then a random AI reply.

        Args:
            position: Board index 0-8; anything ``int()`` accepts (the UI
                passes the raw text box string).

        Returns:
            ``(board_text, status_message, reasoning_text)``.
        """
        env = self.tictactoe_env
        if env.game_over:
            return self.get_tictactoe_board(), "Game is over! Click 'New Game' to start again.", ""

        # Parse on its own so that only a malformed number yields the
        # "enter a valid number" message; a ValueError raised later by the
        # environment is reported through the generic error path instead.
        try:
            position = int(position)
        except ValueError:
            return self.get_tictactoe_board(), "Please enter a valid number (0-8).", ""
        except Exception as e:
            return self.get_tictactoe_board(), f"Error: {str(e)}", ""

        if position < 0 or position > 8:
            return self.get_tictactoe_board(), "Invalid position! Choose 0-8.", ""

        try:
            # Human move
            _, reward, terminated, _, _ = env.step(position)
            if terminated:
                winner = self._tictactoe_winner_label()
                return self.get_tictactoe_board(), f"Game Over! {winner} won!", f"Final reward: {reward}"

            # AI move (random for now)
            if not env.game_over:
                valid_actions = env._get_valid_actions()
                if valid_actions:
                    ai_action = random.choice(valid_actions)
                    _, reward, terminated, _, _ = env.step(ai_action)

                    if terminated:
                        winner = self._tictactoe_winner_label()
                        return (
                            self.get_tictactoe_board(),
                            f"Game Over! {winner} won!",
                            f"AI played position {ai_action}. Final reward: {reward}",
                        )
                    return (
                        self.get_tictactoe_board(),
                        f"AI played position {ai_action}. Your turn!",
                        f"AI reasoning: Chose position {ai_action} randomly",
                    )

            return self.get_tictactoe_board(), "Your turn!", ""
        except Exception as e:
            # UI boundary: surface the failure as text rather than crashing.
            return self.get_tictactoe_board(), f"Error: {str(e)}", ""

    def reset_tictactoe(self):
        """Reset the TicTacToe environment and return the opening UI state."""
        self.tictactoe_env.reset()
        return self.get_tictactoe_board(), "New game started! You are X. Choose a position (0-8).", ""

    def get_tictactoe_board(self):
        """Render the current TicTacToe board as a multi-line string.

        Cells equal to ``1`` are drawn as X, ``-1`` as O, and any other value
        as the cell's index (``row*3 + col``) so the player can see which
        number to enter.
        """
        board = self.tictactoe_env.board
        row_lines = []
        for row in range(3):
            cells = []
            for col in range(3):
                cell = board[row, col]
                if cell == 1:
                    cells.append(" X ")
                elif cell == -1:
                    cells.append(" O ")
                else:
                    cells.append(f" {row*3 + col} ")
            row_lines.append("|".join(cells) + "\n")
        # A horizontal rule goes between rows, not after the last one.
        return "-----------\n".join(row_lines)

    # ------------------------------------------------------------------
    # Kuhn Poker
    # ------------------------------------------------------------------

    def _kuhn_winner_label(self):
        """Return "You" when the env's ``winner`` is 1, otherwise "AI"."""
        return "You" if self.kuhn_env.winner == 1 else "AI"

    def play_kuhn_poker(self, action_name):
        """Play the human action named ``action_name`` and a random AI reply.

        Args:
            action_name: One of "Check/Call", "Bet" or "Fold".

        Returns:
            ``(state_text, status_message, reasoning_text)``.
        """
        env = self.kuhn_env
        if env.game_over:
            return self.get_kuhn_poker_state(), "Game is over! Click 'New Game' to start again.", ""

        try:
            # Map action name to action number
            if action_name not in self._ACTION_CODES:
                return self.get_kuhn_poker_state(), "Invalid action!", ""
            action = self._ACTION_CODES[action_name]

            # Human move
            _, reward, terminated, _, _ = env.step(action)
            if terminated:
                winner = self._kuhn_winner_label()
                return (
                    self.get_kuhn_poker_state(),
                    f"Game Over! {winner} won! Pot: {env.pot}",
                    f"Your final reward: {reward}",
                )

            # AI move (random for now)
            if not env.game_over:
                valid_actions = env._get_valid_actions()
                ai_action = random.choice(valid_actions)
                ai_action_name = self._ACTION_NAMES[ai_action]

                _, reward, terminated, _, _ = env.step(ai_action)

                if terminated:
                    winner = self._kuhn_winner_label()
                    return (
                        self.get_kuhn_poker_state(),
                        f"AI chose {ai_action_name}. Game Over! {winner} won! Pot: {env.pot}",
                        f"AI reasoning: Chose {ai_action_name} randomly. Your final reward: {reward}",
                    )
                return (
                    self.get_kuhn_poker_state(),
                    f"AI chose {ai_action_name}. Your turn!",
                    f"AI reasoning: Chose {ai_action_name} randomly",
                )

            return self.get_kuhn_poker_state(), "Your turn!", ""

        except Exception as e:
            # UI boundary: surface the failure as text rather than crashing.
            return self.get_kuhn_poker_state(), f"Error: {str(e)}", ""

    def reset_kuhn_poker(self):
        """Reset the Kuhn Poker environment and return the opening UI state."""
        self.kuhn_env.reset()
        card = self._CARD_NAMES[self.kuhn_env.player1_card]
        return (
            self.get_kuhn_poker_state(),
            "New game started! You are Player 1. Choose your action.",
            f"Your card: {card}",
        )

    def get_kuhn_poker_state(self):
        """Render the current Kuhn Poker state as a multi-line string.

        Shows the human's card, the pot, the current player, the betting
        round and, when any exist, the ``(player, action)`` history.
        """
        env = self.kuhn_env
        parts = [
            f"🃏 Your Card: {self._CARD_NAMES[env.player1_card]}\n",
            f"💰 Pot: {env.pot}\n",
            f"🎯 Current Player: {env.current_player}\n",
            f"🔄 Betting Round: {env.betting_round}\n",
        ]

        if env.actions_history:
            parts.append("\n📋 Actions:\n")
            for player, action in env.actions_history:
                parts.append(f" Player {player}: {self._ACTION_NAMES[action]}\n")

        return "".join(parts)
155
-
156
-
157
- # Create game interface
158
- game_interface = GameInterface()
159
 
160
 
161
  def create_interface():
@@ -163,149 +53,302 @@ def create_interface():
163
 
164
  with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
165
  gr.Markdown("# ๐ŸŽฎ SPIRAL: Interactive Reasoning Game Simulator")
166
- gr.Markdown("**Demo Version** - Experience zero-sum games with AI! Full reasoning capabilities coming soon.")
167
 
168
- with gr.Tabs():
169
- # TicTacToe Tab
170
- with gr.TabItem("๐ŸŽฏ TicTacToe"):
171
- gr.Markdown("### Play TicTacToe against AI")
172
- gr.Markdown("You are **X** and go first. Enter a position (0-8) to make your move.")
 
 
 
 
173
 
174
- with gr.Row():
175
- with gr.Column(scale=2):
176
- ttt_board = gr.Textbox(
177
- label="Game Board",
178
- value=game_interface.get_tictactoe_board(),
179
- lines=6,
180
- interactive=False,
181
- elem_id="ttt-board"
182
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- with gr.Column(scale=1):
185
- ttt_position = gr.Textbox(
186
- label="Your Move (0-8)",
187
- placeholder="Enter position number",
188
- lines=1
189
- )
190
 
191
- with gr.Row():
192
- ttt_play_btn = gr.Button("Play Move", variant="primary")
193
- ttt_reset_btn = gr.Button("New Game", variant="secondary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- ttt_message = gr.Textbox(
196
- label="Game Status",
197
- value="Choose a position (0-8) to start!",
198
- lines=2,
199
- interactive=False
200
- )
201
 
202
- ttt_reasoning = gr.Textbox(
203
- label="AI Reasoning",
204
- value="AI will show its reasoning here...",
205
- lines=2,
206
- interactive=False
207
- )
 
 
 
 
 
 
 
 
208
 
209
- ttt_play_btn.click(
210
- fn=game_interface.play_tictactoe,
211
- inputs=[ttt_position],
212
- outputs=[ttt_board, ttt_message, ttt_reasoning]
213
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
- ttt_reset_btn.click(
216
- fn=game_interface.reset_tictactoe,
217
- outputs=[ttt_board, ttt_message, ttt_reasoning]
218
- )
219
-
220
- # Kuhn Poker Tab
221
- with gr.TabItem("๐Ÿƒ Kuhn Poker"):
222
- gr.Markdown("### Play Kuhn Poker against AI")
223
- gr.Markdown("Simple poker with 3 cards (J, Q, K). You are Player 1.")
224
 
225
- with gr.Row():
226
- with gr.Column(scale=2):
227
- kuhn_state = gr.Textbox(
228
- label="Game State",
229
- value=game_interface.get_kuhn_poker_state(),
230
- lines=8,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  interactive=False
232
  )
233
 
234
- with gr.Column(scale=1):
235
- kuhn_action = gr.Dropdown(
236
- label="Your Action",
237
- choices=["Check/Call", "Bet", "Fold"],
238
- value="Check/Call"
239
  )
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  with gr.Row():
242
- kuhn_play_btn = gr.Button("Play Action", variant="primary")
243
- kuhn_reset_btn = gr.Button("New Game", variant="secondary")
244
-
245
- kuhn_message = gr.Textbox(
246
- label="Game Status",
247
- value="Choose your action!",
248
- lines=2,
249
- interactive=False
250
- )
251
-
252
- kuhn_reasoning = gr.Textbox(
253
- label="AI Reasoning",
254
- value="AI will show its reasoning here...",
255
- lines=2,
256
- interactive=False
257
- )
258
-
259
- kuhn_play_btn.click(
260
- fn=game_interface.play_kuhn_poker,
261
- inputs=[kuhn_action],
262
- outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
263
- )
264
-
265
- kuhn_reset_btn.click(
266
- fn=game_interface.reset_kuhn_poker,
267
- outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
268
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
- # About Tab
271
- with gr.TabItem("โ„น๏ธ About"):
272
- gr.Markdown("""
273
- ### About SPIRAL
274
-
275
- This is a **demo version** of the SPIRAL methodology: *"Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."*
276
-
277
- **Current Features:**
278
- - ๐ŸŽฏ **TicTacToe**: Play against a random AI opponent
279
- - ๐Ÿƒ **Kuhn Poker**: Experience simplified poker gameplay
280
- - ๐ŸŽฎ **Interactive Games**: Real-time game state updates
281
 
282
- **Coming Soon:**
283
- - ๐Ÿง  **SPIRAL-trained AI**: Opponents trained via self-play
284
- - ๐Ÿ“Š **Reasoning Traces**: See step-by-step AI decision-making
285
- - ๐Ÿ”ฌ **Transfer Learning**: Test AI reasoning on math problems
286
- - ๐Ÿ“ˆ **Performance Metrics**: Track AI improvement over time
287
-
288
- **Game Rules:**
289
-
290
- **TicTacToe:**
291
- - 3x3 grid, get 3 in a row to win
292
- - You are X, AI is O
293
- - Numbers 0-8 represent board positions
294
-
295
- **Kuhn Poker:**
296
- - 3 cards: Jack (lowest), Queen, King (highest)
297
- - Each player gets 1 card, antes 1 chip
298
- - Actions: Check/Call, Bet (+1 chip), Fold
299
- - Higher card wins if both call/check
300
-
301
- **Technical Details:**
302
- - Built with Gymnasium environments
303
- - Gradio web interface
304
- - Ready for SPIRAL training integration
305
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- gr.Markdown("---")
308
- gr.Markdown("๐Ÿšง **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")
 
 
 
 
309
 
310
  return demo
311
 
 
9
  import random
10
  import os
11
  import sys
12
+ import traceback
13
 
14
  # Add src to path for imports
15
  sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
16
 
17
+ # Try to import our game modules, fall back to simple versions if they fail
18
+ try:
19
+ from games import TicTacToeEnv, KuhnPokerEnv
20
+ GAMES_AVAILABLE = True
21
+ print("โœ… Successfully imported game modules")
22
+ except ImportError as e:
23
+ print(f"โŒ Failed to import game modules: {e}")
24
+ print("๐Ÿ“‹ Traceback:", traceback.format_exc())
25
+ GAMES_AVAILABLE = False
26
 
27
 
28
def create_simple_tictactoe():
    """Build a minimal fallback TicTacToe move handler.

    The handler does not track or update a board; for demo purposes it only
    validates the requested position and echoes the move above the board text
    it was given.

    Returns:
        A function ``play_move(position, board_state)`` that returns
        ``(board_text, message)``.
    """

    def play_move(position, board_state):
        """Validate ``position`` (0-8) and report the move.

        Args:
            position: Anything ``int()`` accepts, typically text box input.
            board_state: Current board text; returned unchanged on invalid
                input and echoed below a "Move N played!" line otherwise.
        """
        # Catch only what int() can raise, so unrelated failures and
        # KeyboardInterrupt are not disguised as "enter a valid number".
        try:
            pos = int(position)
        except (TypeError, ValueError, OverflowError):
            return board_state, "Please enter a valid number 0-8"

        if pos < 0 or pos > 8:
            return board_state, "Invalid position! Choose 0-8."

        # For demo, just show the move
        return f"Move {pos} played!\n{board_state}", f"You played position {pos}"

    return play_move
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  def create_interface():
 
53
 
54
  with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
55
  gr.Markdown("# ๐ŸŽฎ SPIRAL: Interactive Reasoning Game Simulator")
 
56
 
57
+ if GAMES_AVAILABLE:
58
+ gr.Markdown("**Demo Version** - Experience zero-sum games with AI! Full reasoning capabilities coming soon.")
59
+
60
+ # Initialize game environments
61
+ try:
62
+ tictactoe_env = TicTacToeEnv()
63
+ kuhn_env = KuhnPokerEnv()
64
+ tictactoe_env.reset()
65
+ kuhn_env.reset()
66
 
67
+ def get_tictactoe_board():
68
+ """Get current TicTacToe board as string."""
69
+ board = tictactoe_env.board
70
+ display = ""
71
+ for row in range(3):
72
+ for col in range(3):
73
+ cell = board[row, col]
74
+ if cell == 1:
75
+ display += " X "
76
+ elif cell == -1:
77
+ display += " O "
78
+ else:
79
+ display += f" {row*3 + col} "
80
+ if col < 2:
81
+ display += "|"
82
+ display += "\n"
83
+ if row < 2:
84
+ display += "-----------\n"
85
+ return display
86
+
87
+ def play_tictactoe(position):
88
+ """Play a TicTacToe move."""
89
+ if tictactoe_env.game_over:
90
+ return get_tictactoe_board(), "Game is over! Click 'New Game' to start again.", ""
91
+
92
+ try:
93
+ position = int(position)
94
+ if position < 0 or position > 8:
95
+ return get_tictactoe_board(), "Invalid position! Choose 0-8.", ""
96
 
97
+ # Human move
98
+ obs, reward, terminated, truncated, info = tictactoe_env.step(position)
 
 
 
 
99
 
100
+ if terminated:
101
+ winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "No one"
102
+ return get_tictactoe_board(), f"Game Over! {winner} won!", f"Final reward: {reward}"
103
+
104
+ # AI move (random for now)
105
+ if not tictactoe_env.game_over:
106
+ valid_actions = tictactoe_env._get_valid_actions()
107
+ if valid_actions:
108
+ ai_action = random.choice(valid_actions)
109
+ obs, reward, terminated, truncated, info = tictactoe_env.step(ai_action)
110
+
111
+ if terminated:
112
+ winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "No one"
113
+ return get_tictactoe_board(), f"Game Over! {winner} won!", f"AI played position {ai_action}. Final reward: {reward}"
114
+ else:
115
+ return get_tictactoe_board(), f"AI played position {ai_action}. Your turn!", f"AI reasoning: Chose position {ai_action} randomly"
116
+
117
+ return get_tictactoe_board(), "Your turn!", ""
118
+
119
+ except ValueError:
120
+ return get_tictactoe_board(), "Please enter a valid number (0-8).", ""
121
+ except Exception as e:
122
+ return get_tictactoe_board(), f"Error: {str(e)}", ""
123
 
124
+ def reset_tictactoe():
125
+ """Reset TicTacToe game."""
126
+ tictactoe_env.reset()
127
+ return get_tictactoe_board(), "New game started! You are X. Choose a position (0-8).", ""
 
 
128
 
129
+ def get_kuhn_poker_state():
130
+ """Get current Kuhn Poker state as string."""
131
+ state = f"๐Ÿƒ Your Card: {['J', 'Q', 'K'][kuhn_env.player1_card]}\n"
132
+ state += f"๐Ÿ’ฐ Pot: {kuhn_env.pot}\n"
133
+ state += f"๐ŸŽฏ Current Player: {kuhn_env.current_player}\n"
134
+ state += f"๐Ÿ”„ Betting Round: {kuhn_env.betting_round}\n"
135
+
136
+ if kuhn_env.actions_history:
137
+ state += "\n๐Ÿ“‹ Actions:\n"
138
+ for player, action in kuhn_env.actions_history:
139
+ action_name = ["Check/Call", "Bet", "Fold"][action]
140
+ state += f" Player {player}: {action_name}\n"
141
+
142
+ return state
143
 
144
+ def play_kuhn_poker(action_name):
145
+ """Play a Kuhn Poker move."""
146
+ if kuhn_env.game_over:
147
+ return get_kuhn_poker_state(), "Game is over! Click 'New Game' to start again.", ""
148
+
149
+ try:
150
+ # Map action name to action number
151
+ action_map = {"Check/Call": 0, "Bet": 1, "Fold": 2}
152
+ if action_name not in action_map:
153
+ return get_kuhn_poker_state(), "Invalid action!", ""
154
+
155
+ action = action_map[action_name]
156
+
157
+ # Human move
158
+ obs, reward, terminated, truncated, info = kuhn_env.step(action)
159
+
160
+ if terminated:
161
+ winner = "You" if kuhn_env.winner == 1 else "AI"
162
+ return get_kuhn_poker_state(), f"Game Over! {winner} won! Pot: {kuhn_env.pot}", f"Your final reward: {reward}"
163
+
164
+ # AI move (random for now)
165
+ if not kuhn_env.game_over:
166
+ valid_actions = kuhn_env._get_valid_actions()
167
+ ai_action = random.choice(valid_actions)
168
+ ai_action_name = ["Check/Call", "Bet", "Fold"][ai_action]
169
+
170
+ obs, reward, terminated, truncated, info = kuhn_env.step(ai_action)
171
+
172
+ if terminated:
173
+ winner = "You" if kuhn_env.winner == 1 else "AI"
174
+ return get_kuhn_poker_state(), f"AI chose {ai_action_name}. Game Over! {winner} won! Pot: {kuhn_env.pot}", f"AI reasoning: Chose {ai_action_name} randomly. Your final reward: {reward}"
175
+ else:
176
+ return get_kuhn_poker_state(), f"AI chose {ai_action_name}. Your turn!", f"AI reasoning: Chose {ai_action_name} randomly"
177
+
178
+ return get_kuhn_poker_state(), "Your turn!", ""
179
+
180
+ except Exception as e:
181
+ return get_kuhn_poker_state(), f"Error: {str(e)}", ""
182
 
183
+ def reset_kuhn_poker():
184
+ """Reset Kuhn Poker game."""
185
+ kuhn_env.reset()
186
+ return get_kuhn_poker_state(), "New game started! You are Player 1. Choose your action.", f"Your card: {['J', 'Q', 'K'][kuhn_env.player1_card]}"
 
 
 
 
 
187
 
188
+ with gr.Tabs():
189
+ # TicTacToe Tab
190
+ with gr.TabItem("๐ŸŽฏ TicTacToe"):
191
+ gr.Markdown("### Play TicTacToe against AI")
192
+ gr.Markdown("You are **X** and go first. Enter a position (0-8) to make your move.")
193
+
194
+ with gr.Row():
195
+ with gr.Column(scale=2):
196
+ ttt_board = gr.Textbox(
197
+ label="Game Board",
198
+ value=get_tictactoe_board(),
199
+ lines=6,
200
+ interactive=False,
201
+ elem_id="ttt-board"
202
+ )
203
+
204
+ with gr.Column(scale=1):
205
+ ttt_position = gr.Textbox(
206
+ label="Your Move (0-8)",
207
+ placeholder="Enter position number",
208
+ lines=1
209
+ )
210
+
211
+ with gr.Row():
212
+ ttt_play_btn = gr.Button("Play Move", variant="primary")
213
+ ttt_reset_btn = gr.Button("New Game", variant="secondary")
214
+
215
+ ttt_message = gr.Textbox(
216
+ label="Game Status",
217
+ value="Choose a position (0-8) to start!",
218
+ lines=2,
219
  interactive=False
220
  )
221
 
222
+ ttt_reasoning = gr.Textbox(
223
+ label="AI Reasoning",
224
+ value="AI will show its reasoning here...",
225
+ lines=2,
226
+ interactive=False
227
  )
228
 
229
+ ttt_play_btn.click(
230
+ fn=play_tictactoe,
231
+ inputs=[ttt_position],
232
+ outputs=[ttt_board, ttt_message, ttt_reasoning]
233
+ )
234
+
235
+ ttt_reset_btn.click(
236
+ fn=reset_tictactoe,
237
+ outputs=[ttt_board, ttt_message, ttt_reasoning]
238
+ )
239
+
240
+ # Kuhn Poker Tab
241
+ with gr.TabItem("๐Ÿƒ Kuhn Poker"):
242
+ gr.Markdown("### Play Kuhn Poker against AI")
243
+ gr.Markdown("Simple poker with 3 cards (J, Q, K). You are Player 1.")
244
+
245
  with gr.Row():
246
+ with gr.Column(scale=2):
247
+ kuhn_state = gr.Textbox(
248
+ label="Game State",
249
+ value=get_kuhn_poker_state(),
250
+ lines=8,
251
+ interactive=False
252
+ )
253
+
254
+ with gr.Column(scale=1):
255
+ kuhn_action = gr.Dropdown(
256
+ label="Your Action",
257
+ choices=["Check/Call", "Bet", "Fold"],
258
+ value="Check/Call"
259
+ )
260
+
261
+ with gr.Row():
262
+ kuhn_play_btn = gr.Button("Play Action", variant="primary")
263
+ kuhn_reset_btn = gr.Button("New Game", variant="secondary")
264
+
265
+ kuhn_message = gr.Textbox(
266
+ label="Game Status",
267
+ value="Choose your action!",
268
+ lines=2,
269
+ interactive=False
270
+ )
271
+
272
+ kuhn_reasoning = gr.Textbox(
273
+ label="AI Reasoning",
274
+ value="AI will show its reasoning here...",
275
+ lines=2,
276
+ interactive=False
277
+ )
278
+
279
+ kuhn_play_btn.click(
280
+ fn=play_kuhn_poker,
281
+ inputs=[kuhn_action],
282
+ outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
283
+ )
284
+
285
+ kuhn_reset_btn.click(
286
+ fn=reset_kuhn_poker,
287
+ outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
288
+ )
289
 
290
+ except Exception as e:
291
+ gr.Markdown(f"โš ๏ธ **Error initializing games:** {str(e)}")
292
+ gr.Markdown("Please check the logs for more details.")
 
 
 
 
 
 
 
 
293
 
294
+ else:
295
+ # Fallback interface when games don't load
296
+ gr.Markdown("โš ๏ธ **Game modules could not be loaded.** Showing basic interface.")
297
+ gr.Markdown("This usually happens when dependencies are still installing on HF Spaces.")
298
+
299
+ # Simple demo interface
300
+ with gr.Row():
301
+ simple_input = gr.Textbox(label="Test Input", placeholder="Enter something...")
302
+ simple_output = gr.Textbox(label="Output", interactive=False)
303
+
304
+ def simple_echo(text):
305
+ return f"Echo: {text} (Game modules will be available once dependencies install)"
306
+
307
+ simple_input.submit(fn=simple_echo, inputs=[simple_input], outputs=[simple_output])
308
+
309
+ # About Tab (always available)
310
+ with gr.TabItem("โ„น๏ธ About"):
311
+ gr.Markdown("""
312
+ ### About SPIRAL
313
+
314
+ This is a **demo version** of the SPIRAL methodology: *"Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."*
315
+
316
+ **Current Features:**
317
+ - ๐ŸŽฏ **TicTacToe**: Play against a random AI opponent
318
+ - ๐Ÿƒ **Kuhn Poker**: Experience simplified poker gameplay
319
+ - ๐ŸŽฎ **Interactive Games**: Real-time game state updates
320
+
321
+ **Coming Soon:**
322
+ - ๐Ÿง  **SPIRAL-trained AI**: Opponents trained via self-play
323
+ - ๐Ÿ“Š **Reasoning Traces**: See step-by-step AI decision-making
324
+ - ๐Ÿ”ฌ **Transfer Learning**: Test AI reasoning on math problems
325
+ - ๐Ÿ“ˆ **Performance Metrics**: Track AI improvement over time
326
+
327
+ **Game Rules:**
328
+
329
+ **TicTacToe:**
330
+ - 3x3 grid, get 3 in a row to win
331
+ - You are X, AI is O
332
+ - Numbers 0-8 represent board positions
333
+
334
+ **Kuhn Poker:**
335
+ - 3 cards: Jack (lowest), Queen, King (highest)
336
+ - Each player gets 1 card, antes 1 chip
337
+ - Actions: Check/Call, Bet (+1 chip), Fold
338
+ - Higher card wins if both call/check
339
+
340
+ **Technical Details:**
341
+ - Built with Gymnasium environments
342
+ - Gradio web interface
343
+ - Ready for SPIRAL training integration
344
+ """)
345
 
346
+ if GAMES_AVAILABLE:
347
+ gr.Markdown("---")
348
+ gr.Markdown("๐Ÿšง **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")
349
+ else:
350
+ gr.Markdown("---")
351
+ gr.Markdown("๐Ÿ”„ **Dependencies are loading.** Refresh in a few minutes to see the full game interface!")
352
 
353
  return demo
354