Kaushik Rajan committed on
Commit
b3a9ec4
·
1 Parent(s): 2701231

Add interactive game demo interface - TicTacToe and Kuhn Poker playable in browser

Browse files
Files changed (2) hide show
  1. app.py +16 -5
  2. app/app.py +289 -4
app.py CHANGED
@@ -1,7 +1,18 @@
1
- import gradio as gr
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
"""
SPIRAL: Interactive Reasoning Game Simulator

Entry point for Hugging Face Spaces deployment.
"""

import os
import sys

# This launcher is itself called ``app.py``. When it is run as a script, its
# own directory is searched before anything appended to ``sys.path``, so
# ``from app import ...`` could resolve back to this very file and fail as a
# circular import. Putting the ``app/`` directory at the front of the search
# path makes ``app`` resolve to ``app/app.py`` instead.
_APP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "app")
if _APP_DIR not in sys.path:
    sys.path.insert(0, _APP_DIR)

# Import and launch the main app
from app import create_interface  # noqa: E402  (must follow the path setup)

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
app/app.py CHANGED
@@ -5,22 +5,307 @@ Main Gradio application for the SPIRAL demo on Hugging Face Spaces.
5
  """
6
 
7
  import gradio as gr
 
 
8
  import os
9
  import sys
10
 
11
  # Add src to path for imports
12
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def create_interface():
16
  """Create the main Gradio interface."""
17
 
18
- with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator") as demo:
19
  gr.Markdown("# ๐ŸŽฎ SPIRAL: Interactive Reasoning Game Simulator")
20
- gr.Markdown("**Coming Soon**: Interactive games with AI reasoning traces!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Placeholder for now
23
- gr.Markdown("This app is currently under development. Check back soon!")
24
 
25
  return demo
26
 
 
5
  """
6
 
7
  import gradio as gr
8
+ import numpy as np
9
+ import random
10
  import os
11
  import sys
12
 
13
  # Add src to path for imports
14
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
15
 
16
+ from games import TicTacToeEnv, KuhnPokerEnv
17
+ from games.game_utils import get_available_games, get_game_info
18
+
19
+
20
class GameInterface:
    """Drive one TicTacToe and one Kuhn Poker environment for the web UI.

    Every ``play_*`` and ``reset_*`` method returns a 3-tuple of strings:
    ``(rendered game state, status message, detail text)``.

    Conventions, as read from the environments by this class: the human is
    player ``1`` ("X" in TicTacToe), the AI is player ``-1`` ("O"), and the
    AI currently picks uniformly at random among the actions returned by the
    environment's ``_get_valid_actions()``.
    """

    # Kuhn Poker action labels, indexed by the integer action id given to the env.
    KUHN_ACTION_NAMES = ("Check/Call", "Bet", "Fold")
    # Card labels, indexed by the env's integer card value.
    KUHN_CARD_NAMES = ("J", "Q", "K")

    def __init__(self):
        """Create both environments and reset them to a fresh game."""
        self.tictactoe_env = None
        self.kuhn_env = None
        self.reset_games()

    def reset_games(self):
        """Reset both game environments."""
        self.tictactoe_env = TicTacToeEnv()
        self.kuhn_env = KuhnPokerEnv()
        self.tictactoe_env.reset()
        self.kuhn_env.reset()

    # ------------------------------------------------------------------
    # TicTacToe
    # ------------------------------------------------------------------
    def _tictactoe_winner_label(self):
        """Return "You", "AI" or "No one" for the env's current ``winner``."""
        winner = self.tictactoe_env.winner
        if winner == 1:
            return "You"
        if winner == -1:
            return "AI"
        return "No one"

    def play_tictactoe(self, position):
        """Apply the human's TicTacToe move, then let the AI answer.

        Args:
            position: Board index 0-8 as shown on the rendered board, given
                as an int or as text that ``int()`` can parse.

        Returns:
            ``(board, status, detail)`` strings. Input that is not a number,
            is out of range, or names an occupied square is reported in
            ``status`` without calling the environment.
        """
        env = self.tictactoe_env
        if env.game_over:
            return self.get_tictactoe_board(), "Game is over! Click 'New Game' to start again.", ""

        # Parse separately so that only a bad number yields the "valid number"
        # hint; a ValueError raised by the environment itself is reported by
        # the generic handler further down.
        try:
            position = int(position)
        except (TypeError, ValueError):
            return self.get_tictactoe_board(), "Please enter a valid number (0-8).", ""

        if not 0 <= position <= 8:
            return self.get_tictactoe_board(), "Invalid position! Choose 0-8.", ""

        try:
            # X and O are stored as 1 and -1 (see get_tictactoe_board).
            # Assumes the environment maps action p to board[p // 3, p % 3],
            # i.e. the same numbering that is drawn on the board.
            row, col = divmod(position, 3)
            if env.board[row, col] in (1, -1):
                return (
                    self.get_tictactoe_board(),
                    f"Position {position} is already taken! Choose an empty square.",
                    "",
                )

            # Human move
            _, reward, terminated, _, _ = env.step(position)
            if terminated:
                return (
                    self.get_tictactoe_board(),
                    f"Game Over! {self._tictactoe_winner_label()} won!",
                    f"Final reward: {reward}",
                )

            # AI move (random for now)
            if not env.game_over:
                valid_actions = env._get_valid_actions()
                # len() instead of truthiness so an array result works too.
                if len(valid_actions) > 0:
                    ai_action = random.choice(valid_actions)
                    _, reward, terminated, _, _ = env.step(ai_action)
                    if terminated:
                        return (
                            self.get_tictactoe_board(),
                            f"Game Over! {self._tictactoe_winner_label()} won!",
                            f"AI played position {ai_action}. Final reward: {reward}",
                        )
                    return (
                        self.get_tictactoe_board(),
                        f"AI played position {ai_action}. Your turn!",
                        f"AI reasoning: Chose position {ai_action} randomly",
                    )

            return self.get_tictactoe_board(), "Your turn!", ""
        except Exception as e:
            # UI boundary: show the problem in the status box rather than
            # letting the callback fail.
            return self.get_tictactoe_board(), f"Error: {str(e)}", ""

    def reset_tictactoe(self):
        """Reset TicTacToe game."""
        self.tictactoe_env.reset()
        return self.get_tictactoe_board(), "New game started! You are X. Choose a position (0-8).", ""

    def get_tictactoe_board(self):
        """Get current TicTacToe board as string.

        Squares holding ``1`` are drawn as X, ``-1`` as O, and anything else
        as the square's own index so the player can see which number to type.
        """
        board = self.tictactoe_env.board
        rendered_rows = []
        for row in range(3):
            cells = []
            for col in range(3):
                cell = board[row, col]
                if cell == 1:
                    cells.append(" X ")
                elif cell == -1:
                    cells.append(" O ")
                else:
                    cells.append(f" {row * 3 + col} ")
            rendered_rows.append("|".join(cells) + "\n")
        # A rule between rows, but not after the last one.
        return "-----------\n".join(rendered_rows)

    # ------------------------------------------------------------------
    # Kuhn Poker
    # ------------------------------------------------------------------
    def _kuhn_winner_label(self):
        """Return "You" when the env's ``winner`` is 1, otherwise "AI"."""
        return "You" if self.kuhn_env.winner == 1 else "AI"

    def play_kuhn_poker(self, action_name):
        """Apply the human's Kuhn Poker action, then let the AI answer.

        Args:
            action_name: One of "Check/Call", "Bet" or "Fold".

        Returns:
            ``(state, status, detail)`` strings. An unknown action name is
            reported in ``status`` without calling the environment.
        """
        env = self.kuhn_env
        if env.game_over:
            return self.get_kuhn_poker_state(), "Game is over! Click 'New Game' to start again.", ""

        try:
            # Map action name to action number
            action_map = {name: index for index, name in enumerate(self.KUHN_ACTION_NAMES)}
            if action_name not in action_map:
                return self.get_kuhn_poker_state(), "Invalid action!", ""

            # Human move
            _, reward, terminated, _, _ = env.step(action_map[action_name])
            if terminated:
                return (
                    self.get_kuhn_poker_state(),
                    f"Game Over! {self._kuhn_winner_label()} won! Pot: {env.pot}",
                    f"Your final reward: {reward}",
                )

            # AI move (random for now)
            if not env.game_over:
                ai_action = random.choice(env._get_valid_actions())
                ai_action_name = self.KUHN_ACTION_NAMES[ai_action]
                _, reward, terminated, _, _ = env.step(ai_action)
                if terminated:
                    return (
                        self.get_kuhn_poker_state(),
                        f"AI chose {ai_action_name}. Game Over! {self._kuhn_winner_label()} won! Pot: {env.pot}",
                        f"AI reasoning: Chose {ai_action_name} randomly. Your final reward: {reward}",
                    )
                return (
                    self.get_kuhn_poker_state(),
                    f"AI chose {ai_action_name}. Your turn!",
                    f"AI reasoning: Chose {ai_action_name} randomly",
                )

            return self.get_kuhn_poker_state(), "Your turn!", ""
        except Exception as e:
            # UI boundary: show the problem in the status box rather than
            # letting the callback fail.
            return self.get_kuhn_poker_state(), f"Error: {str(e)}", ""

    def reset_kuhn_poker(self):
        """Reset Kuhn Poker game."""
        self.kuhn_env.reset()
        card = self.KUHN_CARD_NAMES[self.kuhn_env.player1_card]
        return (
            self.get_kuhn_poker_state(),
            "New game started! You are Player 1. Choose your action.",
            f"Your card: {card}",
        )

    def get_kuhn_poker_state(self):
        """Get current Kuhn Poker state as string.

        Lists the human's card, the pot, the current player and the betting
        round, followed by the action history when there is one.
        """
        env = self.kuhn_env
        lines = [
            f"🃏 Your Card: {self.KUHN_CARD_NAMES[env.player1_card]}\n",
            f"💰 Pot: {env.pot}\n",
            f"🎯 Current Player: {env.current_player}\n",
            f"🔄 Betting Round: {env.betting_round}\n",
        ]

        if env.actions_history:
            lines.append("\n📋 Actions:\n")
            for player, action in env.actions_history:
                lines.append(f" Player {player}: {self.KUHN_ACTION_NAMES[action]}\n")

        return "".join(lines)
155
+
156
+
157
# Create game interface
# NOTE(review): this single module-level instance backs every callback
# registered in create_interface(), so all browser sessions served by this
# process appear to share the same boards. Confirm whether per-session state
# is wanted.
game_interface = GameInterface()


def create_interface():
    """Create the main Gradio interface.

    Builds a ``gr.Blocks`` app with three tabs (TicTacToe, Kuhn Poker, About)
    and wires the play / new-game buttons to the module-level
    ``game_interface``. Each game callback fills three textboxes in order:
    game state, status message, AI reasoning.

    Returns:
        The constructed ``gr.Blocks`` object; the caller launches it.
    """

    with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
        gr.Markdown("**Demo Version** - Experience zero-sum games with AI! Full reasoning capabilities coming soon.")

        with gr.Tabs():
            # TicTacToe Tab
            with gr.TabItem("🎯 TicTacToe"):
                gr.Markdown("### Play TicTacToe against AI")
                gr.Markdown("You are **X** and go first. Enter a position (0-8) to make your move.")

                with gr.Row():
                    with gr.Column(scale=2):
                        ttt_board = gr.Textbox(
                            label="Game Board",
                            value=game_interface.get_tictactoe_board(),
                            lines=6,
                            interactive=False,
                            elem_id="ttt-board"
                        )

                    with gr.Column(scale=1):
                        ttt_position = gr.Textbox(
                            label="Your Move (0-8)",
                            placeholder="Enter position number",
                            lines=1
                        )

                        with gr.Row():
                            ttt_play_btn = gr.Button("Play Move", variant="primary")
                            ttt_reset_btn = gr.Button("New Game", variant="secondary")

                ttt_message = gr.Textbox(
                    label="Game Status",
                    value="Choose a position (0-8) to start!",
                    lines=2,
                    interactive=False
                )

                ttt_reasoning = gr.Textbox(
                    label="AI Reasoning",
                    value="AI will show its reasoning here...",
                    lines=2,
                    interactive=False
                )

                ttt_play_btn.click(
                    fn=game_interface.play_tictactoe,
                    inputs=[ttt_position],
                    outputs=[ttt_board, ttt_message, ttt_reasoning]
                )

                ttt_reset_btn.click(
                    fn=game_interface.reset_tictactoe,
                    outputs=[ttt_board, ttt_message, ttt_reasoning]
                )

            # Kuhn Poker Tab
            with gr.TabItem("🃏 Kuhn Poker"):
                gr.Markdown("### Play Kuhn Poker against AI")
                gr.Markdown("Simple poker with 3 cards (J, Q, K). You are Player 1.")

                with gr.Row():
                    with gr.Column(scale=2):
                        kuhn_state = gr.Textbox(
                            label="Game State",
                            value=game_interface.get_kuhn_poker_state(),
                            lines=8,
                            interactive=False
                        )

                    with gr.Column(scale=1):
                        kuhn_action = gr.Dropdown(
                            label="Your Action",
                            choices=["Check/Call", "Bet", "Fold"],
                            value="Check/Call"
                        )

                        with gr.Row():
                            kuhn_play_btn = gr.Button("Play Action", variant="primary")
                            kuhn_reset_btn = gr.Button("New Game", variant="secondary")

                kuhn_message = gr.Textbox(
                    label="Game Status",
                    value="Choose your action!",
                    lines=2,
                    interactive=False
                )

                kuhn_reasoning = gr.Textbox(
                    label="AI Reasoning",
                    value="AI will show its reasoning here...",
                    lines=2,
                    interactive=False
                )

                kuhn_play_btn.click(
                    fn=game_interface.play_kuhn_poker,
                    inputs=[kuhn_action],
                    outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
                )

                kuhn_reset_btn.click(
                    fn=game_interface.reset_kuhn_poker,
                    outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
                )

            # About Tab
            with gr.TabItem("ℹ️ About"):
                gr.Markdown("""
                ### About SPIRAL

                This is a **demo version** of the SPIRAL methodology: *"Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."*

                **Current Features:**
                - 🎯 **TicTacToe**: Play against a random AI opponent
                - 🃏 **Kuhn Poker**: Experience simplified poker gameplay
                - 🎮 **Interactive Games**: Real-time game state updates

                **Coming Soon:**
                - 🧠 **SPIRAL-trained AI**: Opponents trained via self-play
                - 📊 **Reasoning Traces**: See step-by-step AI decision-making
                - 🔬 **Transfer Learning**: Test AI reasoning on math problems
                - 📈 **Performance Metrics**: Track AI improvement over time

                **Game Rules:**

                **TicTacToe:**
                - 3x3 grid, get 3 in a row to win
                - You are X, AI is O
                - Numbers 0-8 represent board positions

                **Kuhn Poker:**
                - 3 cards: Jack (lowest), Queen, King (highest)
                - Each player gets 1 card, antes 1 chip
                - Actions: Check/Call, Bet (+1 chip), Fold
                - Higher card wins if both call/check

                **Technical Details:**
                - Built with Gymnasium environments
                - Gradio web interface
                - Ready for SPIRAL training integration
                """)

        gr.Markdown("---")
        gr.Markdown("🚧 **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")

    return demo
311