Kaushik Rajan committed on
Commit
c59d6c7
·
1 Parent(s): b3a9ec4

Fix HF Spaces import issue - move app functionality to root app.py

Browse files
Files changed (2) hide show
  1. app.py +303 -6
  2. app/app.py +0 -315
app.py CHANGED
@@ -1,17 +1,314 @@
1
  """
2
  SPIRAL: Interactive Reasoning Game Simulator
3
 
4
- Entry point for Hugging Face Spaces deployment.
5
  """
6
 
7
- import sys
 
 
8
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Add the app directory to the path
11
- sys.path.append(os.path.join(os.path.dirname(__file__), 'app'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Import and launch the main app
14
- from app import create_interface
15
 
16
  if __name__ == "__main__":
17
  demo = create_interface()
 
1
  """
2
  SPIRAL: Interactive Reasoning Game Simulator
3
 
4
+ Main Gradio application for the SPIRAL demo on Hugging Face Spaces.
5
  """
6
 
7
+ import gradio as gr
8
+ import numpy as np
9
+ import random
10
  import os
11
+ import sys
12
+
13
+ # Add src to path for imports
14
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
15
+
16
+ from games import TicTacToeEnv, KuhnPokerEnv
17
+ from games.game_utils import get_available_games, get_game_info
18
+
19
+
20
class GameInterface:
    """Drive one TicTacToe and one Kuhn Poker environment on behalf of the UI.

    Every ``play_*`` and ``reset_*`` method returns a 3-tuple of strings
    ``(state_text, status_message, reasoning_text)``. The human moves first
    and the opponent then picks uniformly at random from the environment's
    ``_get_valid_actions()``.
    """

    # Display labels, indexed by the integer action / card codes used below.
    KUHN_ACTION_NAMES = ("Check/Call", "Bet", "Fold")
    KUHN_CARD_NAMES = ("J", "Q", "K")

    def __init__(self):
        self.tictactoe_env = None
        self.kuhn_env = None
        self.reset_games()

    def reset_games(self):
        """Create fresh environments for both games and reset them."""
        self.tictactoe_env = TicTacToeEnv()
        self.kuhn_env = KuhnPokerEnv()
        self.tictactoe_env.reset()
        self.kuhn_env.reset()

    # ------------------------------------------------------------------
    # TicTacToe
    # ------------------------------------------------------------------

    def _tictactoe_winner_label(self):
        """Return "You", "AI" or "No one" from the environment's winner code."""
        winner = self.tictactoe_env.winner
        if winner == 1:
            return "You"
        if winner == -1:
            return "AI"
        return "No one"

    def play_tictactoe(self, position):
        """Apply the human move at ``position`` and then a random AI reply.

        Args:
            position: Board cell 0-8 (row-major); anything ``int()`` accepts.

        Returns:
            ``(board_text, status_message, reasoning_text)``.
        """
        env = self.tictactoe_env
        if env.game_over:
            return self.get_tictactoe_board(), "Game is over! Click 'New Game' to start again.", ""

        # Only the conversion is guarded here, so a ValueError raised by the
        # environment is not mistaken for bad user input. TypeError covers a
        # None value coming from an empty input component.
        try:
            position = int(position)
        except (TypeError, ValueError):
            return self.get_tictactoe_board(), "Please enter a valid number (0-8).", ""

        if position < 0 or position > 8:
            return self.get_tictactoe_board(), "Invalid position! Choose 0-8.", ""

        try:
            # Reject occupied cells up front, using the same cell encoding as
            # get_tictactoe_board (1 -> X, -1 -> O, anything else is free).
            if env.board[position // 3, position % 3] in (1, -1):
                return (
                    self.get_tictactoe_board(),
                    f"Position {position} is already taken! Choose an empty cell.",
                    "",
                )

            # Human move
            _, reward, terminated, _, _ = env.step(position)
            if terminated:
                winner = self._tictactoe_winner_label()
                return self.get_tictactoe_board(), f"Game Over! {winner} won!", f"Final reward: {reward}"

            # AI move (random for now)
            if not env.game_over:
                valid_actions = env._get_valid_actions()
                # len() rather than truthiness so array-like returns work too.
                if len(valid_actions) > 0:
                    ai_action = random.choice(valid_actions)
                    _, reward, terminated, _, _ = env.step(ai_action)

                    if terminated:
                        winner = self._tictactoe_winner_label()
                        return (
                            self.get_tictactoe_board(),
                            f"Game Over! {winner} won!",
                            f"AI played position {ai_action}. Final reward: {reward}",
                        )
                    return (
                        self.get_tictactoe_board(),
                        f"AI played position {ai_action}. Your turn!",
                        f"AI reasoning: Chose position {ai_action} randomly",
                    )

            return self.get_tictactoe_board(), "Your turn!", ""
        except Exception as e:
            # UI boundary: report the failure in the status box instead of
            # letting the event handler crash.
            return self.get_tictactoe_board(), f"Error: {str(e)}", ""

    def reset_tictactoe(self):
        """Reset the TicTacToe environment and return the opening UI texts."""
        self.tictactoe_env.reset()
        return self.get_tictactoe_board(), "New game started! You are X. Choose a position (0-8).", ""

    def get_tictactoe_board(self):
        """Render the 3x3 board as text; free cells show their 0-8 index."""
        board = self.tictactoe_env.board
        rendered_rows = []
        for row in range(3):
            cells = []
            for col in range(3):
                cell = board[row, col]
                if cell == 1:
                    cells.append(" X ")
                elif cell == -1:
                    cells.append(" O ")
                else:
                    cells.append(f" {row * 3 + col} ")
            rendered_rows.append("|".join(cells) + "\n")
        # A separator goes between rows only, not after the last one.
        return "-----------\n".join(rendered_rows)

    # ------------------------------------------------------------------
    # Kuhn Poker
    # ------------------------------------------------------------------

    def _kuhn_winner_label(self):
        """Return "You" when the environment's winner code is 1, else "AI"."""
        return "You" if self.kuhn_env.winner == 1 else "AI"

    def play_kuhn_poker(self, action_name):
        """Apply the human action named ``action_name`` and a random AI reply.

        Args:
            action_name: One of ``KUHN_ACTION_NAMES``.

        Returns:
            ``(state_text, status_message, reasoning_text)``.
        """
        env = self.kuhn_env
        if env.game_over:
            return self.get_kuhn_poker_state(), "Game is over! Click 'New Game' to start again.", ""

        names = self.KUHN_ACTION_NAMES
        try:
            if action_name not in names:
                return self.get_kuhn_poker_state(), "Invalid action!", ""

            # The action code is the label's position: 0, 1, 2.
            action = names.index(action_name)

            # Human move
            _, reward, terminated, _, _ = env.step(action)
            if terminated:
                winner = self._kuhn_winner_label()
                return (
                    self.get_kuhn_poker_state(),
                    f"Game Over! {winner} won! Pot: {env.pot}",
                    f"Your final reward: {reward}",
                )

            # AI move (random for now)
            if not env.game_over:
                valid_actions = env._get_valid_actions()
                # Same guard as the TicTacToe path: random.choice raises
                # IndexError on an empty sequence.
                if len(valid_actions) > 0:
                    ai_action = random.choice(valid_actions)
                    ai_action_name = names[ai_action]

                    _, reward, terminated, _, _ = env.step(ai_action)
                    if terminated:
                        winner = self._kuhn_winner_label()
                        return (
                            self.get_kuhn_poker_state(),
                            f"AI chose {ai_action_name}. Game Over! {winner} won! Pot: {env.pot}",
                            f"AI reasoning: Chose {ai_action_name} randomly. Your final reward: {reward}",
                        )
                    return (
                        self.get_kuhn_poker_state(),
                        f"AI chose {ai_action_name}. Your turn!",
                        f"AI reasoning: Chose {ai_action_name} randomly",
                    )

            return self.get_kuhn_poker_state(), "Your turn!", ""
        except Exception as e:
            # UI boundary: report the failure in the status box instead of
            # letting the event handler crash.
            return self.get_kuhn_poker_state(), f"Error: {str(e)}", ""

    def reset_kuhn_poker(self):
        """Reset the Kuhn Poker environment and return the opening UI texts."""
        self.kuhn_env.reset()
        card = self.KUHN_CARD_NAMES[self.kuhn_env.player1_card]
        return (
            self.get_kuhn_poker_state(),
            "New game started! You are Player 1. Choose your action.",
            f"Your card: {card}",
        )

    def get_kuhn_poker_state(self):
        """Render the player's card, pot, turn, round and action history as text."""
        env = self.kuhn_env
        lines = [
            f"🃏 Your Card: {self.KUHN_CARD_NAMES[env.player1_card]}",
            f"💰 Pot: {env.pot}",
            f"🎯 Current Player: {env.current_player}",
            f"🔄 Betting Round: {env.betting_round}",
        ]

        if env.actions_history:
            lines.append("")
            lines.append("📋 Actions:")
            lines.extend(
                f" Player {player}: {self.KUHN_ACTION_NAMES[action]}"
                for player, action in env.actions_history
            )

        # Every line, including the last, is newline-terminated.
        return "\n".join(lines) + "\n"
155
+
156
+
157
# Module-level GameInterface instance; create_interface() binds its methods
# to the Gradio event handlers.
game_interface = GameInterface()
159
+
160
 
161
def create_interface():
    """Build the Gradio Blocks UI with TicTacToe, Kuhn Poker and About tabs.

    All event handlers are bound methods of the module-level
    ``game_interface`` instance.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    # NOTE(review): every handler is bound to the single module-level
    # game_interface, so concurrent visitors presumably share one game.
    # Consider per-session state (gr.State) if that is not intended.
    with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
        gr.Markdown("**Demo Version** - Experience zero-sum games with AI! Full reasoning capabilities coming soon.")

        with gr.Tabs():
            # TicTacToe Tab
            with gr.TabItem("🎯 TicTacToe"):
                gr.Markdown("### Play TicTacToe against AI")
                gr.Markdown("You are **X** and go first. Enter a position (0-8) to make your move.")

                with gr.Row():
                    with gr.Column(scale=2):
                        ttt_board = gr.Textbox(
                            label="Game Board",
                            # Passed as a callable so Gradio re-renders the
                            # current board on every page load instead of
                            # freezing the board captured at build time.
                            value=game_interface.get_tictactoe_board,
                            lines=6,
                            interactive=False,
                            elem_id="ttt-board",
                        )

                    with gr.Column(scale=1):
                        ttt_position = gr.Textbox(
                            label="Your Move (0-8)",
                            placeholder="Enter position number",
                            lines=1,
                        )

                        with gr.Row():
                            ttt_play_btn = gr.Button("Play Move", variant="primary")
                            ttt_reset_btn = gr.Button("New Game", variant="secondary")

                        ttt_message = gr.Textbox(
                            label="Game Status",
                            value="Choose a position (0-8) to start!",
                            lines=2,
                            interactive=False,
                        )

                        ttt_reasoning = gr.Textbox(
                            label="AI Reasoning",
                            value="AI will show its reasoning here...",
                            lines=2,
                            interactive=False,
                        )

                ttt_play_btn.click(
                    fn=game_interface.play_tictactoe,
                    inputs=[ttt_position],
                    outputs=[ttt_board, ttt_message, ttt_reasoning],
                )

                ttt_reset_btn.click(
                    fn=game_interface.reset_tictactoe,
                    outputs=[ttt_board, ttt_message, ttt_reasoning],
                )

            # Kuhn Poker Tab
            with gr.TabItem("🃏 Kuhn Poker"):
                gr.Markdown("### Play Kuhn Poker against AI")
                gr.Markdown("Simple poker with 3 cards (J, Q, K). You are Player 1.")

                with gr.Row():
                    with gr.Column(scale=2):
                        kuhn_state = gr.Textbox(
                            label="Game State",
                            # Callable for the same reason as the TicTacToe
                            # board: refresh on each page load.
                            value=game_interface.get_kuhn_poker_state,
                            lines=8,
                            interactive=False,
                        )

                    with gr.Column(scale=1):
                        kuhn_action = gr.Dropdown(
                            label="Your Action",
                            choices=["Check/Call", "Bet", "Fold"],
                            value="Check/Call",
                        )

                        with gr.Row():
                            kuhn_play_btn = gr.Button("Play Action", variant="primary")
                            kuhn_reset_btn = gr.Button("New Game", variant="secondary")

                        kuhn_message = gr.Textbox(
                            label="Game Status",
                            value="Choose your action!",
                            lines=2,
                            interactive=False,
                        )

                        kuhn_reasoning = gr.Textbox(
                            label="AI Reasoning",
                            value="AI will show its reasoning here...",
                            lines=2,
                            interactive=False,
                        )

                kuhn_play_btn.click(
                    fn=game_interface.play_kuhn_poker,
                    inputs=[kuhn_action],
                    outputs=[kuhn_state, kuhn_message, kuhn_reasoning],
                )

                kuhn_reset_btn.click(
                    fn=game_interface.reset_kuhn_poker,
                    outputs=[kuhn_state, kuhn_message, kuhn_reasoning],
                )

            # About Tab
            with gr.TabItem("ℹ️ About"):
                gr.Markdown("""
                ### About SPIRAL

                This is a **demo version** of the SPIRAL methodology: *"Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."*

                **Current Features:**
                - 🎯 **TicTacToe**: Play against a random AI opponent
                - 🃏 **Kuhn Poker**: Experience simplified poker gameplay
                - 🎮 **Interactive Games**: Real-time game state updates

                **Coming Soon:**
                - 🧠 **SPIRAL-trained AI**: Opponents trained via self-play
                - 📊 **Reasoning Traces**: See step-by-step AI decision-making
                - 🔬 **Transfer Learning**: Test AI reasoning on math problems
                - 📈 **Performance Metrics**: Track AI improvement over time

                **Game Rules:**

                **TicTacToe:**
                - 3x3 grid, get 3 in a row to win
                - You are X, AI is O
                - Numbers 0-8 represent board positions

                **Kuhn Poker:**
                - 3 cards: Jack (lowest), Queen, King (highest)
                - Each player gets 1 card, antes 1 chip
                - Actions: Check/Call, Bet (+1 chip), Fold
                - Higher card wins if both call/check

                **Technical Details:**
                - Built with Gymnasium environments
                - Gradio web interface
                - Ready for SPIRAL training integration
                """)

        gr.Markdown("---")
        gr.Markdown("🚧 **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")

    return demo
311
 
 
 
312
 
313
  if __name__ == "__main__":
314
  demo = create_interface()
app/app.py DELETED
@@ -1,315 +0,0 @@
1
- """
2
- SPIRAL Interactive Reasoning Game Simulator
3
-
4
- Main Gradio application for the SPIRAL demo on Hugging Face Spaces.
5
- """
6
-
7
- import gradio as gr
8
- import numpy as np
9
- import random
10
- import os
11
- import sys
12
-
13
- # Add src to path for imports
14
- sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
15
-
16
- from games import TicTacToeEnv, KuhnPokerEnv
17
- from games.game_utils import get_available_games, get_game_info
18
-
19
-
20
- class GameInterface:
21
- """Interface for managing game interactions."""
22
-
23
- def __init__(self):
24
- self.tictactoe_env = None
25
- self.kuhn_env = None
26
- self.reset_games()
27
-
28
- def reset_games(self):
29
- """Reset both game environments."""
30
- self.tictactoe_env = TicTacToeEnv()
31
- self.kuhn_env = KuhnPokerEnv()
32
- self.tictactoe_env.reset()
33
- self.kuhn_env.reset()
34
-
35
- def play_tictactoe(self, position):
36
- """Play a TicTacToe move."""
37
- if self.tictactoe_env.game_over:
38
- return self.get_tictactoe_board(), "Game is over! Click 'New Game' to start again.", ""
39
-
40
- try:
41
- position = int(position)
42
- if position < 0 or position > 8:
43
- return self.get_tictactoe_board(), "Invalid position! Choose 0-8.", ""
44
-
45
- # Human move
46
- obs, reward, terminated, truncated, info = self.tictactoe_env.step(position)
47
-
48
- if terminated:
49
- winner = "You" if self.tictactoe_env.winner == 1 else "AI" if self.tictactoe_env.winner == -1 else "No one"
50
- return self.get_tictactoe_board(), f"Game Over! {winner} won!", f"Final reward: {reward}"
51
-
52
- # AI move (random for now)
53
- if not self.tictactoe_env.game_over:
54
- valid_actions = self.tictactoe_env._get_valid_actions()
55
- if valid_actions:
56
- ai_action = random.choice(valid_actions)
57
- obs, reward, terminated, truncated, info = self.tictactoe_env.step(ai_action)
58
-
59
- if terminated:
60
- winner = "You" if self.tictactoe_env.winner == 1 else "AI" if self.tictactoe_env.winner == -1 else "No one"
61
- return self.get_tictactoe_board(), f"Game Over! {winner} won!", f"AI played position {ai_action}. Final reward: {reward}"
62
- else:
63
- return self.get_tictactoe_board(), f"AI played position {ai_action}. Your turn!", f"AI reasoning: Chose position {ai_action} randomly"
64
-
65
- return self.get_tictactoe_board(), "Your turn!", ""
66
-
67
- except ValueError:
68
- return self.get_tictactoe_board(), "Please enter a valid number (0-8).", ""
69
- except Exception as e:
70
- return self.get_tictactoe_board(), f"Error: {str(e)}", ""
71
-
72
- def reset_tictactoe(self):
73
- """Reset TicTacToe game."""
74
- self.tictactoe_env.reset()
75
- return self.get_tictactoe_board(), "New game started! You are X. Choose a position (0-8).", ""
76
-
77
- def get_tictactoe_board(self):
78
- """Get current TicTacToe board as string."""
79
- board = self.tictactoe_env.board
80
- display = ""
81
- for row in range(3):
82
- for col in range(3):
83
- cell = board[row, col]
84
- if cell == 1:
85
- display += " X "
86
- elif cell == -1:
87
- display += " O "
88
- else:
89
- display += f" {row*3 + col} "
90
- if col < 2:
91
- display += "|"
92
- display += "\n"
93
- if row < 2:
94
- display += "-----------\n"
95
- return display
96
-
97
- def play_kuhn_poker(self, action_name):
98
- """Play a Kuhn Poker move."""
99
- if self.kuhn_env.game_over:
100
- return self.get_kuhn_poker_state(), "Game is over! Click 'New Game' to start again.", ""
101
-
102
- try:
103
- # Map action name to action number
104
- action_map = {"Check/Call": 0, "Bet": 1, "Fold": 2}
105
- if action_name not in action_map:
106
- return self.get_kuhn_poker_state(), "Invalid action!", ""
107
-
108
- action = action_map[action_name]
109
-
110
- # Human move
111
- obs, reward, terminated, truncated, info = self.kuhn_env.step(action)
112
-
113
- if terminated:
114
- winner = "You" if self.kuhn_env.winner == 1 else "AI"
115
- return self.get_kuhn_poker_state(), f"Game Over! {winner} won! Pot: {self.kuhn_env.pot}", f"Your final reward: {reward}"
116
-
117
- # AI move (random for now)
118
- if not self.kuhn_env.game_over:
119
- valid_actions = self.kuhn_env._get_valid_actions()
120
- ai_action = random.choice(valid_actions)
121
- ai_action_name = ["Check/Call", "Bet", "Fold"][ai_action]
122
-
123
- obs, reward, terminated, truncated, info = self.kuhn_env.step(ai_action)
124
-
125
- if terminated:
126
- winner = "You" if self.kuhn_env.winner == 1 else "AI"
127
- return self.get_kuhn_poker_state(), f"AI chose {ai_action_name}. Game Over! {winner} won! Pot: {self.kuhn_env.pot}", f"AI reasoning: Chose {ai_action_name} randomly. Your final reward: {reward}"
128
- else:
129
- return self.get_kuhn_poker_state(), f"AI chose {ai_action_name}. Your turn!", f"AI reasoning: Chose {ai_action_name} randomly"
130
-
131
- return self.get_kuhn_poker_state(), "Your turn!", ""
132
-
133
- except Exception as e:
134
- return self.get_kuhn_poker_state(), f"Error: {str(e)}", ""
135
-
136
- def reset_kuhn_poker(self):
137
- """Reset Kuhn Poker game."""
138
- self.kuhn_env.reset()
139
- return self.get_kuhn_poker_state(), "New game started! You are Player 1. Choose your action.", f"Your card: {['J', 'Q', 'K'][self.kuhn_env.player1_card]}"
140
-
141
- def get_kuhn_poker_state(self):
142
- """Get current Kuhn Poker state as string."""
143
- state = f"🃏 Your Card: {['J', 'Q', 'K'][self.kuhn_env.player1_card]}\n"
144
- state += f"💰 Pot: {self.kuhn_env.pot}\n"
145
- state += f"🎯 Current Player: {self.kuhn_env.current_player}\n"
146
- state += f"🔄 Betting Round: {self.kuhn_env.betting_round}\n"
147
-
148
- if self.kuhn_env.actions_history:
149
- state += "\n📋 Actions:\n"
150
- for player, action in self.kuhn_env.actions_history:
151
- action_name = ["Check/Call", "Bet", "Fold"][action]
152
- state += f" Player {player}: {action_name}\n"
153
-
154
- return state
155
-
156
-
157
- # Create game interface
158
- game_interface = GameInterface()
159
-
160
-
161
- def create_interface():
162
- """Create the main Gradio interface."""
163
-
164
- with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
165
- gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
166
- gr.Markdown("**Demo Version** - Experience zero-sum games with AI! Full reasoning capabilities coming soon.")
167
-
168
- with gr.Tabs():
169
- # TicTacToe Tab
170
- with gr.TabItem("🎯 TicTacToe"):
171
- gr.Markdown("### Play TicTacToe against AI")
172
- gr.Markdown("You are **X** and go first. Enter a position (0-8) to make your move.")
173
-
174
- with gr.Row():
175
- with gr.Column(scale=2):
176
- ttt_board = gr.Textbox(
177
- label="Game Board",
178
- value=game_interface.get_tictactoe_board(),
179
- lines=6,
180
- interactive=False,
181
- elem_id="ttt-board"
182
- )
183
-
184
- with gr.Column(scale=1):
185
- ttt_position = gr.Textbox(
186
- label="Your Move (0-8)",
187
- placeholder="Enter position number",
188
- lines=1
189
- )
190
-
191
- with gr.Row():
192
- ttt_play_btn = gr.Button("Play Move", variant="primary")
193
- ttt_reset_btn = gr.Button("New Game", variant="secondary")
194
-
195
- ttt_message = gr.Textbox(
196
- label="Game Status",
197
- value="Choose a position (0-8) to start!",
198
- lines=2,
199
- interactive=False
200
- )
201
-
202
- ttt_reasoning = gr.Textbox(
203
- label="AI Reasoning",
204
- value="AI will show its reasoning here...",
205
- lines=2,
206
- interactive=False
207
- )
208
-
209
- ttt_play_btn.click(
210
- fn=game_interface.play_tictactoe,
211
- inputs=[ttt_position],
212
- outputs=[ttt_board, ttt_message, ttt_reasoning]
213
- )
214
-
215
- ttt_reset_btn.click(
216
- fn=game_interface.reset_tictactoe,
217
- outputs=[ttt_board, ttt_message, ttt_reasoning]
218
- )
219
-
220
- # Kuhn Poker Tab
221
- with gr.TabItem("🃏 Kuhn Poker"):
222
- gr.Markdown("### Play Kuhn Poker against AI")
223
- gr.Markdown("Simple poker with 3 cards (J, Q, K). You are Player 1.")
224
-
225
- with gr.Row():
226
- with gr.Column(scale=2):
227
- kuhn_state = gr.Textbox(
228
- label="Game State",
229
- value=game_interface.get_kuhn_poker_state(),
230
- lines=8,
231
- interactive=False
232
- )
233
-
234
- with gr.Column(scale=1):
235
- kuhn_action = gr.Dropdown(
236
- label="Your Action",
237
- choices=["Check/Call", "Bet", "Fold"],
238
- value="Check/Call"
239
- )
240
-
241
- with gr.Row():
242
- kuhn_play_btn = gr.Button("Play Action", variant="primary")
243
- kuhn_reset_btn = gr.Button("New Game", variant="secondary")
244
-
245
- kuhn_message = gr.Textbox(
246
- label="Game Status",
247
- value="Choose your action!",
248
- lines=2,
249
- interactive=False
250
- )
251
-
252
- kuhn_reasoning = gr.Textbox(
253
- label="AI Reasoning",
254
- value="AI will show its reasoning here...",
255
- lines=2,
256
- interactive=False
257
- )
258
-
259
- kuhn_play_btn.click(
260
- fn=game_interface.play_kuhn_poker,
261
- inputs=[kuhn_action],
262
- outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
263
- )
264
-
265
- kuhn_reset_btn.click(
266
- fn=game_interface.reset_kuhn_poker,
267
- outputs=[kuhn_state, kuhn_message, kuhn_reasoning]
268
- )
269
-
270
- # About Tab
271
- with gr.TabItem("ℹ️ About"):
272
- gr.Markdown("""
273
- ### About SPIRAL
274
-
275
- This is a **demo version** of the SPIRAL methodology: *"Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."*
276
-
277
- **Current Features:**
278
- - 🎯 **TicTacToe**: Play against a random AI opponent
279
- - 🃏 **Kuhn Poker**: Experience simplified poker gameplay
280
- - 🎮 **Interactive Games**: Real-time game state updates
281
-
282
- **Coming Soon:**
283
- - 🧠 **SPIRAL-trained AI**: Opponents trained via self-play
284
- - 📊 **Reasoning Traces**: See step-by-step AI decision-making
285
- - 🔬 **Transfer Learning**: Test AI reasoning on math problems
286
- - 📈 **Performance Metrics**: Track AI improvement over time
287
-
288
- **Game Rules:**
289
-
290
- **TicTacToe:**
291
- - 3x3 grid, get 3 in a row to win
292
- - You are X, AI is O
293
- - Numbers 0-8 represent board positions
294
-
295
- **Kuhn Poker:**
296
- - 3 cards: Jack (lowest), Queen, King (highest)
297
- - Each player gets 1 card, antes 1 chip
298
- - Actions: Check/Call, Bet (+1 chip), Fold
299
- - Higher card wins if both call/check
300
-
301
- **Technical Details:**
302
- - Built with Gymnasium environments
303
- - Gradio web interface
304
- - Ready for SPIRAL training integration
305
- """)
306
-
307
- gr.Markdown("---")
308
- gr.Markdown("🚧 **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")
309
-
310
- return demo
311
-
312
-
313
- if __name__ == "__main__":
314
- demo = create_interface()
315
- demo.launch()