Kaushik Rajan committed on
Commit
06c8d18
·
1 Parent(s): e786a81

Phase 2.1: Set up project structure with src/, models/, data/, app/ directories and requirements.txt

Browse files
app/app.py CHANGED
@@ -1,255 +1,30 @@
1
  """
2
- SPIRAL Interactive Reasoning Game Simulator - Main Gradio App
3
 
4
- A practical tool demonstrating how self-play training on zero-sum games
5
- can improve AI reasoning capabilities.
6
  """
7
 
8
  import gradio as gr
9
- import yaml
10
  import os
11
  import sys
12
 
13
- # Add the src directory to the path for imports
14
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
15
 
16
- from typing import Tuple, Dict, Any, List, Optional
17
- import logging
18
 
19
- # Configure logging
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
- class SpiralApp:
24
- """Main application class for the SPIRAL reasoning simulator."""
25
-
26
- def __init__(self, config_path: str = "../config.yaml"):
27
- """Initialize the SPIRAL app with configuration."""
28
- self.config = self._load_config(config_path)
29
- self.setup_logging()
30
-
31
- # Initialize components (will be implemented in Phase 2)
32
- self.game_interface = None
33
- self.reasoning_interface = None
34
- self.transfer_interface = None
35
-
36
- logger.info("SPIRAL App initialized successfully")
37
-
38
- def _load_config(self, config_path: str) -> Dict[str, Any]:
39
- """Load configuration from YAML file."""
40
- try:
41
- with open(config_path, 'r') as f:
42
- config = yaml.safe_load(f)
43
- return config
44
- except FileNotFoundError:
45
- logger.warning(f"Config file not found: {config_path}. Using defaults.")
46
- return self._get_default_config()
47
 
48
- def _get_default_config(self) -> Dict[str, Any]:
49
- """Get default configuration."""
50
- return {
51
- 'interface': {
52
- 'title': 'SPIRAL: Interactive Reasoning Game Simulator',
53
- 'description': 'Play games against AI and explore reasoning capabilities',
54
- 'theme': 'default'
55
- },
56
- 'games': {
57
- 'kuhn_poker': {'name': 'Kuhn Poker'},
58
- 'tictactoe': {'name': 'TicTacToe'}
59
- }
60
- }
61
-
62
- def setup_logging(self):
63
- """Set up logging configuration."""
64
- log_config = self.config.get('logging', {})
65
- level = getattr(logging, log_config.get('level', 'INFO'))
66
- logging.getLogger().setLevel(level)
67
-
68
- def play_game(self, game_type: str, user_move: str, game_state: str = "") -> Tuple[str, str, str]:
69
- """
70
- Handle game play interaction.
71
-
72
- Args:
73
- game_type: Type of game (kuhn_poker, tictactoe)
74
- user_move: User's move input
75
- game_state: Current game state
76
-
77
- Returns:
78
- Tuple of (updated_game_state, ai_response, reasoning_trace)
79
- """
80
- # Placeholder implementation - will be completed in Phase 2
81
- if not user_move:
82
- return game_state, "Please enter a move!", ""
83
-
84
- # Simulate AI response
85
- ai_response = f"AI responds to your move: {user_move}"
86
- reasoning_trace = f"AI thinking: Analyzing move '{user_move}' in {game_type}..."
87
- updated_state = f"{game_state}\nUser: {user_move}\nAI: {ai_response}"
88
-
89
- return updated_state, ai_response, reasoning_trace
90
-
91
- def test_reasoning(self, prompt: str, task_type: str = "math") -> Tuple[str, str]:
92
- """
93
- Test AI reasoning on non-game tasks.
94
-
95
- Args:
96
- prompt: User's reasoning prompt
97
- task_type: Type of reasoning task
98
-
99
- Returns:
100
- Tuple of (response, reasoning_trace)
101
- """
102
- # Placeholder implementation - will be completed in Phase 2
103
- if not prompt:
104
- return "Please enter a reasoning prompt!", ""
105
-
106
- response = f"AI response to: {prompt}"
107
- reasoning_trace = f"Step-by-step reasoning for '{prompt}'..."
108
-
109
- return response, reasoning_trace
110
-
111
- def create_interface(self) -> gr.Blocks:
112
- """Create the main Gradio interface."""
113
- title = self.config['interface']['title']
114
- description = self.config['interface']['description']
115
-
116
- with gr.Blocks(title=title, theme=self.config['interface']['theme']) as demo:
117
- gr.Markdown(f"# {title}")
118
- gr.Markdown(description)
119
-
120
- with gr.Tabs():
121
- # Game Play Tab
122
- with gr.TabItem("🎮 Game Play"):
123
- gr.Markdown("### Play zero-sum games against AI")
124
-
125
- with gr.Row():
126
- with gr.Column():
127
- game_selector = gr.Dropdown(
128
- choices=["kuhn_poker", "tictactoe"],
129
- value="kuhn_poker",
130
- label="Select Game"
131
- )
132
- user_move = gr.Textbox(
133
- label="Your Move",
134
- placeholder="Enter your move..."
135
- )
136
- play_button = gr.Button("Play Move", variant="primary")
137
-
138
- with gr.Column():
139
- game_state = gr.Textbox(
140
- label="Game State",
141
- lines=10,
142
- interactive=False
143
- )
144
- ai_response = gr.Textbox(
145
- label="AI Response",
146
- lines=3,
147
- interactive=False
148
- )
149
-
150
- reasoning_trace = gr.Textbox(
151
- label="AI Reasoning Trace",
152
- lines=5,
153
- interactive=False
154
- )
155
-
156
- play_button.click(
157
- fn=self.play_game,
158
- inputs=[game_selector, user_move, game_state],
159
- outputs=[game_state, ai_response, reasoning_trace]
160
- )
161
-
162
- # Reasoning Test Tab
163
- with gr.TabItem("🧠 Reasoning Test"):
164
- gr.Markdown("### Test AI reasoning on math and logic problems")
165
-
166
- with gr.Row():
167
- with gr.Column():
168
- task_type = gr.Dropdown(
169
- choices=["math", "logic", "strategic"],
170
- value="math",
171
- label="Task Type"
172
- )
173
- reasoning_prompt = gr.Textbox(
174
- label="Reasoning Prompt",
175
- placeholder="Enter a math problem or logic puzzle...",
176
- lines=3
177
- )
178
- test_button = gr.Button("Test Reasoning", variant="primary")
179
-
180
- with gr.Column():
181
- reasoning_response = gr.Textbox(
182
- label="AI Response",
183
- lines=8,
184
- interactive=False
185
- )
186
- reasoning_steps = gr.Textbox(
187
- label="Step-by-Step Reasoning",
188
- lines=8,
189
- interactive=False
190
- )
191
-
192
- test_button.click(
193
- fn=self.test_reasoning,
194
- inputs=[reasoning_prompt, task_type],
195
- outputs=[reasoning_response, reasoning_steps]
196
- )
197
-
198
- # About Tab
199
- with gr.TabItem("ℹ️ About"):
200
- gr.Markdown("""
201
- ### About SPIRAL
202
-
203
- This tool demonstrates the SPIRAL methodology: "Self-Play on Zero-Sum Games
204
- Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."
205
-
206
- **Key Features:**
207
- - **Game Play**: Interactive games with AI opponents
208
- - **Reasoning Traces**: Transparent AI decision-making
209
- - **Transfer Learning**: Test reasoning on non-game tasks
210
- - **Educational**: Learn about AI reasoning capabilities
211
-
212
- **How it works:**
213
- 1. AI agents are trained via self-play on zero-sum games
214
- 2. Role-conditioned advantage estimation improves learning
215
- 3. Reasoning skills transfer to mathematical and logical tasks
216
- 4. Interactive interface shows the AI's thinking process
217
-
218
- **Games Available:**
219
- - **Kuhn Poker**: Simple poker variant with betting
220
- - **TicTacToe**: Classic strategy game
221
-
222
- **Technical Details:**
223
- - Base Model: Qwen-4B from Hugging Face
224
- - Training: PPO with self-play
225
- - Interface: Gradio web app
226
- """)
227
-
228
- return demo
229
-
230
- def launch(self, **kwargs):
231
- """Launch the Gradio app."""
232
- demo = self.create_interface()
233
-
234
- # Get launch configuration
235
- gradio_config = self.config.get('interface', {}).get('gradio', {})
236
 
237
- launch_kwargs = {
238
- 'server_name': gradio_config.get('server_name', '0.0.0.0'),
239
- 'server_port': gradio_config.get('server_port', 7860),
240
- 'share': gradio_config.get('share', False),
241
- 'inbrowser': gradio_config.get('inbrowser', True),
242
- 'enable_queue': gradio_config.get('enable_queue', True),
243
- **kwargs
244
- }
245
 
246
- logger.info(f"Launching SPIRAL app with config: {launch_kwargs}")
247
- demo.launch(**launch_kwargs)
248
 
249
- def main():
250
- """Main entry point for the application."""
251
- app = SpiralApp()
252
- app.launch()
253
 
254
  if __name__ == "__main__":
255
- main()
 
 
1
  """
2
+ SPIRAL Interactive Reasoning Game Simulator
3
 
4
+ Main Gradio application for the SPIRAL demo on Hugging Face Spaces.
 
5
  """
6
 
7
  import gradio as gr
 
8
  import os
9
  import sys
10
 
11
+ # Add src to path for imports
12
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
13
 
 
 
14
 
15
+ def create_interface():
16
+ """Create the main Gradio interface."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator") as demo:
19
+ gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
20
+ gr.Markdown("**Coming Soon**: Interactive games with AI reasoning traces!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # Placeholder for now
23
+ gr.Markdown("This app is currently under development. Check back soon!")
 
 
 
 
 
 
24
 
25
+ return demo
 
26
 
 
 
 
 
27
 
28
  if __name__ == "__main__":
29
+ demo = create_interface()
30
+ demo.launch()
data/README.md CHANGED
@@ -1,45 +1,16 @@
1
- # SPIRAL Data Directory
2
 
3
- This directory contains datasets, benchmarks, and cached data for the SPIRAL Interactive Reasoning Game Simulator.
4
 
5
  ## Structure
6
 
7
- ```
8
- data/
9
- ├── cache/ # Cached model outputs and processed data
10
- ├── datasets/ # Game datasets and training data
11
- ├── benchmarks/ # Evaluation benchmarks for transfer learning
12
- │ ├── gsm8k.json # GSM8K math problems
13
- │ └── logic_puzzles.json # Logic reasoning puzzles
14
- └── README.md # This file
15
- ```
16
-
17
- ## Datasets
18
-
19
- ### Game Datasets
20
- - **Kuhn Poker**: Training games and strategies
21
- - **TicTacToe**: Game states and optimal moves
22
-
23
- ### Benchmark Datasets
24
- - **GSM8K**: Grade School Math 8K dataset for mathematical reasoning
25
- - **Logic Puzzles**: Custom logic and reasoning problems
26
- - **Strategic Reasoning**: Game-theory based reasoning tasks
27
-
28
- ## Usage
29
-
30
- Datasets are automatically downloaded and cached when first used. To manually download:
31
-
32
- ```python
33
- from src.data_utils import download_datasets
34
- download_datasets()
35
- ```
36
 
37
  ## Data Sources
38
 
39
- - GSM8K: [Cobbe et al. 2021](https://arxiv.org/abs/2110.14168)
40
- - Logic Puzzles: Curated collection from various sources
41
- - Game Data: Generated through self-play training
42
-
43
- ## License
44
-
45
- Please refer to individual dataset licenses for usage rights.
 
1
+ # Data Directory
2
 
3
+ This directory contains datasets and game-related files for the SPIRAL project.
4
 
5
  ## Structure
6
 
7
+ - `games/` - Game datasets and rule definitions
8
+ - `benchmarks/` - Math and logic benchmarks for transfer testing (e.g., GSM8K)
9
+ - `training/` - Training data and logs
10
+ - `examples/` - Example game sessions and reasoning traces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  ## Data Sources
13
 
14
+ - Game implementations from GitHub repositories
15
+ - Math benchmarks like GSM8K for transfer evaluation
16
+ - Custom game datasets generated during training
 
 
 
 
requirements.txt CHANGED
@@ -1,44 +1,10 @@
1
- # Core ML and Deep Learning
2
  torch>=2.0.0
3
  transformers>=4.30.0
4
- accelerate>=0.20.0
5
- bitsandbytes>=0.41.0
6
-
7
- # Reinforcement Learning
8
- gymnasium>=0.28.0
9
  stable-baselines3>=2.0.0
10
- sb3-contrib>=2.0.0
11
-
12
- # Web Interface
13
  gradio>=4.0.0
14
-
15
- # Data Processing and Utilities
16
  numpy>=1.21.0
17
- pandas>=1.3.0
18
  matplotlib>=3.5.0
19
  seaborn>=0.11.0
20
- plotly>=5.0.0
21
-
22
- # Game Theory and Math
23
- scipy>=1.7.0
24
- networkx>=2.6.0
25
-
26
- # Model Management
27
- huggingface-hub>=0.16.0
28
- datasets>=2.10.0
29
-
30
- # Testing and Development
31
- pytest>=7.0.0
32
- pytest-cov>=4.0.0
33
- black>=22.0.0
34
- flake8>=5.0.0
35
-
36
- # Logging and Monitoring
37
- wandb>=0.15.0
38
- tensorboard>=2.10.0
39
-
40
- # Utilities
41
- tqdm>=4.64.0
42
- pyyaml>=6.0.0
43
- python-dotenv>=1.0.0
44
- requests>=2.28.0
 
 
1
  torch>=2.0.0
2
  transformers>=4.30.0
3
+ gymnasium>=0.29.0
 
 
 
 
4
  stable-baselines3>=2.0.0
 
 
 
5
  gradio>=4.0.0
 
 
6
  numpy>=1.21.0
 
7
  matplotlib>=3.5.0
8
  seaborn>=0.11.0
9
+ pandas>=1.3.0
10
+ tqdm>=4.62.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
- SPIRAL: Interactive Reasoning Game Simulator
3
 
4
- A practical tool demonstrating how self-play training on zero-sum games
5
- can improve AI reasoning capabilities.
6
  """
7
 
8
  __version__ = "0.1.0"
 
1
  """
2
+ SPIRAL: Self-Play on Zero-Sum Games Incentivizes Reasoning
3
 
4
+ This package implements the SPIRAL methodology for training AI agents
5
+ through self-play on zero-sum games to improve reasoning capabilities.
6
  """
7
 
8
  __version__ = "0.1.0"
src/games/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
- Game environments for SPIRAL reasoning simulator.
3
 
4
- This module contains implementations of zero-sum games used for self-play training,
5
- including Kuhn Poker, TicTacToe, and other strategic games.
6
  """
7
 
8
  from .kuhn_poker import KuhnPokerEnv
 
1
  """
2
+ Game environments for SPIRAL training.
3
 
4
+ This module contains implementations of zero-sum games used for
5
+ self-play training, including Kuhn Poker and TicTacToe.
6
  """
7
 
8
  from .kuhn_poker import KuhnPokerEnv
src/models/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
- Model implementations for SPIRAL reasoning simulator.
3
 
4
- This module contains the SPIRAL model architecture, role-conditioned advantage
5
- estimation, and other model components for self-play training.
6
  """
7
 
8
  from .spiral_model import SpiralModel
 
1
  """
2
+ SPIRAL model implementations.
3
 
4
+ This module contains the core SPIRAL model architecture and
5
+ role-conditioned advantage estimation (RAE) components.
6
  """
7
 
8
  from .spiral_model import SpiralModel
src/reasoning/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
- Reasoning components for SPIRAL reasoning simulator.
3
 
4
- This module contains reasoning trace generation, chain-of-thought processing,
5
- and transfer learning evaluation for testing reasoning capabilities.
6
  """
7
 
8
  from .trace_generator import TraceGenerator
 
1
  """
2
+ Reasoning trace generation and analysis.
3
 
4
+ This module handles the generation of step-by-step reasoning traces
5
+ during gameplay and transfer to non-game tasks.
6
  """
7
 
8
  from .trace_generator import TraceGenerator
src/training/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
- Training components for SPIRAL reasoning simulator.
3
 
4
- This module contains the self-play training logic, PPO implementation with
5
- role-conditioned advantage estimation, and training utilities.
6
  """
7
 
8
  from .self_play_trainer import SelfPlayTrainer
 
1
  """
2
+ Training components for SPIRAL.
3
 
4
+ This module implements the self-play training logic using PPO
5
+ with role-conditioned advantage estimation.
6
  """
7
 
8
  from .self_play_trainer import SelfPlayTrainer