Spaces:

kaushikvr06
/

reasoning-simulator

Build error

App Files Files Community

Kaushik Rajan committed on Jul 12

Commit

06c8d18

1 Parent(s): e786a81

Phase 2.1: Set up project structure with src/, models/, data/, app/ directories and requirements.txt

Browse files

Files changed (8) hide show

app/app.py +13 -238
data/README.md +9 -38
requirements.txt +3 -37
src/__init__.py +3 -3
src/games/__init__.py +3 -3
src/models/__init__.py +3 -3
src/reasoning/__init__.py +3 -3
src/training/__init__.py +3 -3

app/app.py CHANGED Viewed

@@ -1,255 +1,30 @@
 """
-SPIRAL Interactive Reasoning Game Simulator - Main Gradio App
-A practical tool demonstrating how self-play training on zero-sum games
-can improve AI reasoning capabilities.
 """
 import gradio as gr
-import yaml
 import os
 import sys
-# Add the src directory to the path for imports
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
-from typing import Tuple, Dict, Any, List, Optional
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-class SpiralApp:
-    """Main application class for the SPIRAL reasoning simulator."""
-    def __init__(self, config_path: str = "../config.yaml"):
-        """Initialize the SPIRAL app with configuration."""
-        self.config = self._load_config(config_path)
-        self.setup_logging()
-        # Initialize components (will be implemented in Phase 2)
-        self.game_interface = None
-        self.reasoning_interface = None
-        self.transfer_interface = None
-        logger.info("SPIRAL App initialized successfully")
-    def _load_config(self, config_path: str) -> Dict[str, Any]:
-        """Load configuration from YAML file."""
-        try:
-            with open(config_path, 'r') as f:
-                config = yaml.safe_load(f)
-            return config
-        except FileNotFoundError:
-            logger.warning(f"Config file not found: {config_path}. Using defaults.")
-            return self._get_default_config()
-    def _get_default_config(self) -> Dict[str, Any]:
-        """Get default configuration."""
-        return {
-            'interface': {
-                'title': 'SPIRAL: Interactive Reasoning Game Simulator',
-                'description': 'Play games against AI and explore reasoning capabilities',
-                'theme': 'default'
-            },
-            'games': {
-                'kuhn_poker': {'name': 'Kuhn Poker'},
-                'tictactoe': {'name': 'TicTacToe'}
-            }
-        }
-    def setup_logging(self):
-        """Set up logging configuration."""
-        log_config = self.config.get('logging', {})
-        level = getattr(logging, log_config.get('level', 'INFO'))
-        logging.getLogger().setLevel(level)
-    def play_game(self, game_type: str, user_move: str, game_state: str = "") -> Tuple[str, str, str]:
-        """
-        Handle game play interaction.
-        Args:
-            game_type: Type of game (kuhn_poker, tictactoe)
-            user_move: User's move input
-            game_state: Current game state
-        Returns:
-            Tuple of (updated_game_state, ai_response, reasoning_trace)
-        """
-        # Placeholder implementation - will be completed in Phase 2
-        if not user_move:
-            return game_state, "Please enter a move!", ""
-        # Simulate AI response
-        ai_response = f"AI responds to your move: {user_move}"
-        reasoning_trace = f"AI thinking: Analyzing move '{user_move}' in {game_type}..."
-        updated_state = f"{game_state}\nUser: {user_move}\nAI: {ai_response}"
-        return updated_state, ai_response, reasoning_trace
-    def test_reasoning(self, prompt: str, task_type: str = "math") -> Tuple[str, str]:
-        """
-        Test AI reasoning on non-game tasks.
-        Args:
-            prompt: User's reasoning prompt
-            task_type: Type of reasoning task
-        Returns:
-            Tuple of (response, reasoning_trace)
-        """
-        # Placeholder implementation - will be completed in Phase 2
-        if not prompt:
-            return "Please enter a reasoning prompt!", ""
-        response = f"AI response to: {prompt}"
-        reasoning_trace = f"Step-by-step reasoning for '{prompt}'..."
-        return response, reasoning_trace
-    def create_interface(self) -> gr.Blocks:
-        """Create the main Gradio interface."""
-        title = self.config['interface']['title']
-        description = self.config['interface']['description']
-        with gr.Blocks(title=title, theme=self.config['interface']['theme']) as demo:
-            gr.Markdown(f"# {title}")
-            gr.Markdown(description)
-            with gr.Tabs():
-                # Game Play Tab
-                with gr.TabItem("🎮 Game Play"):
-                    gr.Markdown("### Play zero-sum games against AI")
-                    with gr.Row():
-                        with gr.Column():
-                            game_selector = gr.Dropdown(
-                                choices=["kuhn_poker", "tictactoe"],
-                                value="kuhn_poker",
-                                label="Select Game"
-                            )
-                            user_move = gr.Textbox(
-                                label="Your Move",
-                                placeholder="Enter your move..."
-                            )
-                            play_button = gr.Button("Play Move", variant="primary")
-                        with gr.Column():
-                            game_state = gr.Textbox(
-                                label="Game State",
-                                lines=10,
-                                interactive=False
-                            )
-                            ai_response = gr.Textbox(
-                                label="AI Response",
-                                lines=3,
-                                interactive=False
-                            )
-                    reasoning_trace = gr.Textbox(
-                        label="AI Reasoning Trace",
-                        lines=5,
-                        interactive=False
-                    )
-                    play_button.click(
-                        fn=self.play_game,
-                        inputs=[game_selector, user_move, game_state],
-                        outputs=[game_state, ai_response, reasoning_trace]
-                    )
-                # Reasoning Test Tab
-                with gr.TabItem("🧠 Reasoning Test"):
-                    gr.Markdown("### Test AI reasoning on math and logic problems")
-                    with gr.Row():
-                        with gr.Column():
-                            task_type = gr.Dropdown(
-                                choices=["math", "logic", "strategic"],
-                                value="math",
-                                label="Task Type"
-                            )
-                            reasoning_prompt = gr.Textbox(
-                                label="Reasoning Prompt",
-                                placeholder="Enter a math problem or logic puzzle...",
-                                lines=3
-                            )
-                            test_button = gr.Button("Test Reasoning", variant="primary")
-                        with gr.Column():
-                            reasoning_response = gr.Textbox(
-                                label="AI Response",
-                                lines=8,
-                                interactive=False
-                            )
-                            reasoning_steps = gr.Textbox(
-                                label="Step-by-Step Reasoning",
-                                lines=8,
-                                interactive=False
-                            )
-                    test_button.click(
-                        fn=self.test_reasoning,
-                        inputs=[reasoning_prompt, task_type],
-                        outputs=[reasoning_response, reasoning_steps]
-                    )
-                # About Tab
-                with gr.TabItem("ℹ️ About"):
-                    gr.Markdown("""
-                    ### About SPIRAL
-                    This tool demonstrates the SPIRAL methodology: "Self-Play on Zero-Sum Games
-                    Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."
-                    **Key Features:**
-                    - **Game Play**: Interactive games with AI opponents
-                    - **Reasoning Traces**: Transparent AI decision-making
-                    - **Transfer Learning**: Test reasoning on non-game tasks
-                    - **Educational**: Learn about AI reasoning capabilities
-                    **How it works:**
-                    1. AI agents are trained via self-play on zero-sum games
-                    2. Role-conditioned advantage estimation improves learning
-                    3. Reasoning skills transfer to mathematical and logical tasks
-                    4. Interactive interface shows the AI's thinking process
-                    **Games Available:**
-                    - **Kuhn Poker**: Simple poker variant with betting
-                    - **TicTacToe**: Classic strategy game
-                    **Technical Details:**
-                    - Base Model: Qwen-4B from Hugging Face
-                    - Training: PPO with self-play
-                    - Interface: Gradio web app
-                    """)
-        return demo
-    def launch(self, **kwargs):
-        """Launch the Gradio app."""
-        demo = self.create_interface()
-        # Get launch configuration
-        gradio_config = self.config.get('interface', {}).get('gradio', {})
-        launch_kwargs = {
-            'server_name': gradio_config.get('server_name', '0.0.0.0'),
-            'server_port': gradio_config.get('server_port', 7860),
-            'share': gradio_config.get('share', False),
-            'inbrowser': gradio_config.get('inbrowser', True),
-            'enable_queue': gradio_config.get('enable_queue', True),
-            **kwargs
-        }
-        logger.info(f"Launching SPIRAL app with config: {launch_kwargs}")
-        demo.launch(**launch_kwargs)
-def main():
-    """Main entry point for the application."""
-    app = SpiralApp()
-    app.launch()
 if __name__ == "__main__":
-    main()

 """
+SPIRAL Interactive Reasoning Game Simulator
+Main Gradio application for the SPIRAL demo on Hugging Face Spaces.
 """
 import gradio as gr
 import os
 import sys
+# Add src to path for imports
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
+def create_interface():
+    """Create the main Gradio interface."""
+    with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator") as demo:
+        gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
+        gr.Markdown("**Coming Soon**: Interactive games with AI reasoning traces!")
+        # Placeholder for now
+        gr.Markdown("This app is currently under development. Check back soon!")
+    return demo
 if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()

data/README.md CHANGED Viewed

@@ -1,45 +1,16 @@
-# SPIRAL Data Directory
-This directory contains datasets, benchmarks, and cached data for the SPIRAL Interactive Reasoning Game Simulator.
 ## Structure
-```
-data/
-├── cache/              # Cached model outputs and processed data
-├── datasets/           # Game datasets and training data
-├── benchmarks/         # Evaluation benchmarks for transfer learning
-│   ├── gsm8k.json     # GSM8K math problems
-│   └── logic_puzzles.json  # Logic reasoning puzzles
-└── README.md          # This file
-```
-## Datasets
-### Game Datasets
-- **Kuhn Poker**: Training games and strategies
-- **TicTacToe**: Game states and optimal moves
-### Benchmark Datasets
-- **GSM8K**: Grade School Math 8K dataset for mathematical reasoning
-- **Logic Puzzles**: Custom logic and reasoning problems
-- **Strategic Reasoning**: Game-theory based reasoning tasks
-## Usage
-Datasets are automatically downloaded and cached when first used. To manually download:
-```python
-from src.data_utils import download_datasets
-download_datasets()
-```
 ## Data Sources
-- GSM8K: [Cobbe et al. 2021](https://arxiv.org/abs/2110.14168)
-- Logic Puzzles: Curated collection from various sources
-- Game Data: Generated through self-play training
-## License
-Please refer to individual dataset licenses for usage rights.

+# Data Directory
+This directory contains datasets and game-related files for the SPIRAL project.
 ## Structure
+- `games/` - Game datasets and rule definitions
+- `benchmarks/` - Math and logic benchmarks for transfer testing (e.g., GSM8K)
+- `training/` - Training data and logs
+- `examples/` - Example game sessions and reasoning traces
 ## Data Sources
+- Game implementations from GitHub repositories
+- Math benchmarks like GSM8K for transfer evaluation
+- Custom game datasets generated during training

requirements.txt CHANGED Viewed

@@ -1,44 +1,10 @@
-# Core ML and Deep Learning
 torch>=2.0.0
 transformers>=4.30.0
-accelerate>=0.20.0
-bitsandbytes>=0.41.0
-# Reinforcement Learning
-gymnasium>=0.28.0
 stable-baselines3>=2.0.0
-sb3-contrib>=2.0.0
-# Web Interface
 gradio>=4.0.0
-# Data Processing and Utilities
 numpy>=1.21.0
-pandas>=1.3.0
 matplotlib>=3.5.0
 seaborn>=0.11.0
-plotly>=5.0.0
-# Game Theory and Math
-scipy>=1.7.0
-networkx>=2.6.0
-# Model Management
-huggingface-hub>=0.16.0
-datasets>=2.10.0
-# Testing and Development
-pytest>=7.0.0
-pytest-cov>=4.0.0
-black>=22.0.0
-flake8>=5.0.0
-# Logging and Monitoring
-wandb>=0.15.0
-tensorboard>=2.10.0
-# Utilities
-tqdm>=4.64.0
-pyyaml>=6.0.0
-python-dotenv>=1.0.0
-requests>=2.28.0

 torch>=2.0.0
 transformers>=4.30.0
+gymnasium>=0.29.0
 stable-baselines3>=2.0.0
 gradio>=4.0.0
 numpy>=1.21.0
 matplotlib>=3.5.0
 seaborn>=0.11.0
+pandas>=1.3.0
+tqdm>=4.62.0

src/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
-SPIRAL: Interactive Reasoning Game Simulator
-A practical tool demonstrating how self-play training on zero-sum games
-can improve AI reasoning capabilities.
 """
 __version__ = "0.1.0"

 """
+SPIRAL: Self-Play on Zero-Sum Games Incentivizes Reasoning
+This package implements the SPIRAL methodology for training AI agents
+through self-play on zero-sum games to improve reasoning capabilities.
 """
 __version__ = "0.1.0"

src/games/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
-Game environments for SPIRAL reasoning simulator.
-This module contains implementations of zero-sum games used for self-play training,
-including Kuhn Poker, TicTacToe, and other strategic games.
 """
 from .kuhn_poker import KuhnPokerEnv

 """
+Game environments for SPIRAL training.
+This module contains implementations of zero-sum games used for
+self-play training, including Kuhn Poker and TicTacToe.
 """
 from .kuhn_poker import KuhnPokerEnv

src/models/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
-Model implementations for SPIRAL reasoning simulator.
-This module contains the SPIRAL model architecture, role-conditioned advantage
-estimation, and other model components for self-play training.
 """
 from .spiral_model import SpiralModel

 """
+SPIRAL model implementations.
+This module contains the core SPIRAL model architecture and
+role-conditioned advantage estimation (RAE) components.
 """
 from .spiral_model import SpiralModel

src/reasoning/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
-Reasoning components for SPIRAL reasoning simulator.
-This module contains reasoning trace generation, chain-of-thought processing,
-and transfer learning evaluation for testing reasoning capabilities.
 """
 from .trace_generator import TraceGenerator

 """
+Reasoning trace generation and analysis.
+This module handles the generation of step-by-step reasoning traces
+during gameplay and transfer to non-game tasks.
 """
 from .trace_generator import TraceGenerator

src/training/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
-Training components for SPIRAL reasoning simulator.
-This module contains the self-play training logic, PPO implementation with
-role-conditioned advantage estimation, and training utilities.
 """
 from .self_play_trainer import SelfPlayTrainer

 """
+Training components for SPIRAL.
+This module implements the self-play training logic using PPO
+with role-conditioned advantage estimation.
 """
 from .self_play_trainer import SelfPlayTrainer