Spaces:
Build error
Build error
| """ | |
| SPIRAL Interactive Reasoning Game Simulator - Main Gradio App | |
| A practical tool demonstrating how self-play training on zero-sum games | |
| can improve AI reasoning capabilities. | |
| """ | |
# Standard library
import logging
import os
import sys
from typing import Tuple, Dict, Any, List, Optional

# Third-party
import gradio as gr
import yaml

# Extend the import search path with the sibling ``src`` directory
# (resolved relative to this file) so modules living there can be imported.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))

# Root logging bootstrap plus the module-level logger used throughout this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SpiralApp:
    """Main application class for the SPIRAL reasoning simulator.

    Holds the loaded configuration, builds the Gradio ``Blocks`` UI (game
    play, reasoning test and about tabs) and launches it.

    Attributes:
        config: Configuration mapping loaded from YAML, with any missing
            ``interface`` / ``games`` entries filled in from the defaults.
        game_interface: Placeholder, currently always ``None``.
        reasoning_interface: Placeholder, currently always ``None``.
        transfer_interface: Placeholder, currently always ``None``.
    """

    # Static markdown shown on the "About" tab.
    _ABOUT_MARKDOWN = "\n".join([
        "### About SPIRAL",
        "",
        'This tool demonstrates the SPIRAL methodology: "Self-Play on Zero-Sum Games',
        'Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning."',
        "",
        "**Key Features:**",
        "",
        "- **Game Play**: Interactive games with AI opponents",
        "- **Reasoning Traces**: Transparent AI decision-making",
        "- **Transfer Learning**: Test reasoning on non-game tasks",
        "- **Educational**: Learn about AI reasoning capabilities",
        "",
        "**How it works:**",
        "",
        "1. AI agents are trained via self-play on zero-sum games",
        "2. Role-conditioned advantage estimation improves learning",
        "3. Reasoning skills transfer to mathematical and logical tasks",
        "4. Interactive interface shows the AI's thinking process",
        "",
        "**Games Available:**",
        "",
        "- **Kuhn Poker**: Simple poker variant with betting",
        "- **TicTacToe**: Classic strategy game",
        "",
        "**Technical Details:**",
        "",
        "- Base Model: Qwen-4B from Hugging Face",
        "- Training: PPO with self-play",
        "- Interface: Gradio web app",
    ])

    def __init__(self, config_path: str = "../config.yaml"):
        """Initialize the SPIRAL app with configuration.

        Args:
            config_path: Path to the YAML configuration file. A relative
                path is resolved against the current working directory.
        """
        self.config = self._load_config(config_path)
        self.setup_logging()
        # Initialize components (will be implemented in Phase 2)
        self.game_interface = None
        self.reasoning_interface = None
        self.transfer_interface = None
        logger.info("SPIRAL App initialized successfully")

    def _load_config(self, config_path: str) -> Dict[str, Any]:
        """Load configuration from a YAML file, falling back to defaults.

        Args:
            config_path: Path of the YAML file to read.

        Returns:
            The parsed mapping with missing ``interface`` keys and a missing
            ``games`` section filled in from ``_get_default_config``. The
            defaults alone are returned when the file does not exist or does
            not contain a mapping at its top level.
        """
        defaults = self._get_default_config()
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f)
        except FileNotFoundError:
            logger.warning("Config file not found: %s. Using defaults.", config_path)
            return defaults

        # safe_load yields None for an empty document and may yield a list or
        # scalar; the rest of the class relies on dict access.
        if not isinstance(loaded, dict):
            logger.warning(
                "Config file %s does not contain a mapping. Using defaults.",
                config_path,
            )
            return defaults

        # create_interface indexes title/description/theme directly, so make
        # sure they exist even when the file only overrides some of them.
        interface = loaded.get('interface')
        if isinstance(interface, dict):
            loaded['interface'] = {**defaults['interface'], **interface}
        else:
            loaded['interface'] = defaults['interface']
        loaded.setdefault('games', defaults['games'])
        return loaded

    def _get_default_config(self) -> Dict[str, Any]:
        """Return a fresh copy of the built-in default configuration."""
        return {
            'interface': {
                'title': 'SPIRAL: Interactive Reasoning Game Simulator',
                'description': 'Play games against AI and explore reasoning capabilities',
                'theme': 'default',
            },
            'games': {
                'kuhn_poker': {'name': 'Kuhn Poker'},
                'tictactoe': {'name': 'TicTacToe'},
            },
        }

    def setup_logging(self):
        """Apply the ``logging.level`` setting from the config to the root logger.

        The level may be given as a name in any letter case (``"debug"``,
        ``"INFO"``) or as an integer. Unknown values fall back to ``INFO``
        instead of raising.
        """
        log_config = self.config.get('logging')
        if not isinstance(log_config, dict):
            log_config = {}

        raw_level = log_config.get('level', 'INFO')
        if isinstance(raw_level, int) and not isinstance(raw_level, bool):
            level = raw_level
        else:
            # Only accept attributes of the logging module that are real
            # numeric levels; anything else is treated as unknown.
            level = getattr(logging, str(raw_level).upper(), None)
            if not isinstance(level, int) or isinstance(level, bool):
                logger.warning("Unknown log level %r; falling back to INFO.", raw_level)
                level = logging.INFO
        logging.getLogger().setLevel(level)

    def play_game(self, game_type: str, user_move: str, game_state: str = "") -> Tuple[str, str, str]:
        """Handle one game-play interaction (placeholder until Phase 2).

        Args:
            game_type: Type of game (kuhn_poker, tictactoe).
            user_move: User's move input; surrounding whitespace is ignored.
            game_state: Transcript of the game so far.

        Returns:
            Tuple of (updated_game_state, ai_response, reasoning_trace). When
            the move is empty the state is returned unchanged together with a
            prompt to enter a move and an empty trace.
        """
        game_state = game_state or ""
        move = (user_move or "").strip()
        if not move:
            return game_state, "Please enter a move!", ""

        # Simulate AI response
        ai_response = f"AI responds to your move: {move}"
        reasoning_trace = f"AI thinking: Analyzing move '{move}' in {game_type}..."
        turn = f"User: {move}\nAI: {ai_response}"
        # Avoid a leading blank line on the very first turn.
        updated_state = f"{game_state}\n{turn}" if game_state else turn
        return updated_state, ai_response, reasoning_trace

    def test_reasoning(self, prompt: str, task_type: str = "math") -> Tuple[str, str]:
        """Test AI reasoning on non-game tasks (placeholder until Phase 2).

        Args:
            prompt: User's reasoning prompt; surrounding whitespace is ignored.
            task_type: Type of reasoning task. Accepted but not yet used by
                this placeholder implementation.

        Returns:
            Tuple of (response, reasoning_trace). An empty prompt yields a
            request to enter one and an empty trace.
        """
        prompt = (prompt or "").strip()
        if not prompt:
            return "Please enter a reasoning prompt!", ""
        response = f"AI response to: {prompt}"
        reasoning_trace = f"Step-by-step reasoning for '{prompt}'..."
        return response, reasoning_trace

    def _build_game_tab(self) -> None:
        """Add the game-play tab and wire its button to ``play_game``."""
        with gr.TabItem("🎮 Game Play"):
            gr.Markdown("### Play zero-sum games against AI")
            with gr.Row():
                with gr.Column():
                    game_selector = gr.Dropdown(
                        choices=["kuhn_poker", "tictactoe"],
                        value="kuhn_poker",
                        label="Select Game",
                    )
                    user_move = gr.Textbox(
                        label="Your Move",
                        placeholder="Enter your move...",
                    )
                    play_button = gr.Button("Play Move", variant="primary")
                with gr.Column():
                    game_state = gr.Textbox(
                        label="Game State",
                        lines=10,
                        interactive=False,
                    )
                    ai_response = gr.Textbox(
                        label="AI Response",
                        lines=3,
                        interactive=False,
                    )
                    reasoning_trace = gr.Textbox(
                        label="AI Reasoning Trace",
                        lines=5,
                        interactive=False,
                    )
            # The game-state box is both an input and an output, so the
            # transcript accumulates across clicks.
            play_button.click(
                fn=self.play_game,
                inputs=[game_selector, user_move, game_state],
                outputs=[game_state, ai_response, reasoning_trace],
            )

    def _build_reasoning_tab(self) -> None:
        """Add the reasoning-test tab and wire its button to ``test_reasoning``."""
        with gr.TabItem("🧠 Reasoning Test"):
            gr.Markdown("### Test AI reasoning on math and logic problems")
            with gr.Row():
                with gr.Column():
                    task_type = gr.Dropdown(
                        choices=["math", "logic", "strategic"],
                        value="math",
                        label="Task Type",
                    )
                    reasoning_prompt = gr.Textbox(
                        label="Reasoning Prompt",
                        placeholder="Enter a math problem or logic puzzle...",
                        lines=3,
                    )
                    test_button = gr.Button("Test Reasoning", variant="primary")
                with gr.Column():
                    reasoning_response = gr.Textbox(
                        label="AI Response",
                        lines=8,
                        interactive=False,
                    )
                    reasoning_steps = gr.Textbox(
                        label="Step-by-Step Reasoning",
                        lines=8,
                        interactive=False,
                    )
            test_button.click(
                fn=self.test_reasoning,
                inputs=[reasoning_prompt, task_type],
                outputs=[reasoning_response, reasoning_steps],
            )

    def _build_about_tab(self) -> None:
        """Add the static about tab."""
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(self._ABOUT_MARKDOWN)

    # The return annotation is a string so that defining the class does not
    # evaluate ``gr.Blocks``.
    def create_interface(self) -> "gr.Blocks":
        """Create the main Gradio interface.

        Returns:
            A ``gr.Blocks`` instance titled and themed from the ``interface``
            config section, containing the game-play, reasoning-test and
            about tabs.
        """
        interface_cfg = self.config['interface']
        title = interface_cfg['title']
        description = interface_cfg['description']
        with gr.Blocks(title=title, theme=interface_cfg['theme']) as demo:
            gr.Markdown(f"# {title}")
            gr.Markdown(description)
            with gr.Tabs():
                self._build_game_tab()
                self._build_reasoning_tab()
                self._build_about_tab()
        return demo

    def launch(self, **kwargs):
        """Build the interface and launch the Gradio app.

        Launch settings come from ``interface.gradio`` in the config and can
        be overridden through ``kwargs``.

        Args:
            **kwargs: Extra keyword arguments forwarded to ``Blocks.launch``.
                ``enable_queue`` is still accepted here for backward
                compatibility but is applied via ``Blocks.queue()``.
        """
        demo = self.create_interface()
        # Get launch configuration
        gradio_config = (self.config.get('interface') or {}).get('gradio') or {}
        launch_kwargs = {
            'server_name': gradio_config.get('server_name', '0.0.0.0'),
            'server_port': gradio_config.get('server_port', 7860),
            'share': gradio_config.get('share', False),
            'inbrowser': gradio_config.get('inbrowser', True),
            **kwargs,
        }
        # ``launch(enable_queue=...)`` is rejected by current Gradio releases
        # (TypeError); queueing is turned on with ``queue()`` instead, so the
        # setting must never reach ``launch``.
        enable_queue = launch_kwargs.pop(
            'enable_queue', gradio_config.get('enable_queue', True)
        )
        if enable_queue:
            demo.queue()
        logger.info("Launching SPIRAL app with config: %s", launch_kwargs)
        demo.launch(**launch_kwargs)
def main():
    """Entry point: build a ``SpiralApp`` with its default config path and launch it."""
    SpiralApp().launch()


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()