""" SPIRAL Interactive Reasoning Game Simulator - Main Gradio App A practical tool demonstrating how self-play training on zero-sum games can improve AI reasoning capabilities. """ import gradio as gr import yaml import os import sys # Add the src directory to the path for imports sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src')) from typing import Tuple, Dict, Any, List, Optional import logging # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class SpiralApp: """Main application class for the SPIRAL reasoning simulator.""" def __init__(self, config_path: str = "../config.yaml"): """Initialize the SPIRAL app with configuration.""" self.config = self._load_config(config_path) self.setup_logging() # Initialize components (will be implemented in Phase 2) self.game_interface = None self.reasoning_interface = None self.transfer_interface = None logger.info("SPIRAL App initialized successfully") def _load_config(self, config_path: str) -> Dict[str, Any]: """Load configuration from YAML file.""" try: with open(config_path, 'r') as f: config = yaml.safe_load(f) return config except FileNotFoundError: logger.warning(f"Config file not found: {config_path}. Using defaults.") return self._get_default_config() def _get_default_config(self) -> Dict[str, Any]: """Get default configuration.""" return { 'interface': { 'title': 'SPIRAL: Interactive Reasoning Game Simulator', 'description': 'Play games against AI and explore reasoning capabilities', 'theme': 'default' }, 'games': { 'kuhn_poker': {'name': 'Kuhn Poker'}, 'tictactoe': {'name': 'TicTacToe'} } } def setup_logging(self): """Set up logging configuration.""" log_config = self.config.get('logging', {}) level = getattr(logging, log_config.get('level', 'INFO')) logging.getLogger().setLevel(level) def play_game(self, game_type: str, user_move: str, game_state: str = "") -> Tuple[str, str, str]: """ Handle game play interaction. Args: game_type: Type of game (kuhn_poker, tictactoe) user_move: User's move input game_state: Current game state Returns: Tuple of (updated_game_state, ai_response, reasoning_trace) """ # Placeholder implementation - will be completed in Phase 2 if not user_move: return game_state, "Please enter a move!", "" # Simulate AI response ai_response = f"AI responds to your move: {user_move}" reasoning_trace = f"AI thinking: Analyzing move '{user_move}' in {game_type}..." updated_state = f"{game_state}\nUser: {user_move}\nAI: {ai_response}" return updated_state, ai_response, reasoning_trace def test_reasoning(self, prompt: str, task_type: str = "math") -> Tuple[str, str]: """ Test AI reasoning on non-game tasks. Args: prompt: User's reasoning prompt task_type: Type of reasoning task Returns: Tuple of (response, reasoning_trace) """ # Placeholder implementation - will be completed in Phase 2 if not prompt: return "Please enter a reasoning prompt!", "" response = f"AI response to: {prompt}" reasoning_trace = f"Step-by-step reasoning for '{prompt}'..." return response, reasoning_trace def create_interface(self) -> gr.Blocks: """Create the main Gradio interface.""" title = self.config['interface']['title'] description = self.config['interface']['description'] with gr.Blocks(title=title, theme=self.config['interface']['theme']) as demo: gr.Markdown(f"# {title}") gr.Markdown(description) with gr.Tabs(): # Game Play Tab with gr.TabItem("🎮 Game Play"): gr.Markdown("### Play zero-sum games against AI") with gr.Row(): with gr.Column(): game_selector = gr.Dropdown( choices=["kuhn_poker", "tictactoe"], value="kuhn_poker", label="Select Game" ) user_move = gr.Textbox( label="Your Move", placeholder="Enter your move..." ) play_button = gr.Button("Play Move", variant="primary") with gr.Column(): game_state = gr.Textbox( label="Game State", lines=10, interactive=False ) ai_response = gr.Textbox( label="AI Response", lines=3, interactive=False ) reasoning_trace = gr.Textbox( label="AI Reasoning Trace", lines=5, interactive=False ) play_button.click( fn=self.play_game, inputs=[game_selector, user_move, game_state], outputs=[game_state, ai_response, reasoning_trace] ) # Reasoning Test Tab with gr.TabItem("🧠 Reasoning Test"): gr.Markdown("### Test AI reasoning on math and logic problems") with gr.Row(): with gr.Column(): task_type = gr.Dropdown( choices=["math", "logic", "strategic"], value="math", label="Task Type" ) reasoning_prompt = gr.Textbox( label="Reasoning Prompt", placeholder="Enter a math problem or logic puzzle...", lines=3 ) test_button = gr.Button("Test Reasoning", variant="primary") with gr.Column(): reasoning_response = gr.Textbox( label="AI Response", lines=8, interactive=False ) reasoning_steps = gr.Textbox( label="Step-by-Step Reasoning", lines=8, interactive=False ) test_button.click( fn=self.test_reasoning, inputs=[reasoning_prompt, task_type], outputs=[reasoning_response, reasoning_steps] ) # About Tab with gr.TabItem("ℹ️ About"): gr.Markdown(""" ### About SPIRAL This tool demonstrates the SPIRAL methodology: "Self-Play on Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning." **Key Features:** - **Game Play**: Interactive games with AI opponents - **Reasoning Traces**: Transparent AI decision-making - **Transfer Learning**: Test reasoning on non-game tasks - **Educational**: Learn about AI reasoning capabilities **How it works:** 1. AI agents are trained via self-play on zero-sum games 2. Role-conditioned advantage estimation improves learning 3. Reasoning skills transfer to mathematical and logical tasks 4. Interactive interface shows the AI's thinking process **Games Available:** - **Kuhn Poker**: Simple poker variant with betting - **TicTacToe**: Classic strategy game **Technical Details:** - Base Model: Qwen-4B from Hugging Face - Training: PPO with self-play - Interface: Gradio web app """) return demo def launch(self, **kwargs): """Launch the Gradio app.""" demo = self.create_interface() # Get launch configuration gradio_config = self.config.get('interface', {}).get('gradio', {}) launch_kwargs = { 'server_name': gradio_config.get('server_name', '0.0.0.0'), 'server_port': gradio_config.get('server_port', 7860), 'share': gradio_config.get('share', False), 'inbrowser': gradio_config.get('inbrowser', True), 'enable_queue': gradio_config.get('enable_queue', True), **kwargs } logger.info(f"Launching SPIRAL app with config: {launch_kwargs}") demo.launch(**launch_kwargs) def main(): """Main entry point for the application.""" app = SpiralApp() app.launch() if __name__ == "__main__": main()