---
# Spaces: Build error (page status text captured with this file; not configuration)
# SPIRAL Interactive Reasoning Game Simulator Configuration
# Model Configuration
# Language-model settings: model identifier, generation parameters, and
# 4-bit quantization options.
model:
  # NOTE(review): the published Qwen2.5 sizes do not appear to include a 4B
  # variant -- confirm this repo ID resolves on the Hugging Face Hub.
  name: "Qwen/Qwen2.5-4B-Instruct"
  max_length: 2048
  temperature: 0.7
  do_sample: true
  # NOTE(review): placed under `model` because the original indentation was
  # lost; the key names presumably mirror bitsandbytes 4-bit options -- confirm
  # against the code that reads this file.
  quantization:
    load_in_4bit: true
    bnb_4bit_compute_dtype: "float16"
    bnb_4bit_use_double_quant: true
# Games Configuration
# One mapping per supported game, keyed by game id. Each entry carries a
# `name` plus that game's own size and limit settings.
games:
  kuhn_poker:
    name: "Kuhn Poker"
    max_rounds: 50
    deck_size: 3
    betting_rounds: 2
  tictactoe:
    name: "TicTacToe"
    board_size: 3
    max_moves: 9
    win_condition: 3
# Training Configuration
# Algorithm choice and optimisation hyperparameters, followed by the
# self-play and role-conditioned advantage estimation (RAE) sub-sections.
training:
  algorithm: "PPO"
  episodes: 1000
  batch_size: 32
  # Kept in plain decimal form: exponent notation without a dot (e.g. 3e-4)
  # is read as a string by YAML 1.1 loaders such as PyYAML.
  learning_rate: 0.0003
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  entropy_coef: 0.01
  value_loss_coef: 0.5
  max_grad_norm: 0.5
  # Self-play specific
  # NOTE(review): `self_play` and `rae` are nested under `training` because
  # the original indentation was lost -- confirm against the config loader.
  self_play:
    update_opponent_every: 100
    opponent_pool_size: 5
  # Role-conditioned advantage estimation
  rae:
    enable: true
    role_embedding_dim: 64
    advantage_weighting: 0.5
# Reasoning Configuration
# Toggles and limits for reasoning traces, plus the list of tasks used for
# transfer-learning evaluation.
reasoning:
  enable_traces: true
  trace_depth: 3
  chain_of_thought: true
  # NOTE(review): unit (tokens, words, characters) is not stated here --
  # confirm against the consumer.
  explanation_length: 150
  # Transfer learning evaluation
  # NOTE(review): nested under `reasoning` because the original indentation
  # was lost -- confirm against the config loader.
  transfer_tasks:
    - "GSM8K"
    - "Logic Puzzles"
    - "Strategic Reasoning"
# Web Interface Configuration
# Page title/description/theme and the settings for the Gradio server.
interface:
  title: "SPIRAL: Interactive Reasoning Game Simulator"
  description: "Play games against AI and explore reasoning capabilities"
  theme: "default"
  # Gradio settings
  # NOTE(review): nested under `interface` because the original indentation
  # was lost -- confirm against the config loader.
  gradio:
    share: false
    inbrowser: true
    # "0.0.0.0" listens on all network interfaces.
    server_name: "0.0.0.0"
    server_port: 7860
    # NOTE(review): if this is forwarded to gradio's launch(), check that the
    # installed Gradio version still accepts `enable_queue`.
    enable_queue: true
    max_threads: 4
# Logging Configuration
# Log level, record format and log file, plus the experiment-tracking
# backends (Weights & Biases, TensorBoard).
logging:
  level: "INFO"
  # printf-style record format; must stay quoted because it starts with `%`.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "logs/spiral.log"
  # Experiment tracking
  # NOTE(review): `wandb` and `tensorboard` are nested under `logging` because
  # the original indentation was lost -- confirm against the config loader.
  wandb:
    enable: false
    project: "spiral-reasoning"
    # Placeholder value; set a real entity before enabling wandb.
    entity: "your-username"
  tensorboard:
    enable: true
    log_dir: "logs/tensorboard"
# Data Configuration
# Directory locations for cache, datasets and models, plus paths to the
# benchmark dataset files.
data:
  cache_dir: "data/cache"
  datasets_dir: "data/datasets"
  models_dir: "models"
  # Benchmark datasets
  # NOTE(review): nested under `data` because the original indentation was
  # lost -- confirm against the config loader.
  benchmarks:
    gsm8k: "data/benchmarks/gsm8k.json"
    logic_puzzles: "data/benchmarks/logic_puzzles.json"
# Deployment Configuration
# Hugging Face Space target and runtime performance limits.
deployment:
  huggingface:
    space_name: "kaushikvr06/reasoning-simulator"
    private: false
  # Performance settings
  # NOTE(review): nested under `deployment` because the original indentation
  # was lost -- confirm against the config loader.
  performance:
    max_concurrent_users: 10
    timeout_seconds: 30
    memory_limit: "2GB"
# Debug Configuration
# Boolean switches for debugging aids; only game-log saving is on.
debug:
  enable: false
  verbose_traces: false
  save_game_logs: true
  profile_inference: false