# Source: reasoning-simulator / config.yaml
# Last commit: e526e6a by Kaushik Rajan, "Phase 1: Initial SPIRAL project setup"
# Size: 2.5 kB (repository viewer links: raw, history, blame)
# SPIRAL Interactive Reasoning Game Simulator Configuration
# Model Configuration
# Language-model identifier, generation parameters, and 4-bit quantization
# options.
model:
  # NOTE(review): the Qwen2.5 family does not appear to be published in a 4B
  # size (Qwen3 is). Confirm this repository id resolves on the Hub before
  # relying on it.
  name: "Qwen/Qwen2.5-4B-Instruct"
  max_length: 2048
  temperature: 0.7
  # Sampling is enabled, so `temperature` above takes effect.
  do_sample: true
  # Key names match the bitsandbytes options in Hugging Face transformers;
  # presumably forwarded to the quantization config (confirm in the loader).
  # NOTE(review): nesting under `model` is assumed; confirm against the loader.
  quantization:
    load_in_4bit: true
    bnb_4bit_compute_dtype: "float16"
    bnb_4bit_use_double_quant: true
# Games Configuration
# One entry per supported game, keyed by an internal identifier. Each entry
# carries its display name and its own rule parameters.
games:
  kuhn_poker:
    name: "Kuhn Poker"
    max_rounds: 50
    deck_size: 3
    betting_rounds: 2
  tictactoe:
    name: "TicTacToe"
    # 3x3 board: at most 9 moves, three in a row wins.
    board_size: 3
    max_moves: 9
    win_condition: 3
# Training Configuration
# Optimizer and policy-gradient hyperparameters, followed by self-play and
# role-conditioned advantage estimation (RAE) settings.
training:
  algorithm: "PPO"
  episodes: 1000
  batch_size: 32
  learning_rate: 0.0003
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  entropy_coef: 0.01
  value_loss_coef: 0.5
  max_grad_norm: 0.5
  # Self-play specific
  # NOTE(review): `self_play` and `rae` are assumed to nest under `training`;
  # confirm against the loader.
  self_play:
    # Presumably counted in episodes; confirm against the trainer.
    update_opponent_every: 100
    opponent_pool_size: 5
  # Role-conditioned advantage estimation
  rae:
    enable: true
    role_embedding_dim: 64
    advantage_weighting: 0.5
# Reasoning Configuration
# Controls for reasoning-trace generation, plus the list of tasks used for
# transfer evaluation.
reasoning:
  enable_traces: true
  trace_depth: 3
  chain_of_thought: true
  # NOTE(review): the unit (tokens, words, or characters) is not stated here;
  # confirm against the consumer.
  explanation_length: 150
  # Transfer learning evaluation
  transfer_tasks:
    - "GSM8K"
    - "Logic Puzzles"
    - "Strategic Reasoning"
# Web Interface Configuration
# Page text and theme for the web UI, followed by server launch settings.
interface:
  title: "SPIRAL: Interactive Reasoning Game Simulator"
  description: "Play games against AI and explore reasoning capabilities"
  theme: "default"
  # Gradio settings
  # NOTE(review): nesting under `interface` is assumed; confirm against the
  # loader.
  gradio:
    share: false
    inbrowser: true
    # "0.0.0.0" listens on all network interfaces, not just localhost.
    server_name: "0.0.0.0"
    server_port: 7860
    # NOTE(review): recent Gradio releases no longer accept `enable_queue` in
    # `launch()`; confirm how the app consumes this key.
    enable_queue: true
    max_threads: 4
# Logging Configuration
# Log level, record format, and log file, followed by optional experiment
# trackers.
logging:
  level: "INFO"
  # %-style placeholders as used by Python's `logging` module.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "logs/spiral.log"
  # Experiment tracking
  # NOTE(review): `wandb` and `tensorboard` are assumed to nest under
  # `logging`; confirm against the loader.
  wandb:
    enable: false
    project: "spiral-reasoning"
    # Placeholder value; replace it before setting `enable: true`.
    entity: "your-username"
  tensorboard:
    enable: true
    log_dir: "logs/tensorboard"
# Data Configuration
# Relative paths for caches, datasets, and model files, plus the benchmark
# files keyed by benchmark name.
data:
  cache_dir: "data/cache"
  datasets_dir: "data/datasets"
  models_dir: "models"
  # Benchmark datasets
  # NOTE(review): `reasoning.transfer_tasks` also lists "Strategic Reasoning",
  # which has no path here; confirm whether a benchmark file is needed.
  benchmarks:
    gsm8k: "data/benchmarks/gsm8k.json"
    logic_puzzles: "data/benchmarks/logic_puzzles.json"
# Deployment Configuration
# Hosting target and runtime limits for the deployed app.
deployment:
  huggingface:
    space_name: "kaushikvr06/reasoning-simulator"
    private: false
  # Performance settings
  # NOTE(review): `performance` is assumed to nest under `deployment`; confirm
  # against the loader.
  performance:
    max_concurrent_users: 10
    timeout_seconds: 30
    # String with a unit suffix; the consumer must parse it.
    memory_limit: "2GB"
# Debug Configuration
# Diagnostic switches. Game-log saving is on while the other flags are off.
debug:
  enable: false
  verbose_traces: false
  save_game_logs: true
  profile_inference: false