Spaces:

kaushikvr06
/

reasoning-simulator

Build error

reasoning-simulator / config.yaml

Kaushik Rajan

Phase 1: Initial SPIRAL project setup

e526e6a 5 months ago

2.5 kB

	# SPIRAL Interactive Reasoning Game Simulator Configuration

	# Model Configuration
	model:
	name: "Qwen/Qwen2.5-4B-Instruct"
	max_length: 2048
	temperature: 0.7
	do_sample: true
	quantization:
	load_in_4bit: true
	bnb_4bit_compute_dtype: "float16"
	bnb_4bit_use_double_quant: true

	# Games Configuration
	games:
	kuhn_poker:
	name: "Kuhn Poker"
	max_rounds: 50
	deck_size: 3
	betting_rounds: 2

	tictactoe:
	name: "TicTacToe"
	board_size: 3
	max_moves: 9
	win_condition: 3

	# Training Configuration
	training:
	algorithm: "PPO"
	episodes: 1000
	batch_size: 32
	learning_rate: 0.0003
	gamma: 0.99
	gae_lambda: 0.95
	clip_range: 0.2
	entropy_coef: 0.01
	value_loss_coef: 0.5
	max_grad_norm: 0.5

	# Self-play specific
	self_play:
	update_opponent_every: 100
	opponent_pool_size: 5

	# Role-conditioned advantage estimation
	rae:
	enable: true
	role_embedding_dim: 64
	advantage_weighting: 0.5

	# Reasoning Configuration
	reasoning:
	enable_traces: true
	trace_depth: 3
	chain_of_thought: true
	explanation_length: 150

	# Transfer learning evaluation
	transfer_tasks:
	- "GSM8K"
	- "Logic Puzzles"
	- "Strategic Reasoning"

	# Web Interface Configuration
	interface:
	title: "SPIRAL: Interactive Reasoning Game Simulator"
	description: "Play games against AI and explore reasoning capabilities"
	theme: "default"

	# Gradio settings
	gradio:
	share: false
	inbrowser: true
	server_name: "0.0.0.0"
	server_port: 7860
	enable_queue: true
	max_threads: 4

	# Logging Configuration
	logging:
	level: "INFO"
	format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
	file: "logs/spiral.log"

	# Experiment tracking
	wandb:
	enable: false
	project: "spiral-reasoning"
	entity: "your-username"

	tensorboard:
	enable: true
	log_dir: "logs/tensorboard"

	# Data Configuration
	data:
	cache_dir: "data/cache"
	datasets_dir: "data/datasets"
	models_dir: "models"

	# Benchmark datasets
	benchmarks:
	gsm8k: "data/benchmarks/gsm8k.json"
	logic_puzzles: "data/benchmarks/logic_puzzles.json"

	# Deployment Configuration
	deployment:
	huggingface:
	space_name: "kaushikvr06/reasoning-simulator"
	private: false

	# Performance settings
	performance:
	max_concurrent_users: 10
	timeout_seconds: 30
	memory_limit: "2GB"

	# Debug Configuration
	debug:
	enable: false
	verbose_traces: false
	save_game_logs: true
	profile_inference: false