# Provenance (was web-viewer residue, commented out so the file parses as YAML):
# uploaded by penfever via upload-large-folder tool, commit 7c7089b (verified).
---
# Entry-point module invoked by the launcher.
entrypoint: examples.terminal_bench.entrypoints.main_tbench

# Hydra config groups (+ prefix in CLI)
config_groups:
  terminal_bench_config: terminal_bench
# Terminal bench / agentic environment settings
# NOTE(review): indentation was lost in the source; nesting below is
# reconstructed from the section comments — verify against the consuming
# schema (TrialConfig / Harbor).
terminal_bench:
  # Directory for Harbor trial artifacts (derived from experiments_dir if null)
  trials_dir: null

  # Harbor configuration - schema-driven mapping to TrialConfig
  harbor:
    # Agent settings
    name: terminus-2
    max_episodes: 999999  # effectively unlimited
    enable_summarize: false
    store_all_messages: true
    enable_episode_logging: false
    record_terminal_session: false
    enable_pane_logging: false
    # Strict JSON parser
    strict_json_parser: true
    # Interleaved thinking settings
    interleaved_thinking: true
    extra_body:
      chat_template_kwargs:
        enable_thinking: true
    # Long timeout for thinking models (seconds)
    override_timeout_sec: 1800
    # Environment (sandbox) resource settings
    override_cpus: 1
    override_memory_mb: 2048
    override_storage_mb: 2048
    # ========================================================================
    # AUTO SNAPSHOT: Reduce Daytona rate limits with hash-based snapshot
    # caching.
    # ========================================================================
    # When true, automatically creates a snapshot from the Dockerfile on first
    # use, then reuses it for all subsequent sandboxes with the same
    # Dockerfile content.
    # Snapshots are named: harbor__<sha256[:12]>__snapshot
    auto_snapshot: true
    # Verifier settings
    verifier_override_timeout_sec: 120
    # Retry settings (presumably exponential backoff: wait grows by
    # wait_multiplier between min/max — TODO confirm against retry helper)
    max_retries: 3
    min_wait_sec: 60.0
    max_wait_sec: 600.0
    wait_multiplier: 2.0
    # Exceptions that do not trigger a retry
    exclude_exceptions:
      - VerifierTimeoutError
      - VerifierRuntimeError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - VerifierOutputParseError
    n_concurrent_trials: 280
    # Logging settings
    log_level: INFO
    # Reward shaping (disabled - binary rewards)
    enable_reward_shaping: false
    # RLOO-N error classification
    enable_error_classification: true
    # Exceptions masked out of the loss/advantage computation
    mask_exceptions:
      - DaytonaError
      - EnvironmentStartTimeoutError
      - NetworkError
      - ConnectionError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - AgentEnvironmentTimeoutError
    default_error_treatment: zero
    # Exceptions propagated as-is (treated as genuine agent failures)
    passthrough_exceptions:
      - AgentTimeoutError
      - ContextLengthExceededError
    # Model info for Harbor's hosted_vllm validation
    model_info:
      max_input_tokens: 32768
      max_output_tokens: 4096

  archiving:
    # Enable trial archiving callback
    enabled: false

  # Post-training trace upload to HuggingFace
  trace_upload:
    enabled: true
    repo_org: DCAgent
    episodes: last
    dataset_type: SFT
    cleanup: true
# Trainer configuration
# NOTE(review): indentation reconstructed (lost in source) — verify nesting of
# placement/fully_async against the trainer schema.
trainer:
  strategy: fsdp2
  algorithm:
    advantage_estimator: rloo_n
    use_kl_loss: false
    kl_loss_coef: 0.0
    eps_clip_low: 0.2
    eps_clip_high: 0.2
    loss_reduction: token_mean
  # Training loop settings
  epochs: 2
  max_steps: 80
  update_epochs_per_batch: 1
  # Batch sizes
  train_batch_size: 64
  policy_mini_batch_size: 64
  eval_batch_size: 64
  # Micro batch sizes (micro1x4 variant)
  micro_forward_batch_size_per_gpu: 4
  micro_train_batch_size_per_gpu: 1
  max_prompt_length: 999999  # effectively unlimited
  # Evaluation and checkpointing
  eval_interval: 999999  # effectively disables periodic eval
  eval_before_train: false
  # Resumable checkpointing
  ckpt_interval: 5
  resume_mode: latest
  # HF upload-ready checkpoints
  hf_save_interval: 5
  # HuggingFace Hub upload (set via CLI: trainer.hf_hub_repo_id=org/repo)
  hf_hub_repo_id: null
  hf_hub_private: false
  hf_hub_revision: main
  # Database registration (auto-registers trained model to Supabase)
  # Requires KEYS env var pointing to Supabase credentials file
  enable_db_registration: true
  # Logging
  project_name: OpenThoughts-Agent
  log_level: INFO
  tracker_commit_each_step: true
  logger: console
  # Paths
  run_name: null
  ckpt_path: null
  export_path: null
  policy:
    optimizer_config:
      # Written 3.0e-5, not bare 3e-5: YAML 1.1 loaders (e.g. PyYAML) only
      # resolve exponent floats that contain a '.', so 3e-5 loads as the
      # STRING "3e-5" and breaks the optimizer.
      lr: 3.0e-5
      weight_decay: 0.0
      adam_betas: [0.9, 0.999]
      max_grad_norm: 10.0
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  ref:
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  placement:
    colocate_all: false
    policy_num_nodes: 2
    ref_num_nodes: 2
    policy_num_gpus_per_node: 4
    ref_num_gpus_per_node: 4
  fully_async:
    max_staleness_steps: 16
    num_parallel_generation_workers: 768
# Rollout generator (vLLM inference) configuration.
# NOTE(review): indentation reconstructed (lost in source) — confirm generator
# is a top-level group, not nested under trainer.
generator:
  backend: vllm
  timeout_multiplier: 1.0
  model_dtype: bfloat16
  inference_engine_tensor_parallel_size: 1
  # 16 inference engines (24 total GPUs: 16 engines + 8 policy/ref shared)
  num_inference_engines: 16
  n_samples_per_prompt: 8
  eval_n_samples_per_prompt: 8
  gpu_memory_utilization: 0.75
  max_num_seqs: 24
  max_num_batched_tokens: 65536
  enable_prefix_caching: true
  enable_chunked_prefill: true
  run_engines_locally: true
  weight_sync_backend: nccl
  async_engine: true
  batched: false
  enable_http_endpoint: true
  enable_ray_prometheus_stats: false
  vllm_stats_interval: 1
  append_eos_token_after_stop_str_in_multi_turn: true
  max_turns: 999999  # effectively unlimited
  sampling_params:
    max_generate_length: 8192
    temperature: 0.7
    top_p: 0.95
    top_k: 20
  engine_init_kwargs:
    max_model_len: 32768
  # Interleaved thinking chat template: preserves <think> blocks on ALL
  # historical assistant turns (stock Qwen3 template strips them).
  custom_chat_template_chat_completion_path: chat_templates/qwen3_thinking_acc.jinja2
# Dataset configuration: empty train split; eval on the OpenThoughts TB dev set.
data:
  train_data: []
  val_data: ["open-thoughts/OpenThoughts-TB-dev"]