# Provenance (was web-viewer residue, commented out so the file parses as YAML):
# uploaded by penfever via upload-large-folder tool, commit 7c7089b (verified).
---
# Entry-point module invoked by the launcher.
entrypoint: examples.terminal_bench.entrypoints.main_tbench

# Hydra config groups (+ prefix in CLI)
config_groups:
  terminal_bench_config: terminal_bench
# Terminal bench / agentic environment settings
# NOTE(review): indentation was lost in the source; nesting below is
# reconstructed from the section comments — verify against the consuming
# schema (TrialConfig / Harbor).
terminal_bench:
  # Directory for Harbor trial artifacts (derived from experiments_dir if null)
  trials_dir: null

  # Harbor configuration - schema-driven mapping to TrialConfig
  harbor:
    # Agent settings
    name: terminus-2
    max_episodes: 999999  # effectively unlimited
    enable_summarize: false
    store_all_messages: true
    enable_episode_logging: false
    record_terminal_session: false
    enable_pane_logging: false
    # Strict JSON parser
    strict_json_parser: true
    # Interleaved thinking settings
    interleaved_thinking: true
    extra_body:
      chat_template_kwargs:
        enable_thinking: true
    # Long timeout for thinking models (seconds)
    override_timeout_sec: 1800
    # Environment (sandbox) resource settings
    override_cpus: 1
    override_memory_mb: 2048
    override_storage_mb: 2048
    # ========================================================================
    # AUTO SNAPSHOT: Reduce Daytona rate limits with hash-based snapshot
    # caching.
    # ========================================================================
    # When true, automatically creates a snapshot from the Dockerfile on first
    # use, then reuses it for all subsequent sandboxes with the same
    # Dockerfile content.
    # Snapshots are named: harbor__<sha256[:12]>__snapshot
    auto_snapshot: true
    # Verifier settings
    verifier_override_timeout_sec: 120
    # Retry settings (presumably exponential backoff: wait grows by
    # wait_multiplier between min/max — TODO confirm against retry helper)
    max_retries: 3
    min_wait_sec: 60.0
    max_wait_sec: 600.0
    wait_multiplier: 2.0
    # Exceptions that do not trigger a retry
    exclude_exceptions:
      - VerifierTimeoutError
      - VerifierRuntimeError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - VerifierOutputParseError
    n_concurrent_trials: 280
    # Logging settings
    log_level: INFO
    # Reward shaping (disabled - binary rewards)
    enable_reward_shaping: false
    # RLOO-N error classification
    enable_error_classification: true
    # Exceptions masked out of the loss/advantage computation
    mask_exceptions:
      - DaytonaError
      - EnvironmentStartTimeoutError
      - NetworkError
      - ConnectionError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - AgentEnvironmentTimeoutError
    default_error_treatment: zero
    # Exceptions propagated as-is (treated as genuine agent failures)
    passthrough_exceptions:
      - AgentTimeoutError
      - ContextLengthExceededError
    # Model info for Harbor's hosted_vllm validation
    model_info:
      max_input_tokens: 32768
      max_output_tokens: 4096

  archiving:
    # Enable trial archiving callback
    enabled: false

  # Post-training trace upload to HuggingFace
  trace_upload:
    enabled: true
    repo_org: DCAgent
    episodes: last
    dataset_type: SFT
    cleanup: true
# Trainer configuration
# NOTE(review): indentation reconstructed (lost in source) — verify nesting of
# placement/fully_async against the trainer schema.
trainer:
  strategy: fsdp2
  algorithm:
    advantage_estimator: rloo_n
    use_kl_loss: false
    kl_loss_coef: 0.0
    eps_clip_low: 0.2
    eps_clip_high: 0.2
    loss_reduction: token_mean
  # Training loop settings
  epochs: 2
  max_steps: 80
  update_epochs_per_batch: 1
  # Batch sizes
  train_batch_size: 64
  policy_mini_batch_size: 64
  eval_batch_size: 64
  # Micro batch sizes (micro1x4 variant)
  micro_forward_batch_size_per_gpu: 4
  micro_train_batch_size_per_gpu: 1
  max_prompt_length: 999999  # effectively unlimited
  # Evaluation and checkpointing
  eval_interval: 999999  # effectively disables periodic eval
  eval_before_train: false
  # Resumable checkpointing
  ckpt_interval: 5
  resume_mode: latest
  # HF upload-ready checkpoints
  hf_save_interval: 5
  # HuggingFace Hub upload (set via CLI: trainer.hf_hub_repo_id=org/repo)
  hf_hub_repo_id: null
  hf_hub_private: false
  hf_hub_revision: main
  # Database registration (auto-registers trained model to Supabase)
  # Requires KEYS env var pointing to Supabase credentials file
  enable_db_registration: true
  # Logging
  project_name: OpenThoughts-Agent
  log_level: INFO
  tracker_commit_each_step: true
  logger: console
  # Paths
  run_name: null
  ckpt_path: null
  export_path: null
  policy:
    optimizer_config:
      # Written 3.0e-5, not bare 3e-5: YAML 1.1 loaders (e.g. PyYAML) only
      # resolve exponent floats that contain a '.', so 3e-5 loads as the
      # STRING "3e-5" and breaks the optimizer.
      lr: 3.0e-5
      weight_decay: 0.0
      adam_betas: [0.9, 0.999]
      max_grad_norm: 10.0
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  ref:
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  placement:
    colocate_all: false
    policy_num_nodes: 2
    ref_num_nodes: 2
    policy_num_gpus_per_node: 4
    ref_num_gpus_per_node: 4
  fully_async:
    max_staleness_steps: 16
    num_parallel_generation_workers: 768
# Rollout generator (vLLM inference) configuration.
# NOTE(review): indentation reconstructed (lost in source) — confirm generator
# is a top-level group, not nested under trainer.
generator:
  backend: vllm
  timeout_multiplier: 1.0
  model_dtype: bfloat16
  inference_engine_tensor_parallel_size: 1
  # 16 inference engines (24 total GPUs: 16 engines + 8 policy/ref shared)
  num_inference_engines: 16
  n_samples_per_prompt: 8
  eval_n_samples_per_prompt: 8
  gpu_memory_utilization: 0.75
  max_num_seqs: 24
  max_num_batched_tokens: 65536
  enable_prefix_caching: true
  enable_chunked_prefill: true
  run_engines_locally: true
  weight_sync_backend: nccl
  async_engine: true
  batched: false
  enable_http_endpoint: true
  enable_ray_prometheus_stats: false
  vllm_stats_interval: 1
  append_eos_token_after_stop_str_in_multi_turn: true
  max_turns: 999999  # effectively unlimited
  sampling_params:
    max_generate_length: 8192
    temperature: 0.7
    top_p: 0.95
    top_k: 20
  engine_init_kwargs:
    max_model_len: 32768
  # Interleaved thinking chat template: preserves <think> blocks on ALL
  # historical assistant turns (stock Qwen3 template strips them).
  custom_chat_template_chat_completion_path: chat_templates/qwen3_thinking_acc.jinja2
# Dataset configuration: empty train split; eval on the OpenThoughts TB dev set.
data:
  train_data: []
  val_data: ["open-thoughts/OpenThoughts-TB-dev"]