# Entry point module for the terminal-bench training run
entrypoint: examples.terminal_bench.entrypoints.main_tbench
# Hydra config groups (+ prefix in CLI)
config_groups:
  terminal_bench_config: terminal_bench
# Terminal bench / agentic environment settings
# NOTE(review): this file was recovered from a whitespace-stripped paste; the
# nesting depths below are reconstructed from comments and key semantics —
# confirm against the consumer's schema (TrialConfig) before relying on them.
terminal_bench:
  # trials_dir: Directory for Harbor trial artifacts (derived from experiments_dir if null)
  trials_dir: null
  # Harbor configuration - schema-driven mapping to TrialConfig
  harbor:
    # Agent settings
    name: terminus-2
    max_episodes: 999999
    enable_summarize: false
    store_all_messages: true
    enable_episode_logging: false
    record_terminal_session: false
    enable_pane_logging: false
    # Strict JSON parser
    strict_json_parser: true
    # Interleaved Thinking Settings
    interleaved_thinking: true
    extra_body:
      chat_template_kwargs:
        enable_thinking: true
    # Long timeout for thinking models
    override_timeout_sec: 1800
    # Environment settings
    override_cpus: 1
    override_memory_mb: 2048
    override_storage_mb: 2048
    # ==========================================================================
    # AUTO SNAPSHOT: Reduce Daytona rate limits with hash-based snapshot caching
    # ==========================================================================
    # When true, automatically creates a snapshot from the Dockerfile on first use,
    # then reuses it for all subsequent sandboxes with the same Dockerfile content.
    # Snapshots are named: harbor__<sha256[:12]>__snapshot
    auto_snapshot: true
    # Verifier settings
    verifier_override_timeout_sec: 120
    # Retry settings
    max_retries: 3
    min_wait_sec: 60.0
    max_wait_sec: 600.0
    wait_multiplier: 2.0
    exclude_exceptions:
      - VerifierTimeoutError
      - VerifierRuntimeError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - VerifierOutputParseError
    n_concurrent_trials: 280
    # Logging settings
    log_level: INFO
    # Reward shaping (disabled - binary rewards)
    enable_reward_shaping: false
    # RLOO-N error classification
    enable_error_classification: true
    mask_exceptions:
      - DaytonaError
      - EnvironmentStartTimeoutError
      - NetworkError
      - ConnectionError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - AgentEnvironmentTimeoutError
    default_error_treatment: zero
    passthrough_exceptions:
      - AgentTimeoutError
      - ContextLengthExceededError
    # Model info for Harbor's hosted_vllm validation
    model_info:
      max_input_tokens: 32768
      max_output_tokens: 4096
  # NOTE(review): archiving/trace_upload placed as siblings of harbor — they
  # describe post-trial artifact handling, not the TrialConfig mapping; verify.
  archiving:
    # Enable trial archiving callback
    enabled: false
  # Post-training trace upload to HuggingFace
  trace_upload:
    enabled: true
    repo_org: DCAgent
    episodes: last
    dataset_type: SFT
    cleanup: true
# Trainer configuration
trainer:
  strategy: fsdp2
  algorithm:
    advantage_estimator: rloo_n
    use_kl_loss: false
    kl_loss_coef: 0.0
    eps_clip_low: 0.2
    eps_clip_high: 0.2
    loss_reduction: token_mean
  # Training loop settings
  epochs: 2
  max_steps: 80
  update_epochs_per_batch: 1
  # Batch sizes
  train_batch_size: 64
  policy_mini_batch_size: 64
  eval_batch_size: 64
  # Micro batch sizes (micro1x4 variant)
  micro_forward_batch_size_per_gpu: 4
  micro_train_batch_size_per_gpu: 1
  max_prompt_length: 999999
  # Evaluation and checkpointing
  eval_interval: 999999
  eval_before_train: false
  # Resumable checkpointing
  ckpt_interval: 5
  resume_mode: latest
  # HF upload-ready checkpoints
  hf_save_interval: 5
  # HuggingFace Hub upload (set via CLI: trainer.hf_hub_repo_id=org/repo)
  hf_hub_repo_id: null
  hf_hub_private: false
  hf_hub_revision: main
  # Database registration (auto-registers trained model to Supabase)
  # Requires KEYS env var pointing to Supabase credentials file
  enable_db_registration: true
  # Logging
  project_name: OpenThoughts-Agent
  log_level: INFO
  tracker_commit_each_step: true
  logger: console
  # Paths
  run_name: null
  ckpt_path: null
  export_path: null
  policy:
    optimizer_config:
      # Written as 3.0e-5 (not 3e-5): YAML 1.1 loaders such as PyYAML only
      # resolve exponent floats that contain a dot, so a bare 3e-5 would be
      # loaded as the string '3e-5' rather than a float.
      lr: 3.0e-5
      weight_decay: 0.0
      adam_betas: [0.9, 0.999]
      max_grad_norm: 10.0
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  ref:
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  placement:
    colocate_all: false
    policy_num_nodes: 2
    ref_num_nodes: 2
    policy_num_gpus_per_node: 4
    ref_num_gpus_per_node: 4
  fully_async:
    max_staleness_steps: 16
    num_parallel_generation_workers: 768
# Inference / rollout generator settings
generator:
  backend: vllm
  timeout_multiplier: 1.0
  model_dtype: bfloat16
  inference_engine_tensor_parallel_size: 1
  # 16 inference engines (24 total GPUs: 16 engines + 8 policy/ref shared)
  num_inference_engines: 16
  n_samples_per_prompt: 8
  eval_n_samples_per_prompt: 8
  gpu_memory_utilization: 0.75
  max_num_seqs: 24
  max_num_batched_tokens: 65536
  enable_prefix_caching: true
  enable_chunked_prefill: true
  run_engines_locally: true
  weight_sync_backend: nccl
  async_engine: true
  batched: false
  enable_http_endpoint: true
  enable_ray_prometheus_stats: false
  vllm_stats_interval: 1
  append_eos_token_after_stop_str_in_multi_turn: true
  max_turns: 999999
  sampling_params:
    max_generate_length: 8192
    temperature: 0.7
    top_p: 0.95
    top_k: 20
  engine_init_kwargs:
    max_model_len: 32768
  # Interleaved thinking chat template: preserves <think> blocks on ALL
  # historical assistant turns (stock Qwen3 template strips them).
  custom_chat_template_chat_completion_path: chat_templates/qwen3_thinking_acc.jinja2
# Dataset configuration (HuggingFace dataset identifiers)
data:
  train_data: []
  val_data: ["open-thoughts/OpenThoughts-TB-dev"]