---
# Run configuration.
# NOTE(review): this file arrived as pipe-delimited table rows with all
# indentation stripped. The mapping structure below is reconstructed from the
# key names — confirm it against the consuming launcher's schema.

# Presumably the dotted Python module path the launcher executes.
entrypoint: examples.terminal_bench.entrypoints.main_tbench

# Maps the config group `terminal_bench_config` to the option `terminal_bench`.
config_groups:
  terminal_bench_config: terminal_bench

# Terminal-bench settings.
# NOTE(review): nesting is reconstructed from a flattened source. Keys from
# `name` through `passthrough_exceptions` are placed under `harbor`, while
# `model_info`, `archiving` and `trace_upload` are placed directly under
# `terminal_bench`. Confirm these paths against the consumer's schema.
terminal_bench:
  # Left unset here; presumably supplied at launch time — TODO confirm.
  trials_dir: null

  harbor:
    # Agent selection and its logging/recording toggles.
    name: terminus-2
    max_episodes: 999999  # presumably an "effectively unlimited" sentinel
    enable_summarize: false
    store_all_messages: true
    enable_episode_logging: false
    record_terminal_session: false
    enable_pane_logging: false

    strict_json_parser: true

    interleaved_thinking: true
    extra_body:
      chat_template_kwargs:
        enable_thinking: true

    # NOTE(review): restored at the `harbor` level rather than inside
    # `extra_body`; the source had a separator row before this key.
    override_timeout_sec: 1800

    # Resource overrides; units are taken from the key suffixes.
    override_cpus: 1
    override_memory_mb: 2048
    override_storage_mb: 2048

    auto_snapshot: true

    verifier_override_timeout_sec: 120

    # Retry settings, as the key names suggest: attempt cap, wait bounds in
    # seconds, and a wait multiplier.
    max_retries: 3
    min_wait_sec: 60.0
    max_wait_sec: 600.0
    wait_multiplier: 2.0

    # NOTE(review): RewardFileNotFoundError and RewardFileEmptyError are also
    # listed under `mask_exceptions` below — confirm the overlap is intended.
    exclude_exceptions:
      - VerifierTimeoutError
      - VerifierRuntimeError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - VerifierOutputParseError

    n_concurrent_trials: 280

    log_level: INFO

    enable_reward_shaping: false

    enable_error_classification: true
    mask_exceptions:
      - DaytonaError
      - EnvironmentStartTimeoutError
      - NetworkError
      - ConnectionError
      - RewardFileNotFoundError
      - RewardFileEmptyError
      - AgentEnvironmentTimeoutError
    default_error_treatment: zero
    passthrough_exceptions:
      - AgentTimeoutError
      - ContextLengthExceededError

  # Token limits declared for the model.
  model_info:
    max_input_tokens: 32768
    max_output_tokens: 4096

  archiving:
    enabled: false

  trace_upload:
    enabled: true
    repo_org: DCAgent
    episodes: last
    dataset_type: SFT
    cleanup: true

# Trainer settings.
# NOTE(review): nesting is reconstructed from a flattened source. `policy`,
# `ref`, `placement` and `fully_async` are placed under `trainer` — confirm
# against the trainer's config schema.
trainer:
  strategy: fsdp2
  algorithm:
    advantage_estimator: rloo_n
    # KL loss is switched off and its coefficient is zero.
    use_kl_loss: false
    kl_loss_coef: 0.0
    eps_clip_low: 0.2
    eps_clip_high: 0.2
    loss_reduction: token_mean

  # Run length.
  epochs: 2
  max_steps: 80
  update_epochs_per_batch: 1

  # Global batch sizes.
  train_batch_size: 64
  policy_mini_batch_size: 64
  eval_batch_size: 64

  # Per-GPU micro-batch sizes.
  micro_forward_batch_size_per_gpu: 4
  micro_train_batch_size_per_gpu: 1

  max_prompt_length: 999999  # presumably an "effectively unlimited" sentinel

  # Evaluation and checkpoint cadence.
  eval_interval: 999999
  eval_before_train: false
  ckpt_interval: 5
  resume_mode: latest
  hf_save_interval: 5
  hf_hub_repo_id: null
  hf_hub_private: false
  hf_hub_revision: main

  enable_db_registration: true

  # Experiment tracking and logging.
  project_name: OpenThoughts-Agent
  log_level: INFO
  tracker_commit_each_step: true
  logger: console

  # Left unset here; presumably supplied at launch time — TODO confirm.
  run_name: null
  ckpt_path: null
  export_path: null

  policy:
    optimizer_config:
      # Written with an explicit mantissa dot: YAML 1.1 loaders such as
      # PyYAML read the dot-less form `3e-5` as a string, not a float.
      lr: 3.0e-5
      weight_decay: 0.0
      adam_betas: [0.9, 0.999]
      max_grad_norm: 10.0
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4
  ref:
    fsdp_config:
      cpu_offload: false
      reshard_after_forward: true
      fsdp_size: 4

  # Policy and reference models each get 2 nodes with 4 GPUs per node.
  placement:
    colocate_all: false
    policy_num_nodes: 2
    ref_num_nodes: 2
    policy_num_gpus_per_node: 4
    ref_num_gpus_per_node: 4

  fully_async:
    max_staleness_steps: 16
    num_parallel_generation_workers: 768

# Generation / inference-engine settings.
# NOTE(review): nesting is reconstructed from a flattened source — confirm
# against the generator's config schema.
generator:
  backend: vllm
  timeout_multiplier: 1.0
  model_dtype: bfloat16

  # Engine topology.
  inference_engine_tensor_parallel_size: 1
  num_inference_engines: 16

  # Samples drawn per prompt for training and for evaluation.
  n_samples_per_prompt: 8
  eval_n_samples_per_prompt: 8

  gpu_memory_utilization: 0.75

  max_num_seqs: 24
  max_num_batched_tokens: 65536

  enable_prefix_caching: true
  enable_chunked_prefill: true

  run_engines_locally: true
  weight_sync_backend: nccl
  async_engine: true
  batched: false
  enable_http_endpoint: true
  enable_ray_prometheus_stats: false
  vllm_stats_interval: 1
  append_eos_token_after_stop_str_in_multi_turn: true
  max_turns: 999999  # presumably an "effectively unlimited" sentinel

  sampling_params:
    max_generate_length: 8192
    temperature: 0.7
    top_p: 0.95
    top_k: 20

  engine_init_kwargs:
    max_model_len: 32768
    # NOTE(review): placed under `engine_init_kwargs`; the source had two
    # separator rows before this key, so confirm it is not a direct
    # `generator` key.
    custom_chat_template_chat_completion_path: chat_templates/qwen3_thinking_acc.jinja2

# Dataset selection.
data:
  # Explicit empty list; presumably filled in by the launcher or an
  # override — TODO confirm.
  train_data: []
  val_data: ["open-thoughts/OpenThoughts-TB-dev"]