| { |
| "job_name": "rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san", |
| "experiments_dir": "/e/data1/datasets/playground/ot-baf/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san", |
| "cluster_name": "jupiter", |
| "skyrl_entrypoint": "examples.terminal_bench.entrypoints.main_tbench", |
| "skyrl_hydra_args": [ |
| "+terminal_bench_config=terminal_bench", |
| "trainer.strategy=fsdp2", |
| "trainer.algorithm.advantage_estimator=rloo_n", |
| "trainer.algorithm.use_kl_loss=false", |
| "trainer.algorithm.kl_loss_coef=0.0", |
| "trainer.algorithm.eps_clip_low=0.2", |
| "trainer.algorithm.eps_clip_high=0.2", |
| "trainer.algorithm.loss_reduction=seq_mean_token_sum_norm_global", |
| "trainer.algorithm.use_tis=true", |
| "trainer.algorithm.tis_imp_ratio_cap=2.0", |
| "trainer.epochs=2", |
| "trainer.max_steps=80", |
| "trainer.update_epochs_per_batch=1", |
| "trainer.train_batch_size=64", |
| "trainer.policy_mini_batch_size=64", |
| "trainer.eval_batch_size=64", |
| "trainer.micro_forward_batch_size_per_gpu=4", |
| "trainer.micro_train_batch_size_per_gpu=1", |
| "trainer.max_prompt_length=999999", |
| "trainer.eval_interval=999999", |
| "trainer.eval_before_train=false", |
| "trainer.ckpt_interval=2", |
| "trainer.resume_mode=latest", |
| "trainer.hf_save_interval=5", |
| "++trainer.hf_hub_repo_id=laion/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san", |
| "++trainer.hf_hub_private=false", |
| "++trainer.hf_hub_revision=main", |
| "++trainer.enable_db_registration=false", |
| "trainer.project_name=OpenThoughts-Agent", |
| "trainer.log_level=INFO", |
| "trainer.tracker_commit_each_step=true", |
| "trainer.logger=console", |
| "trainer.run_name=rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san", |
| "trainer.ckpt_path=/e/data1/datasets/playground/ot-baf/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/checkpoints", |
| "trainer.export_path=/e/data1/datasets/playground/ot-baf/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/exports", |
| "trainer.policy.optimizer_config.lr=8e-6", |
| "trainer.policy.optimizer_config.weight_decay=0.0", |
| "trainer.policy.optimizer_config.adam_betas=[0.9,0.999]", |
| "trainer.policy.optimizer_config.max_grad_norm=0.9", |
| "trainer.policy.fsdp_config.cpu_offload=false", |
| "trainer.policy.fsdp_config.reshard_after_forward=true", |
| "trainer.policy.fsdp_config.fsdp_size=4", |
| "trainer.policy.model.path=/e/data1/datasets/playground/ot-baf/hf_hub/models--laion--GLM-4_7-swesmith-sandboxes-with_tests-oracle_verified_120s-maxeps-131k-fixthink/snapshots/0e3bff0c4e51f6b9ec0713b98b9eec36efb91cc6", |
| "trainer.ref.fsdp_config.cpu_offload=false", |
| "trainer.ref.fsdp_config.reshard_after_forward=true", |
| "trainer.ref.fsdp_config.fsdp_size=4", |
| "trainer.placement.colocate_all=false", |
| "trainer.placement.policy_num_nodes=2", |
| "trainer.placement.ref_num_nodes=2", |
| "trainer.placement.policy_num_gpus_per_node=4", |
| "trainer.placement.ref_num_gpus_per_node=4", |
| "trainer.fully_async.max_staleness_steps=16", |
| "trainer.fully_async.num_parallel_generation_workers=338", |
| "generator.backend=vllm", |
| "generator.timeout_multiplier=1.0", |
| "generator.model_dtype=bfloat16", |
| "generator.inference_engine_tensor_parallel_size=1", |
| "generator.num_inference_engines=48", |
| "generator.n_samples_per_prompt=8", |
| "generator.eval_n_samples_per_prompt=8", |
| "generator.gpu_memory_utilization=0.75", |
| "generator.max_num_seqs=24", |
| "generator.max_num_batched_tokens=65536", |
| "generator.enable_prefix_caching=true", |
| "generator.enable_chunked_prefill=true", |
| "generator.run_engines_locally=true", |
| "generator.weight_sync_backend=nccl", |
| "generator.async_engine=true", |
| "generator.batched=false", |
| "generator.enable_http_endpoint=true", |
| "generator.enable_ray_prometheus_stats=false", |
| "generator.vllm_stats_interval=1", |
| "generator.append_eos_token_after_stop_str_in_multi_turn=true", |
| "generator.max_turns=999999", |
| "generator.sampling_params.max_generate_length=4096", |
| "generator.sampling_params.temperature=1.0", |
| "generator.sampling_params.top_p=1.0", |
| "generator.sampling_params.top_k=-1", |
| "++generator.engine_init_kwargs.max_model_len=32768", |
| "++generator.engine_init_kwargs.custom_chat_template_chat_completion_path=chat_templates/qwen3_thinking_acc.jinja2", |
| "++generator.engine_init_kwargs.served_model_name=0e3bff0c4e51f6b9ec0713b98b9eec36efb91cc6", |
| "data.train_data=[\"/e/scratch/jureap59/feuer1/tasks/exp_rpt_pymethods2test-large\"]", |
| "data.val_data=[]", |
| "+terminal_bench_config.trials_dir=/e/data1/datasets/playground/ot-baf/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/trace_jobs", |
| "+terminal_bench_config.harbor.name=terminus-2", |
| "+terminal_bench_config.harbor.max_episodes=999999", |
| "+terminal_bench_config.harbor.enable_summarize=false", |
| "+terminal_bench_config.harbor.store_all_messages=true", |
| "+terminal_bench_config.harbor.trajectory_config.raw_content=true", |
| "+terminal_bench_config.harbor.enable_episode_logging=false", |
| "+terminal_bench_config.harbor.record_terminal_session=false", |
| "+terminal_bench_config.harbor.enable_pane_logging=false", |
| "+terminal_bench_config.harbor.strict_json_parser=true", |
| "+terminal_bench_config.harbor.interleaved_thinking=true", |
| "+terminal_bench_config.harbor.extra_body.chat_template_kwargs.enable_thinking=true", |
| "+terminal_bench_config.harbor.override_timeout_sec=900", |
| "+terminal_bench_config.harbor.override_cpus=1", |
| "+terminal_bench_config.harbor.override_memory_mb=2048", |
| "+terminal_bench_config.harbor.override_storage_mb=2048", |
| "+terminal_bench_config.harbor.auto_snapshot=true", |
| "+terminal_bench_config.harbor.verifier_override_timeout_sec=120", |
| "+terminal_bench_config.harbor.max_retries=3", |
| "+terminal_bench_config.harbor.min_wait_sec=60.0", |
| "+terminal_bench_config.harbor.max_wait_sec=600.0", |
| "+terminal_bench_config.harbor.wait_multiplier=2.0", |
| "+terminal_bench_config.harbor.exclude_exceptions=[\"VerifierTimeoutError\",\"VerifierRuntimeError\",\"RewardFileNotFoundError\",\"RewardFileEmptyError\",\"VerifierOutputParseError\"]", |
| "+terminal_bench_config.harbor.n_concurrent_trials=675", |
| "+terminal_bench_config.harbor.log_level=INFO", |
| "+terminal_bench_config.harbor.enable_reward_shaping=true", |
| "+terminal_bench_config.harbor.reward_shaper=composite_loop", |
| "+terminal_bench_config.harbor.loop_outcome_shaper=original", |
| "+terminal_bench_config.harbor.loop_shaping.outcome_weight=1.0", |
| "+terminal_bench_config.harbor.loop_shaping.total_shaping_cap=0.3", |
| "+terminal_bench_config.harbor.loop_shaping.terminate.enabled=true", |
| "+terminal_bench_config.harbor.loop_shaping.terminate.green_bonus=0.3", |
| "+terminal_bench_config.harbor.loop_shaping.terminate.red_penalty=0.3", |
| "+terminal_bench_config.harbor.loop_shaping.terminate.noterm_penalty=0.2", |
| "+terminal_bench_config.harbor.loop_shaping.antithrash.enabled=true", |
| "+terminal_bench_config.harbor.loop_shaping.antithrash.per_repeat_penalty=0.02", |
| "+terminal_bench_config.harbor.loop_shaping.antithrash.cap=0.1", |
| "+terminal_bench_config.harbor.collect_rollout_details=true", |
| "+terminal_bench_config.harbor.enable_error_classification=true", |
| "+terminal_bench_config.harbor.mask_exceptions=[\"DaytonaError\",\"EnvironmentStartTimeoutError\",\"NetworkError\",\"ConnectionError\",\"RewardFileNotFoundError\",\"RewardFileEmptyError\",\"AgentEnvironmentTimeoutError\",\"ContextLengthExceededError\"]", |
| "+terminal_bench_config.harbor.default_error_treatment=zero", |
| "+terminal_bench_config.harbor.passthrough_exceptions=[\"AgentTimeoutError\"]", |
| "+terminal_bench_config.harbor.zero_exceptions=[]", |
| "+terminal_bench_config.model_info.max_input_tokens=32000", |
| "+terminal_bench_config.model_info.max_output_tokens=4096", |
| "+terminal_bench_config.archiving.enabled=false", |
| "+terminal_bench_config.trace_upload.enabled=true", |
| "+terminal_bench_config.trace_upload.repo_org=DCAgent", |
| "+terminal_bench_config.trace_upload.episodes=last", |
| "+terminal_bench_config.trace_upload.dataset_type=SFT", |
| "+terminal_bench_config.trace_upload.cleanup=true" |
| ], |
| "model_path": "/e/data1/datasets/playground/ot-baf/hf_hub/models--laion--GLM-4_7-swesmith-sandboxes-with_tests-oracle_verified_120s-maxeps-131k-fixthink/snapshots/0e3bff0c4e51f6b9ec0713b98b9eec36efb91cc6", |
| "train_data": [ |
| "/e/scratch/jureap59/feuer1/tasks/exp_rpt_pymethods2test-large" |
| ], |
| "val_data": [], |
| "num_nodes": 14, |
| "gpus_per_node": 4, |
| "cpus_per_node": 288, |
| "tensor_parallel_size": 1, |
| "ray_port": 6379, |
| "master_port": 12345, |
| "checkpoints_dir": null, |
| "export_path": "/e/data1/datasets/playground/ot-baf/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/rl__56GPU_seqnorm_tis_untrunc_symclip_loopshape__exp_rpt_pymethods2test-large__GLM-4_7-swesmith-san/exports", |
| "needs_ssh_tunnel": true, |
| "needs_cuda_detection": false, |
| "pinggy_persistent_url": null, |
| "pinggy_token": null, |
| "agent_name": "terminus-2", |
| "harbor_env": "daytona", |
| "proxychains_binary": null, |
| "container_sif": null, |
| "container_binds": [], |
| "ray_object_store_gb": 40.0, |
| "trace_upload_enabled": true, |
| "trace_upload_repo_org": "DCAgent", |
| "trace_upload_episodes": "last", |
| "trace_upload_dataset_type": "SFT", |
| "trace_upload_cleanup": true |
| } |