| { |
| "custom": {}, |
| "train": { |
| "train_policy": { |
| "type": "sft", |
| "dataset": { |
| "name": "LNTANOooo/sharegpt52k", |
| "subset": "", |
| "revision": "", |
| "split": [ |
| "train" |
| ], |
| "test_size": 0.1 |
| }, |
| "mini_batch": 8, |
| "dataloader_shuffle": true, |
| "enable_dataset_cache": false, |
| "dataloader_num_workers": 0, |
| "dataloader_prefetch_factor": null, |
| "conversation_column_name": "conversation", |
| "system_prompt": "" |
| }, |
| "optm_name": "AdamW", |
| "optm_lr": 0.0002, |
| "optm_impl": "fused", |
| "optm_weight_decay": 0.01, |
| "optm_betas": [ |
| 0.9, |
| 0.999 |
| ], |
| "optm_warmup_steps": 20, |
| "optm_decay_ratio": null, |
| "optm_decay_type": null, |
| "optm_min_lr_factor": 0.0, |
| "optm_grad_norm_clip": 1.0, |
| "master_dtype": "float32", |
| "param_dtype": "bfloat16", |
| "transfer_dtype": "bfloat16", |
| "fsdp_reduce_dtype": "float32", |
| "fsdp_offload": false, |
| "fsdp_reshard_after_forward": "default", |
| "train_batch_per_replica": 32, |
| "enable_validation": false, |
| "validation_step": 20, |
| "validation_batch_per_replica": 24, |
| "fp8": { |
| "enable_fp8": false, |
| "fp8_recipe": "dynamic_scaling", |
| "quant_recipe": "rowwise" |
| }, |
| "ckpt": { |
| "enable_checkpoint": true, |
| "save_freq": 2000, |
| "save_mode": "async", |
| "max_keep": 5, |
| "export_safetensors": true, |
| "upload_hf": false, |
| "hf_repo_name": "Cosmos-Reason1", |
| "upload_s3": false, |
| "s3_bucket": null, |
| "s3_prefix": "outputs" |
| }, |
| "resume": true, |
| "epoch": 1000, |
| "output_dir": "./outputs/qwen2-5-3b-tp1-sft/20251001112805", |
| "timestamp": "20251001112805", |
| "epsilon": 1e-06, |
| "async_tp_enabled": false, |
| "compile": false, |
| "sync_weight_interval": 1, |
| "deterministic": false, |
| "seed": null, |
| "max_num_steps": null, |
| "sequence_packing": false |
| }, |
| "rollout": { |
| "parallelism": { |
| "n_init_replicas": 1, |
| "tp_size": 2, |
| "cp_size": 1, |
| "ep_size": 1, |
| "dp_shard_size": -1, |
| "pp_size": 1, |
| "pp_dynamic_shape": false, |
| "pp_micro_batch_size": 1, |
| "dp_replicate_size": 1 |
| }, |
| "enforce_eager": true, |
| "include_stop_str_in_output": false, |
| "gpu_memory_utilization": 0.8, |
| "enable_chunked_prefill": false, |
| "max_response_length": 2048, |
| "n_generation": 16, |
| "batch_size": 1, |
| "val_batch_size": null, |
| "quantization": "none", |
| "seed": null, |
| "sampling_config": { |
| "temperature": 1.0, |
| "top_p": 1.0, |
| "top_k": -1, |
| "repetition_penalty": 1.0, |
| "use_flashinfer": false |
| }, |
| "vllm_use_flashinfer": false, |
| "backend": "vllm" |
| }, |
| "policy": { |
| "parallelism": { |
| "n_init_replicas": 1, |
| "tp_size": 1, |
| "cp_size": 1, |
| "ep_size": 1, |
| "dp_shard_size": 8, |
| "pp_size": 1, |
| "pp_dynamic_shape": false, |
| "pp_micro_batch_size": 1, |
| "dp_replicate_size": 8 |
| }, |
| "model_name_or_path": "Qwen/Qwen3-4B-Instruct-2507", |
| "model_revision": null, |
| "model_max_length": 4096, |
| "model_gradient_checkpointing": true, |
| "lora": null, |
| "trainable_map": null, |
| "enable_liger_kernel": false |
| }, |
| "logging": { |
| "logger": [ |
| "console", |
| "wandb" |
| ], |
| "project_name": "cosmos_rl", |
| "experiment_name": "None", |
| "report_mfu": false |
| }, |
| "profiler": { |
| "enable_profiler": false, |
| "enable_nsys": false, |
| "sub_profiler_config": { |
| "do_profile": false, |
| "active_steps": 1, |
| "warmup_steps": 1, |
| "wait_steps": 1, |
| "rank_filter": [], |
| "record_shape": false, |
| "profile_memory": false, |
| "with_stack": false, |
| "with_modules": false |
| } |
| }, |
| "validation": { |
| "dataset": { |
| "name": "", |
| "subset": "", |
| "revision": "", |
| "split": [ |
| "" |
| ], |
| "test_size": null |
| }, |
| "temperature": 0.0, |
| "top_p": null, |
| "top_k": 1, |
| "repetition_penalty": 1.0, |
| "n_generation": 1, |
| "max_response_length": null, |
| "reward_function": { |
| "single_choice": 1.0 |
| } |
| }, |
| "redis": "12800", |
| "eth_ips": "10.0.0.171;172.17.0.1;100.64.0.5;10.50.83.240;10.50.154.240;169.254.66.66;172.20.0.10" |
| } |