{
  "epoch": 3,
  "train_bsz": 2,
  "eval_bsz": 4,
  "lr": 5e-06,
  "deepspeed": "configs/ds_stage2.json",
  "weight_decay": 0.01,
  "save_eval_step_ratio": 0.333333,
  "warmup_step_ratio": 0.1,
  "grad_checkpointing": true,
  "model": "llama3",
  "common": {
    "debug": false,
    "device": "0,1,2,3",
    "world_size": 4,
    "rank": 0,
    "master_address": "localhost",
    "master_port": 48541,
    "bf16": true,
    "wandb_project_name": "CREAM",
    "wandb_entity_name": "your_wandb_entity_name",
    "run_name": "train_llama_w_qwen_binary_sft-lora_None-ckpt_None-25-04-27-20_56_47",
    "output_dir": "outputs/sft/train_llama_w_qwen_binary_sft-lora_None-ckpt_None-25-04-27-20_56_47",
    "load_args_path": null
  },
  "checkpoint": null,
  "train_stage": "sft",
  "dataset": {
    "name": "train_llama_w_qwen_binary_sft",
    "limit_size": null,
    "max_length": 4096
  },
  "lora": {
    "enable": false,
    "alpha": 64,
    "r": 32,
    "dropout": 0.1
  },
  "dpo": {
    "beta": 0.1,
    "method": "original"
  }
}