gpu-goblin / tests /fixtures /sample_train.json
bharathtelu's picture
Deploy auto-tune UI + scripts (work-from-91d0cf0)
a9aa4ae verified
Raw
History Blame Contribute Delete
782 Bytes
{
"model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
"per_device_train_batch_size": 8,
"gradient_accumulation_steps": 4,
"max_seq_length": 4096,
"learning_rate": 0.0003,
"warmup_steps": 200,
"bf16": true,
"optim": "adamw_torch_fused",
"gradient_checkpointing": true,
"torch_compile": true,
"dataloader_num_workers": 4,
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 4,
"dataloader_persistent_workers": true,
"attn_implementation": "flash",
"num_train_epochs": 3,
"save_steps": 500,
"logging_steps": 25,
"output_dir": "./out",
"hub_token": "hf_jsonsamplehfabcdefghijklmnopqrs",
"checkpoint_uri": "s3://team-bucket/runs/qwen-lora-001/",
"env_vars": {
"HSA_FORCE_FINE_GRAIN_PCIE": "1",
"NCCL_MIN_NCHANNELS": "112"
}
}