{
  "model_path": "/mnt/moe_ub/checkpoints/GLM-4.6V",
  "dataset_path": "/t1/moe_ub/data/training_data_3.jsonl",
  "max_length": 4096,
  "lora_rank": 64,
  "lora_alpha": 128,
  "learning_rate": 2e-05,
  "epochs": 2,
  "batch_size": 1,
  "gradient_accumulation": 8,
  "model_type": "GLM-4.6V 108B MoE (128 experts, 8 active) + LoRA",
  "training_method": "SFT with LoRA"
}