{ "model_path": "/mnt/moe_ub/checkpoints/GLM-4.6V", "dataset_path": "/t1/moe_ub/data/training_data_3.jsonl", "max_length": 4096, "lora_rank": 64, "lora_alpha": 128, "learning_rate": 2e-05, "epochs": 2, "batch_size": 1, "gradient_accumulation": 8, "model_type": "GLM-4.6V 108B MoE (128 experts, 8 active) + LoRA", "training_method": "SFT with LoRA" }