{
  "model_path": "/mnt/moe_ub/checkpoints/GLM-4.6V",
  "dataset_path": "/t1/moe_ub/data/training_data_3.jsonl",
  "max_length": 4096,
  "lora_rank": 64,
  "lora_alpha": 128,
  "learning_rate": 2e-05,
  "epochs": 2,
  "batch_size": 1,
  "gradient_accumulation": 8,
  "model_type": "GLM-4.6V 108B MoE (128 experts, 8 active) + LoRA",
  "training_method": "SFT with LoRA"
}