nano-start_64_26m_f32 / training_config.json
fs90's picture
Upload folder using huggingface_hub
41202ce verified
{
"config": {
"model": {
"dtype": "f32",
"experts_per_tok": 1,
"hidden_size": 128,
"max_seq_len": 64,
"name": "nano-start",
"num_experts": 2,
"num_heads": 4,
"num_layers": 4,
"vocab_size": 100315
},
"trainer": {
"batch_size": 4,
"effective_batch_size": 8,
"gradient_accumulation": 2,
"learning_rate": 0.002,
"max_steps": 0,
"num_epochs": 20,
"seq_len": 64,
"total_steps": 260
}
},
"dataset_size": 6379,
"device": "Cuda(CudaDevice(DeviceId(1)))",
"error": null,
"run_dir": "./runs/20251205_144741",
"status": "completed"
}