AnthonyPa57's picture
Push model using huggingface_hub.
ecfbea3 verified
raw
history blame contribute delete
545 Bytes
{
"args": {
"capacity_factor": 1.0,
"context_window": 512,
"d_ff": 256,
"device": "cuda:0",
"dim": 256,
"dtype_str": "bfloat16",
"fp8_recipe": "tensorwise",
"impl": "grouped",
"k": 1,
"moe_type": "pytorch",
"moe_zloss_weight": 0.1,
"n_heads": 2,
"n_kv_heads": 2,
"n_layers": 1,
"n_regression_heads": 48,
"num_experts": 2,
"output_moe_weights": false,
"soft_cap": 20,
"theta": 10000.0,
"torch_compile": false,
"use_sparse": false,
"vocab_size": 30000
}
}