Text Generation
PyTorch
hychiang's picture
Upload folder using huggingface_hub
77ca467 verified
{
"d_model": 4096,
"d_intermediate": 0,
"n_layer": 56,
"vocab_size": 256000,
"ssm_cfg": {
"layer": [
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A8": "W4A8QMamba2"
},
{
"W4A16": "W4A16QMamba2"
},
{
"W4A8": "W4A8QMamba2"
}
],
"ngroups": 8,
"chunk_size": 128
},
"attn_layer_idx": [],
"attn_cfg": {},
"rms_norm": true,
"residual_in_fp32": true,
"fused_add_norm": true,
"pad_vocab_size_multiple": 128,
"tie_embeddings": false,
"norm_cfg": {
"norm": [
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A8": "QRMSNorm"
},
{
"W4A16": "RMSNorm"
},
{
"W4A8": "QRMSNorm"
}
]
},
"embedding_cfg": {
"layer": "W4O16Embedding"
},
"lm_head_cfg": {
"layer": "W4A8B16O16Linear"
},
"is_hybrid": true
}