alibi_2_4_256_fla / model.txt
Lanni-ni's picture
add remote code + model files
efb516c verified
AlibiForCausalLM(
(emb): Embedding(50277, 256)
(layers): ModuleList(
(0-1): 2 x TransformerBlock(
(attn_norm): RMSNorm(256, eps=1e-06)
(attn): Attention(
(q_proj): Linear(in_features=256, out_features=256, bias=False)
(k_proj): Linear(in_features=256, out_features=256, bias=False)
(v_proj): Linear(in_features=256, out_features=256, bias=False)
(o_proj): Linear(in_features=256, out_features=256, bias=False)
)
(mlp_norm): RMSNorm(256, eps=1e-06)
(mlp): TransformerMLP(
(gate_proj): Linear(in_features=256, out_features=1536, bias=False)
(down_proj): Linear(in_features=768, out_features=256, bias=False)
)
)
)
(norm): RMSNorm(256, eps=1e-06)
(lm_head): Linear(in_features=256, out_features=50277, bias=False)
)