# Hyperparameters for the custom SsaiForCausalLM architecture.
# NOTE(review): the per-field notes below are inferred from the key names
# only; confirm them against the code that consumes this file.

# This key contains a space, unlike the snake_case keys that follow. It is
# left unchanged because consumers may look it up by this exact name.
Custom architecture: SsaiForCausalLM

# Model dimensions.
hidden_size: 1024
intermediate_size: 4096
layers: 16

# Attention layout. kv_heads is smaller than heads (4 vs 16), which
# presumably means grouped-query attention - TODO confirm.
heads: 16
kv_heads: 4

# Sequence length and rotary-embedding base (presumably) - TODO confirm.
max_seq_len: 160
rope_theta: 10000

# Feature switches. Written as canonical lowercase booleans so that every
# YAML 1.1 / 1.2 loader resolves them to a boolean rather than a string.
qk_norm: true
local_mixer: true

# NOTE(review): 0.0 presumably disables NEFTune noise - confirm.
neftune_alpha: 0.0