# Pomilon
# Deploy Aetheris to HF Space
# 1df0e33
# Flat settings mapping: model sizes, auxiliary-loss coefficients, sequence
# length, and runtime flags.
# NOTE(review): the descriptions below are inferred from the key names only;
# the code that reads this file is not visible here, so confirm each one
# against the loader before relying on it.

# --- Model size ---
# NOTE(review): 50257 matches the GPT-2 BPE vocabulary size; presumably a
# GPT-2 tokenizer is used. TODO confirm.
vocab_size: 50257
# Presumably the hidden (embedding) width and the number of stacked layers.
d_model: 128
n_layer: 4

# --- Expert routing ---
# Presumably mixture-of-experts settings: experts per layer, and how many
# experts each token is routed to (top_k: 1 would mean a single expert).
num_experts: 4
top_k: 1
# Presumably the feed-forward inner width (here 3 x d_model).
d_ff: 384

# --- State-space (SSM) settings ---
# Presumably the SSM state size and the inner-width expansion factor
# relative to d_model. TODO confirm against the model code.
ssm_d_state: 8
ssm_expand: 2

# --- Auxiliary loss coefficients ---
# Presumably weights applied to the router load-balancing loss and the
# router z-loss when they are added to the training loss.
load_balancing_coef: 0.01
router_z_loss_coef: 0.001

# --- Sequence length ---
# Presumably the maximum number of tokens per sequence.
max_seq_len: 128

# --- Runtime / memory options ---
# Quoted so the value is always read as a string.
dtype: "float32"  # Use float32 for debugging on CPU
# All memory/speed options below are switched off in this configuration.
use_cpu_offload: false
gradient_checkpointing: false
checkpoint_ssm_layers: false
use_flash_attention: false