Spaces:
Sleeping
Sleeping
| vocab_size: 50257 | |
| d_model: 768 | |
| n_layer: 24 | |
| num_experts: 4 | |
| top_k: 1 | |
| d_ff: 2304 | |
| ssm_d_state: 16 | |
| ssm_expand: 2 | |
| load_balancing_coef: 0.0 | |
| router_z_loss_coef: 0.0 | |
| max_seq_len: 1024 | |
| dtype: "float16" | |
| use_cpu_offload: true # Offload to CPU during inference to save VRAM | |
| gradient_checkpointing: false | |
| checkpoint_ssm_layers: false | |
| use_flash_attention: true | |