| { | |
| "bias": true, | |
| "channel_size": 5000, | |
| "channels": [ | |
| "c0" | |
| ], | |
| "depth": 8, | |
| "dim": 256, | |
| "format": "lenepa_encoder", | |
| "format_version": 1, | |
| "is_causal": true, | |
| "mlp_ratio": 4.0, | |
| "nepa_final_norm": "ln", | |
| "nepa_patch_embed_cnn_dim": 192, | |
| "nepa_patch_embed_scalar_epsilon": 1.1, | |
| "nepa_patch_embed_scalar_hidden_dim": 32, | |
| "nepa_patch_embed_scalar_scales": [ | |
| 0.0001, | |
| 0.001, | |
| 0.01, | |
| 0.1, | |
| 1.0, | |
| 10.0, | |
| 100.0, | |
| 1000.0, | |
| 10000.0 | |
| ], | |
| "nepa_patch_embed_scalar_stats_mode": "patch_norm", | |
| "nepa_rep_pooling": "mean", | |
| "nepa_static_tokenizer": "conv_patch_embed", | |
| "norm_eps": 1e-06, | |
| "num_heads": 4, | |
| "num_patches": 625, | |
| "num_registers": 0, | |
| "patch_size": 8, | |
| "pos_embed_type": "none", | |
| "qk_norm_eps": 1e-06, | |
| "qkv_bias": true, | |
| "rope_base": 10000, | |
| "sampling_frequency": 1, | |
| "use_nepa": true, | |
| "use_qk_norm": true, | |
| "use_rope": true, | |
| "use_swiglu": true | |
| } | |