ReactiveAI
/

RxT-Beta-Decoder-iSFT

Text Generation

model_hub_mixin

pytorch_model_hub_mixin

🇪🇺 Region: EU

Model card Files Files and versions

AdamF92 committed on Feb 16

Commit

0e79b91

·

verified ·

1 Parent(s): c6a6132

In progress training - batch: 8192

Files changed (2) hide show

config.json +44 -0
model.safetensors +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "att_groups": 4,
+  "att_heads": 16,
+  "att_query_groups": 8,
+  "cross_att_type": "sqa",
+  "dense_layer_dim": 1536,
+  "embed_dim": 512,
+  "ff_activation": "silu",
+  "ff_dim": 192,
+  "ff_dropout": 0.0,
+  "final_stateless_layers_config": [
+    "moe",
+    "moe"
+  ],
+  "head_norm_type": "rms_norm",
+  "moe_bias_mode": "global",
+  "moe_grouped_gemm": true,
+  "moe_shared_experts_bias_mode": "global",
+  "moe_top_k": 10,
+  "moe_use_cutlass_grouped_gemm": true,
+  "moe_use_weighted_shared_experts": false,
+  "num_experts": 384,
+  "num_layers": 21,
+  "num_shared_experts": 2,
+  "rope_base": 100000,
+  "router_amp": true,
+  "self_att_type": "sqa",
+  "seq_len": 8192,
+  "shared_expert_dim": 384,
+  "stateless_layers_config": [
+    "dense",
+    "moe"
+  ],
+  "stm_size": 4096,
+  "use_attention_output_bias": false,
+  "use_flash_attention": true,
+  "use_gated": true,
+  "use_gated_attention": true,
+  "use_gated_cross_attention": false,
+  "use_head_norm": true,
+  "use_moe": true,
+  "use_vectorized_moe": true,
+  "vocab_size": 65536
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3ad87cfc7fcc49e01bb9b649ffd2081d8f79aa1e200dd59bd6a2c24fb702cc
+size 5728244528