ReactiveAI
/

RxT-Beta-Supervised

Text Generation

model_hub_mixin

pytorch_model_hub_mixin

🇪🇺 Region: EU

Model card · Files and versions

AdamF92 committed on Feb 24

Commit

c3d88cd

·

verified ·

1 Parent(s): 0765942

Push model using huggingface_hub.

Files changed (2) hide show

config.json +44 -0
model.safetensors +1 -1

config.json CHANGED Viewed

@@ -1,4 +1,48 @@
 {
   "encoder_config": {
     "att_groups": 8,
     "att_heads": 16,

 {
+  "decoder_config": {
+    "att_groups": 4,
+    "att_heads": 16,
+    "att_query_groups": 8,
+    "cross_att_type": "sqa",
+    "dense_layer_dim": 1536,
+    "embed_dim": 512,
+    "ff_activation": "silu",
+    "ff_dim": 192,
+    "ff_dropout": 0.0,
+    "final_stateless_layers_config": [
+      "moe",
+      "moe"
+    ],
+    "head_norm_type": "rms_norm",
+    "moe_bias_mode": "global",
+    "moe_grouped_gemm": true,
+    "moe_shared_experts_bias_mode": "global",
+    "moe_top_k": 10,
+    "moe_use_cutlass_grouped_gemm": true,
+    "moe_use_weighted_shared_experts": false,
+    "num_experts": 384,
+    "num_layers": 21,
+    "num_shared_experts": 2,
+    "rope_base": 100000,
+    "router_amp": true,
+    "self_att_type": "sqa",
+    "seq_len": 8192,
+    "shared_expert_dim": 384,
+    "stateless_layers_config": [
+      "dense",
+      "moe"
+    ],
+    "stm_size": 4096,
+    "use_attention_output_bias": false,
+    "use_flash_attention": true,
+    "use_gated": true,
+    "use_gated_attention": true,
+    "use_gated_cross_attention": false,
+    "use_head_norm": true,
+    "use_moe": true,
+    "use_vectorized_moe": true,
+    "vocab_size": 65536
+  },
   "encoder_config": {
     "att_groups": 8,
     "att_heads": 16,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4879d67758d3c6ed9aa9a3891d5976a518acdb7c7be19782859649747cec733e
 size 6099558592

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1bbeee16809db34a0e38fc699e6cde268609721885caa7d2b1feab25bab27e1
 size 6099558592