Push model using huggingface_hub.

Files changed (3) hide show

README.md ADDED Viewed

+---
+license: apache-2.0
+pipeline_tag: text-generation
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: [More Information Needed]
+- Paper: [More Information Needed]
+- Docs: [More Information Needed]

config.json ADDED Viewed

+{
+  "encoder_config": {
+    "att_groups": 8,
+    "att_heads": 16,
+    "att_query_groups": 8,
+    "cross_att_type": "sqa",
+    "embed_dim": 512,
+    "ff_activation": "silu",
+    "ff_dim": 1536,
+    "ff_dropout": 0.0,
+    "num_layers": 21,
+    "rope_base": 100000,
+    "self_att_type": "sqa",
+    "seq_len": 8192,
+    "skip_memory_cross_attention": true,
+    "stm_size": 4096,
+    "use_attention_output_bias": false,
+    "use_flash_attention": true,
+    "use_gated": true,
+    "use_gated_attention": true,
+    "vocab_size": 65536
+  },
+  "memory_attention_config": {
+    "att_groups": 8,
+    "att_heads": 16,
+    "att_query_groups": 8,
+    "att_type": "sqa",
+    "embed_dim": 512,
+    "interlayer_att_groups": 8,
+    "interlayer_att_query_groups": 8,
+    "interlayer_att_type": "sqa",
+    "norm_type": "classic-rms",
+    "num_groups": 3,
+    "num_layers": 21,
+    "residual_gate_type": "elementwise",
+    "residual_per_slot_gate": true,
+    "seq_len": 8192,
+    "stm_size": 4096,
+    "use_flash_attention": false,
+    "use_gated_residual": true,
+    "use_tanh_residual_gate": false
+  },
+  "memory_attention_variant": "grouped-self-interlayer",
+  "tokenizer": null,
+  "tokenizer_config": {
+    "answer_token_id": 6,
+    "bos_token_id": 2,
+    "eos_token_id": 3,
+    "pad_token_id": 0,
+    "query_token_id": 5
+  }
+}

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdbc46cfc72c45840f23b9751c0d953b960b7277301b0fd75636c9eeb830c06e
+size 6099558592