AdamF92 committed on
Commit
9ec127d
·
verified ·
1 Parent(s): 736d513

Push model using huggingface_hub.

Browse files
Files changed (3) hide show
  1. README.md +12 -0
  2. config.json +52 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ pipeline_tag: text-generation
4
+ tags:
5
+ - model_hub_mixin
6
+ - pytorch_model_hub_mixin
7
+ ---
8
+
9
+ This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
10
+ - Code: [More Information Needed]
11
+ - Paper: [More Information Needed]
12
+ - Docs: [More Information Needed]
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder_config": {
3
+ "att_groups": 8,
4
+ "att_heads": 16,
5
+ "att_query_groups": 8,
6
+ "cross_att_type": "sqa",
7
+ "embed_dim": 512,
8
+ "ff_activation": "silu",
9
+ "ff_dim": 1536,
10
+ "ff_dropout": 0.0,
11
+ "num_layers": 21,
12
+ "rope_base": 100000,
13
+ "self_att_type": "sqa",
14
+ "seq_len": 8192,
15
+ "skip_memory_cross_attention": true,
16
+ "stm_size": 4096,
17
+ "use_attention_output_bias": false,
18
+ "use_flash_attention": true,
19
+ "use_gated": true,
20
+ "use_gated_attention": true,
21
+ "vocab_size": 65536
22
+ },
23
+ "memory_attention_config": {
24
+ "att_groups": 8,
25
+ "att_heads": 16,
26
+ "att_query_groups": 8,
27
+ "att_type": "sqa",
28
+ "embed_dim": 512,
29
+ "interlayer_att_groups": 8,
30
+ "interlayer_att_query_groups": 8,
31
+ "interlayer_att_type": "sqa",
32
+ "norm_type": "classic-rms",
33
+ "num_groups": 3,
34
+ "num_layers": 21,
35
+ "residual_gate_type": "elementwise",
36
+ "residual_per_slot_gate": true,
37
+ "seq_len": 8192,
38
+ "stm_size": 4096,
39
+ "use_flash_attention": false,
40
+ "use_gated_residual": true,
41
+ "use_tanh_residual_gate": false
42
+ },
43
+ "memory_attention_variant": "grouped-self-interlayer",
44
+ "tokenizer": null,
45
+ "tokenizer_config": {
46
+ "answer_token_id": 6,
47
+ "bos_token_id": 2,
48
+ "eos_token_id": 3,
49
+ "pad_token_id": 0,
50
+ "query_token_id": 5
51
+ }
52
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbc46cfc72c45840f23b9751c0d953b960b7277301b0fd75636c9eeb830c06e
3
+ size 6099558592