luciaquirke committed on
Commit
edb8c21
·
verified ·
1 Parent(s): 6974507

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -6,25 +6,29 @@
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 0,
8
  "classifier_dropout": 0.1,
9
- "dtype": "float32",
10
  "eos_token_id": 2,
11
  "hidden_act": "gelu",
12
  "hidden_dropout": 0.0,
13
  "hidden_size": 4096,
14
  "initializer_range": 0.02,
15
  "intermediate_size": 16384,
 
16
  "layer_norm_eps": 1e-05,
17
  "max_position_embeddings": 2048,
18
  "model_type": "gpt_neox",
19
  "num_attention_heads": 32,
20
  "num_hidden_layers": 32,
 
21
  "partial_rotary_factor": 0.25,
22
- "rope_scaling": null,
 
 
 
 
23
  "rope_theta": 10000.0,
24
- "rotary_emb_base": 10000.0,
25
- "rotary_pct": 0.25,
26
  "tie_word_embeddings": false,
27
- "transformers_version": "4.56.2",
28
  "use_cache": true,
29
  "use_parallel_residual": true,
30
  "vocab_size": 50304
 
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 0,
8
  "classifier_dropout": 0.1,
9
+ "dtype": "bfloat16",
10
  "eos_token_id": 2,
11
  "hidden_act": "gelu",
12
  "hidden_dropout": 0.0,
13
  "hidden_size": 4096,
14
  "initializer_range": 0.02,
15
  "intermediate_size": 16384,
16
+ "is_decoder": false,
17
  "layer_norm_eps": 1e-05,
18
  "max_position_embeddings": 2048,
19
  "model_type": "gpt_neox",
20
  "num_attention_heads": 32,
21
  "num_hidden_layers": 32,
22
+ "pad_token_id": null,
23
  "partial_rotary_factor": 0.25,
24
+ "rope_parameters": {
25
+ "partial_rotary_factor": 0.25,
26
+ "rope_theta": 10000.0,
27
+ "rope_type": "default"
28
+ },
29
  "rope_theta": 10000.0,
 
 
30
  "tie_word_embeddings": false,
31
+ "transformers_version": "5.3.0",
32
  "use_cache": true,
33
  "use_parallel_residual": true,
34
  "vocab_size": 50304
generation_config.json CHANGED
@@ -2,5 +2,8 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.56.2"
 
 
 
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 2,
5
+ "output_attentions": false,
6
+ "output_hidden_states": false,
7
+ "transformers_version": "5.3.0",
8
+ "use_cache": true
9
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a66d6f33508c8be64a79196e4f657fedfbe56e57cca43c1a1e344d122298d58c
3
+ size 13712553448
tokenizer.json CHANGED
@@ -266,7 +266,7 @@
266
  },
267
  "decoder": {
268
  "type": "ByteLevel",
269
- "add_prefix_space": false,
270
  "trim_offsets": true,
271
  "use_regex": true
272
  },
@@ -274,8 +274,8 @@
274
  "type": "BPE",
275
  "dropout": null,
276
  "unk_token": null,
277
- "continuing_subword_prefix": null,
278
- "end_of_word_suffix": null,
279
  "fuse_unk": false,
280
  "byte_fallback": false,
281
  "ignore_merges": false,
 
266
  },
267
  "decoder": {
268
  "type": "ByteLevel",
269
+ "add_prefix_space": true,
270
  "trim_offsets": true,
271
  "use_regex": true
272
  },
 
274
  "type": "BPE",
275
  "dropout": null,
276
  "unk_token": null,
277
+ "continuing_subword_prefix": "",
278
+ "end_of_word_suffix": "",
279
  "fuse_unk": false,
280
  "byte_fallback": false,
281
  "ignore_merges": false,
tokenizer_config.json CHANGED
@@ -1,215 +1,14 @@
1
  {
2
- "add_bos_token": false,
3
- "add_eos_token": false,
4
  "add_prefix_space": false,
5
- "added_tokens_decoder": {
6
- "0": {
7
- "content": "<|endoftext|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "1": {
15
- "content": "<|padding|>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": true
21
- },
22
- "50254": {
23
- "content": " ",
24
- "lstrip": false,
25
- "normalized": true,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": false
29
- },
30
- "50255": {
31
- "content": " ",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": false
37
- },
38
- "50256": {
39
- "content": " ",
40
- "lstrip": false,
41
- "normalized": true,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": false
45
- },
46
- "50257": {
47
- "content": " ",
48
- "lstrip": false,
49
- "normalized": true,
50
- "rstrip": false,
51
- "single_word": false,
52
- "special": false
53
- },
54
- "50258": {
55
- "content": " ",
56
- "lstrip": false,
57
- "normalized": true,
58
- "rstrip": false,
59
- "single_word": false,
60
- "special": false
61
- },
62
- "50259": {
63
- "content": " ",
64
- "lstrip": false,
65
- "normalized": true,
66
- "rstrip": false,
67
- "single_word": false,
68
- "special": false
69
- },
70
- "50260": {
71
- "content": " ",
72
- "lstrip": false,
73
- "normalized": true,
74
- "rstrip": false,
75
- "single_word": false,
76
- "special": false
77
- },
78
- "50261": {
79
- "content": " ",
80
- "lstrip": false,
81
- "normalized": true,
82
- "rstrip": false,
83
- "single_word": false,
84
- "special": false
85
- },
86
- "50262": {
87
- "content": " ",
88
- "lstrip": false,
89
- "normalized": true,
90
- "rstrip": false,
91
- "single_word": false,
92
- "special": false
93
- },
94
- "50263": {
95
- "content": " ",
96
- "lstrip": false,
97
- "normalized": true,
98
- "rstrip": false,
99
- "single_word": false,
100
- "special": false
101
- },
102
- "50264": {
103
- "content": " ",
104
- "lstrip": false,
105
- "normalized": true,
106
- "rstrip": false,
107
- "single_word": false,
108
- "special": false
109
- },
110
- "50265": {
111
- "content": " ",
112
- "lstrip": false,
113
- "normalized": true,
114
- "rstrip": false,
115
- "single_word": false,
116
- "special": false
117
- },
118
- "50266": {
119
- "content": " ",
120
- "lstrip": false,
121
- "normalized": true,
122
- "rstrip": false,
123
- "single_word": false,
124
- "special": false
125
- },
126
- "50267": {
127
- "content": " ",
128
- "lstrip": false,
129
- "normalized": true,
130
- "rstrip": false,
131
- "single_word": false,
132
- "special": false
133
- },
134
- "50268": {
135
- "content": " ",
136
- "lstrip": false,
137
- "normalized": true,
138
- "rstrip": false,
139
- "single_word": false,
140
- "special": false
141
- },
142
- "50269": {
143
- "content": " ",
144
- "lstrip": false,
145
- "normalized": true,
146
- "rstrip": false,
147
- "single_word": false,
148
- "special": false
149
- },
150
- "50270": {
151
- "content": " ",
152
- "lstrip": false,
153
- "normalized": true,
154
- "rstrip": false,
155
- "single_word": false,
156
- "special": false
157
- },
158
- "50271": {
159
- "content": " ",
160
- "lstrip": false,
161
- "normalized": true,
162
- "rstrip": false,
163
- "single_word": false,
164
- "special": false
165
- },
166
- "50272": {
167
- "content": " ",
168
- "lstrip": false,
169
- "normalized": true,
170
- "rstrip": false,
171
- "single_word": false,
172
- "special": false
173
- },
174
- "50273": {
175
- "content": " ",
176
- "lstrip": false,
177
- "normalized": true,
178
- "rstrip": false,
179
- "single_word": false,
180
- "special": false
181
- },
182
- "50274": {
183
- "content": " ",
184
- "lstrip": false,
185
- "normalized": true,
186
- "rstrip": false,
187
- "single_word": false,
188
- "special": false
189
- },
190
- "50275": {
191
- "content": " ",
192
- "lstrip": false,
193
- "normalized": true,
194
- "rstrip": false,
195
- "single_word": false,
196
- "special": false
197
- },
198
- "50276": {
199
- "content": " ",
200
- "lstrip": false,
201
- "normalized": true,
202
- "rstrip": false,
203
- "single_word": false,
204
- "special": false
205
- }
206
- },
207
  "bos_token": "<|endoftext|>",
208
  "clean_up_tokenization_spaces": false,
209
  "eos_token": "<|endoftext|>",
210
- "extra_special_tokens": {},
 
211
  "model_max_length": 1000000000000000019884624838656,
212
  "pad_token": null,
213
  "tokenizer_class": "GPTNeoXTokenizer",
 
214
  "unk_token": "<|endoftext|>"
215
  }
 
1
  {
 
 
2
  "add_prefix_space": false,
3
+ "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "bos_token": "<|endoftext|>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|endoftext|>",
7
+ "errors": "replace",
8
+ "is_local": false,
9
  "model_max_length": 1000000000000000019884624838656,
10
  "pad_token": null,
11
  "tokenizer_class": "GPTNeoXTokenizer",
12
+ "trim_offsets": true,
13
  "unk_token": "<|endoftext|>"
14
  }