Mattimax commited on
Commit
d86ae0d
·
verified ·
1 Parent(s): 3374727

Upload 5 files

Browse files
Files changed (5) hide show
  1. config.json +31 -0
  2. model.safetensors +3 -0
  3. scales.json +278 -0
  4. tokenizer.json +0 -0
  5. train_stats.json +0 -0
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "tiny-gpt",
3
+ "vocab_size": 1920,
4
+ "n_layers": 6,
5
+ "n_heads": 6,
6
+ "d_model": 240,
7
+ "d_ff": 960,
8
+ "n_ctx": 64,
9
+ "dropout": 0.0,
10
+ "tie_word_embeddings": true,
11
+ "special_tokens": {
12
+ "pad_token": "<PAD>",
13
+ "bos_token": "<BOS>",
14
+ "eos_token": "<EOS>",
15
+ "sep_token": "<SEP>",
16
+ "unk_token": "<UNK>",
17
+ "ids": {
18
+ "pad": 0,
19
+ "bos": 1,
20
+ "eos": 2,
21
+ "sep": 3,
22
+ "unk": 4
23
+ }
24
+ },
25
+ "quantization": {
26
+ "default": "int8",
27
+ "embedding_bits": 18,
28
+ "layernorm_bits": 24,
29
+ "format": "safetensors+scales"
30
+ }
31
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac71d1ac78ca8be0cbbd6e2783d0a781173c16c38502b7be8c8d65dae12934c
3
+ size 6553752
scales.json ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tok_emb.weight": {
3
+ "scale": 3.5722721575251916e-05,
4
+ "nbits": 18,
5
+ "dtype": "int32",
6
+ "emulated": true
7
+ },
8
+ "pos_emb.weight": {
9
+ "scale": 3.314935944296658e-05,
10
+ "nbits": 18,
11
+ "dtype": "int32",
12
+ "emulated": true
13
+ },
14
+ "blocks.0.norm1.weight": {
15
+ "scale": 1.3538360433878532e-07,
16
+ "nbits": 24,
17
+ "dtype": "int32",
18
+ "emulated": true
19
+ },
20
+ "blocks.0.attn.mask": {
21
+ "scale": 0.007874015826771653,
22
+ "nbits": 8,
23
+ "dtype": "int8",
24
+ "emulated": false
25
+ },
26
+ "blocks.0.attn.W_qkv.weight": {
27
+ "scale": 0.0013009633219017568,
28
+ "nbits": 8,
29
+ "dtype": "int8",
30
+ "emulated": false
31
+ },
32
+ "blocks.0.attn.W_o.weight": {
33
+ "scale": 0.0009806638006958249,
34
+ "nbits": 8,
35
+ "dtype": "int8",
36
+ "emulated": false
37
+ },
38
+ "blocks.0.norm2.weight": {
39
+ "scale": 1.3978162281289483e-07,
40
+ "nbits": 24,
41
+ "dtype": "int32",
42
+ "emulated": true
43
+ },
44
+ "blocks.0.ff.fc1.weight": {
45
+ "scale": 0.0012522846309088159,
46
+ "nbits": 8,
47
+ "dtype": "int8",
48
+ "emulated": false
49
+ },
50
+ "blocks.0.ff.fc2.weight": {
51
+ "scale": 0.0008591893586456118,
52
+ "nbits": 8,
53
+ "dtype": "int8",
54
+ "emulated": false
55
+ },
56
+ "blocks.1.norm1.weight": {
57
+ "scale": 1.3831941095351961e-07,
58
+ "nbits": 24,
59
+ "dtype": "int32",
60
+ "emulated": true
61
+ },
62
+ "blocks.1.attn.mask": {
63
+ "scale": 0.007874015826771653,
64
+ "nbits": 8,
65
+ "dtype": "int8",
66
+ "emulated": false
67
+ },
68
+ "blocks.1.attn.W_qkv.weight": {
69
+ "scale": 0.001282494329923795,
70
+ "nbits": 8,
71
+ "dtype": "int8",
72
+ "emulated": false
73
+ },
74
+ "blocks.1.attn.W_o.weight": {
75
+ "scale": 0.0011245226614926556,
76
+ "nbits": 8,
77
+ "dtype": "int8",
78
+ "emulated": false
79
+ },
80
+ "blocks.1.norm2.weight": {
81
+ "scale": 1.4735743036467565e-07,
82
+ "nbits": 24,
83
+ "dtype": "int32",
84
+ "emulated": true
85
+ },
86
+ "blocks.1.ff.fc1.weight": {
87
+ "scale": 0.001335447659535746,
88
+ "nbits": 8,
89
+ "dtype": "int8",
90
+ "emulated": false
91
+ },
92
+ "blocks.1.ff.fc2.weight": {
93
+ "scale": 0.0009407425338697058,
94
+ "nbits": 8,
95
+ "dtype": "int8",
96
+ "emulated": false
97
+ },
98
+ "blocks.2.norm1.weight": {
99
+ "scale": 1.4124889373715176e-07,
100
+ "nbits": 24,
101
+ "dtype": "int32",
102
+ "emulated": true
103
+ },
104
+ "blocks.2.attn.mask": {
105
+ "scale": 0.007874015826771653,
106
+ "nbits": 8,
107
+ "dtype": "int8",
108
+ "emulated": false
109
+ },
110
+ "blocks.2.attn.W_qkv.weight": {
111
+ "scale": 0.001387487442367734,
112
+ "nbits": 8,
113
+ "dtype": "int8",
114
+ "emulated": false
115
+ },
116
+ "blocks.2.attn.W_o.weight": {
117
+ "scale": 0.0012036952295510599,
118
+ "nbits": 8,
119
+ "dtype": "int8",
120
+ "emulated": false
121
+ },
122
+ "blocks.2.norm2.weight": {
123
+ "scale": 1.4793962073005056e-07,
124
+ "nbits": 24,
125
+ "dtype": "int32",
126
+ "emulated": true
127
+ },
128
+ "blocks.2.ff.fc1.weight": {
129
+ "scale": 0.0015438962488531877,
130
+ "nbits": 8,
131
+ "dtype": "int8",
132
+ "emulated": false
133
+ },
134
+ "blocks.2.ff.fc2.weight": {
135
+ "scale": 0.0010037684617888083,
136
+ "nbits": 8,
137
+ "dtype": "int8",
138
+ "emulated": false
139
+ },
140
+ "blocks.3.norm1.weight": {
141
+ "scale": 1.3520645180278738e-07,
142
+ "nbits": 24,
143
+ "dtype": "int32",
144
+ "emulated": true
145
+ },
146
+ "blocks.3.attn.mask": {
147
+ "scale": 0.007874015826771653,
148
+ "nbits": 8,
149
+ "dtype": "int8",
150
+ "emulated": false
151
+ },
152
+ "blocks.3.attn.W_qkv.weight": {
153
+ "scale": 0.0011730166719523753,
154
+ "nbits": 8,
155
+ "dtype": "int8",
156
+ "emulated": false
157
+ },
158
+ "blocks.3.attn.W_o.weight": {
159
+ "scale": 0.0010373295140886681,
160
+ "nbits": 8,
161
+ "dtype": "int8",
162
+ "emulated": false
163
+ },
164
+ "blocks.3.norm2.weight": {
165
+ "scale": 1.3207615540723584e-07,
166
+ "nbits": 24,
167
+ "dtype": "int32",
168
+ "emulated": true
169
+ },
170
+ "blocks.3.ff.fc1.weight": {
171
+ "scale": 0.0011169617888365016,
172
+ "nbits": 8,
173
+ "dtype": "int8",
174
+ "emulated": false
175
+ },
176
+ "blocks.3.ff.fc2.weight": {
177
+ "scale": 0.0008502002038224287,
178
+ "nbits": 8,
179
+ "dtype": "int8",
180
+ "emulated": false
181
+ },
182
+ "blocks.4.norm1.weight": {
183
+ "scale": 1.3003884439983394e-07,
184
+ "nbits": 24,
185
+ "dtype": "int32",
186
+ "emulated": true
187
+ },
188
+ "blocks.4.attn.mask": {
189
+ "scale": 0.007874015826771653,
190
+ "nbits": 8,
191
+ "dtype": "int8",
192
+ "emulated": false
193
+ },
194
+ "blocks.4.attn.W_qkv.weight": {
195
+ "scale": 0.001148089610578582,
196
+ "nbits": 8,
197
+ "dtype": "int8",
198
+ "emulated": false
199
+ },
200
+ "blocks.4.attn.W_o.weight": {
201
+ "scale": 0.0010316128654612143,
202
+ "nbits": 8,
203
+ "dtype": "int8",
204
+ "emulated": false
205
+ },
206
+ "blocks.4.norm2.weight": {
207
+ "scale": 1.3064186788070484e-07,
208
+ "nbits": 24,
209
+ "dtype": "int32",
210
+ "emulated": true
211
+ },
212
+ "blocks.4.ff.fc1.weight": {
213
+ "scale": 0.0011656039895843145,
214
+ "nbits": 8,
215
+ "dtype": "int8",
216
+ "emulated": false
217
+ },
218
+ "blocks.4.ff.fc2.weight": {
219
+ "scale": 0.0006925634595222173,
220
+ "nbits": 8,
221
+ "dtype": "int8",
222
+ "emulated": false
223
+ },
224
+ "blocks.5.norm1.weight": {
225
+ "scale": 1.302203312470367e-07,
226
+ "nbits": 24,
227
+ "dtype": "int32",
228
+ "emulated": true
229
+ },
230
+ "blocks.5.attn.mask": {
231
+ "scale": 0.007874015826771653,
232
+ "nbits": 8,
233
+ "dtype": "int8",
234
+ "emulated": false
235
+ },
236
+ "blocks.5.attn.W_qkv.weight": {
237
+ "scale": 0.0012216476088303093,
238
+ "nbits": 8,
239
+ "dtype": "int8",
240
+ "emulated": false
241
+ },
242
+ "blocks.5.attn.W_o.weight": {
243
+ "scale": 0.0009262990260556739,
244
+ "nbits": 8,
245
+ "dtype": "int8",
246
+ "emulated": false
247
+ },
248
+ "blocks.5.norm2.weight": {
249
+ "scale": 1.308989564840049e-07,
250
+ "nbits": 24,
251
+ "dtype": "int32",
252
+ "emulated": true
253
+ },
254
+ "blocks.5.ff.fc1.weight": {
255
+ "scale": 0.0009419608504622752,
256
+ "nbits": 8,
257
+ "dtype": "int8",
258
+ "emulated": false
259
+ },
260
+ "blocks.5.ff.fc2.weight": {
261
+ "scale": 0.0005326317604001864,
262
+ "nbits": 8,
263
+ "dtype": "int8",
264
+ "emulated": false
265
+ },
266
+ "norm_f.weight": {
267
+ "scale": 1.1641727667871719e-07,
268
+ "nbits": 24,
269
+ "dtype": "int32",
270
+ "emulated": true
271
+ },
272
+ "lm_head.weight": {
273
+ "scale": 0.036867817634565696,
274
+ "nbits": 8,
275
+ "dtype": "int8",
276
+ "emulated": false
277
+ }
278
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
train_stats.json ADDED
The diff for this file is too large to render. See raw diff