C10X committed on
Commit
f704f81
·
verified ·
1 Parent(s): 3a03c75

Upload 7 files

Browse files
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 2,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2048,
15
+ "layer_types": [
16
+ "sliding_attention",
17
+ "sliding_attention",
18
+ "sliding_attention",
19
+ "full_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "full_attention"
24
+ ],
25
+ "max_position_embeddings": 8192,
26
+ "max_window_layers": 7,
27
+ "model_type": "qwen3",
28
+ "num_attention_heads": 8,
29
+ "num_hidden_layers": 8,
30
+ "num_key_value_heads": 2,
31
+ "pad_token_id": 1,
32
+ "rms_norm_eps": 1e-06,
33
+ "rope_scaling": null,
34
+ "rope_theta": 10000.0,
35
+ "sliding_window": 512,
36
+ "tie_word_embeddings": true,
37
+ "transformers_version": "4.57.6",
38
+ "use_cache": false,
39
+ "use_sliding_window": true,
40
+ "vocab_size": 32768
41
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 1,
6
+ "use_cache": false
7
+ }
metadata.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "preset": "qwen3-falcon-h1-tiny-r-90m-8layer",
3
+ "family": "qwen3",
4
+ "source": "inline-preset",
5
+ "output_dir": "D:\\Qwen3-80m-tinystories-A\\workspace\\outputs\\qwen3-falcon-h1-tiny-r-90m-8layer",
6
+ "parameters": 47195648,
7
+ "vocab_size": 32768,
8
+ "hidden_size": 512,
9
+ "num_hidden_layers": 8,
10
+ "num_attention_heads": 8,
11
+ "num_key_value_heads": 2,
12
+ "rope_theta": 10000.0
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915d11fa9c97f48368bc8deef9628d0641bddd5bdcc12fb08cfee0277d8368fe
3
+ size 94401136
special_tokens_map.json ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|pad|>",
4
+ ">>ABSTRACT<<",
5
+ ">>INTRODUCTION<<",
6
+ ">>SUMMARY<<",
7
+ ">>COMMENT<<",
8
+ ">>ANSWER<<",
9
+ ">>QUESTION<<",
10
+ ">>DOMAIN<<",
11
+ ">>PREFIX<<",
12
+ ">>SUFFIX<<",
13
+ ">>MIDDLE<<",
14
+ "<|finetune_right_pad_id|>",
15
+ "<|start_header_id|>",
16
+ "<|end_header_id|>",
17
+ "<|eom_id|>",
18
+ "<|eot_id|>",
19
+ "<|begin_of_text|>",
20
+ ">>TITLE<<",
21
+ "<tool_response>",
22
+ "</tool_response>",
23
+ "<tool_call>",
24
+ "</tool_call>",
25
+ "<schema>",
26
+ "</schema>",
27
+ "<scratch_pad>",
28
+ "</scratch_pad>",
29
+ "<thinking>",
30
+ "</thinking>",
31
+ "<explanation>",
32
+ "</explanation>",
33
+ "<file_sep>",
34
+ "<repo_name>",
35
+ "<|im_end|>",
36
+ "<|im_start|>",
37
+ "<|system|>",
38
+ "<|file_sep|>",
39
+ "<|repo_name|>",
40
+ "<|repo_tree|>",
41
+ ">>UNUSED_221<<",
42
+ ">>UNUSED_222<<",
43
+ ">>UNUSED_223<<",
44
+ ">>UNUSED_224<<",
45
+ ">>UNUSED_225<<",
46
+ ">>UNUSED_226<<",
47
+ ">>UNUSED_227<<",
48
+ ">>UNUSED_228<<",
49
+ ">>UNUSED_229<<",
50
+ ">>UNUSED_230<<",
51
+ ">>UNUSED_231<<",
52
+ ">>UNUSED_232<<",
53
+ ">>UNUSED_233<<",
54
+ ">>UNUSED_234<<",
55
+ ">>UNUSED_235<<",
56
+ ">>UNUSED_236<<",
57
+ ">>UNUSED_237<<",
58
+ ">>UNUSED_238<<",
59
+ ">>UNUSED_239<<",
60
+ ">>UNUSED_240<<",
61
+ ">>UNUSED_241<<",
62
+ ">>UNUSED_242<<",
63
+ ">>UNUSED_243<<",
64
+ ">>UNUSED_244<<",
65
+ ">>UNUSED_245<<",
66
+ ">>UNUSED_246<<",
67
+ ">>UNUSED_247<<",
68
+ ">>UNUSED_248<<",
69
+ ">>UNUSED_249<<",
70
+ ">>UNUSED_250<<",
71
+ ">>UNUSED_251<<",
72
+ ">>UNUSED_252<<",
73
+ ">>UNUSED_253<<",
74
+ ">>UNUSED_254<<",
75
+ ">>UNUSED_255<<",
76
+ ">>UNUSED_256<<",
77
+ ">>UNUSED_257<<",
78
+ ">>UNUSED_258<<",
79
+ ">>UNUSED_259<<",
80
+ ">>UNUSED_260<<",
81
+ ">>UNUSED_261<<",
82
+ ">>UNUSED_262<<",
83
+ ">>UNUSED_263<<",
84
+ ">>UNUSED_264<<",
85
+ ">>UNUSED_265<<",
86
+ ">>UNUSED_266<<",
87
+ ">>UNUSED_267<<",
88
+ ">>UNUSED_268<<",
89
+ ">>UNUSED_269<<",
90
+ ">>UNUSED_270<<",
91
+ ">>UNUSED_271<<",
92
+ ">>UNUSED_272<<",
93
+ ">>UNUSED_273<<",
94
+ ">>UNUSED_274<<",
95
+ ">>UNUSED_275<<",
96
+ ">>UNUSED_276<<",
97
+ ">>UNUSED_277<<",
98
+ ">>UNUSED_278<<",
99
+ ">>UNUSED_279<<",
100
+ ">>UNUSED_280<<",
101
+ ">>UNUSED_281<<",
102
+ ">>UNUSED_282<<",
103
+ ">>UNUSED_283<<",
104
+ ">>UNUSED_284<<",
105
+ ">>UNUSED_285<<",
106
+ ">>UNUSED_286<<",
107
+ ">>UNUSED_287<<",
108
+ ">>UNUSED_288<<",
109
+ ">>UNUSED_289<<",
110
+ ">>UNUSED_290<<",
111
+ ">>UNUSED_291<<",
112
+ ">>UNUSED_292<<",
113
+ ">>UNUSED_293<<",
114
+ ">>UNUSED_294<<",
115
+ ">>UNUSED_295<<",
116
+ ">>UNUSED_296<<",
117
+ ">>UNUSED_297<<",
118
+ ">>UNUSED_298<<",
119
+ ">>UNUSED_299<<",
120
+ ">>UNUSED_300<<",
121
+ ">>UNUSED_301<<",
122
+ ">>UNUSED_302<<",
123
+ ">>UNUSED_303<<",
124
+ ">>UNUSED_304<<",
125
+ ">>UNUSED_305<<",
126
+ ">>UNUSED_306<<",
127
+ ">>UNUSED_307<<",
128
+ ">>UNUSED_308<<",
129
+ ">>UNUSED_309<<",
130
+ ">>UNUSED_310<<",
131
+ ">>UNUSED_311<<",
132
+ ">>UNUSED_312<<",
133
+ ">>UNUSED_313<<",
134
+ ">>UNUSED_314<<",
135
+ ">>UNUSED_315<<",
136
+ ">>UNUSED_316<<",
137
+ ">>UNUSED_317<<",
138
+ ">>UNUSED_318<<",
139
+ ">>UNUSED_319<<",
140
+ ">>UNUSED_320<<",
141
+ ">>UNUSED_321<<",
142
+ ">>UNUSED_322<<",
143
+ ">>UNUSED_323<<",
144
+ ">>UNUSED_324<<",
145
+ ">>UNUSED_325<<",
146
+ ">>UNUSED_326<<",
147
+ ">>UNUSED_327<<",
148
+ ">>UNUSED_328<<",
149
+ ">>UNUSED_329<<",
150
+ ">>UNUSED_330<<",
151
+ ">>UNUSED_331<<",
152
+ ">>UNUSED_332<<",
153
+ ">>UNUSED_333<<",
154
+ ">>UNUSED_334<<",
155
+ ">>UNUSED_335<<",
156
+ ">>UNUSED_336<<",
157
+ ">>UNUSED_337<<",
158
+ ">>UNUSED_338<<",
159
+ ">>UNUSED_339<<",
160
+ ">>UNUSED_340<<",
161
+ ">>UNUSED_341<<",
162
+ ">>UNUSED_342<<",
163
+ ">>UNUSED_343<<",
164
+ ">>UNUSED_344<<",
165
+ ">>UNUSED_345<<",
166
+ ">>UNUSED_346<<",
167
+ ">>UNUSED_347<<",
168
+ ">>UNUSED_348<<",
169
+ ">>UNUSED_349<<",
170
+ ">>UNUSED_350<<",
171
+ ">>UNUSED_351<<",
172
+ ">>UNUSED_352<<",
173
+ ">>UNUSED_353<<",
174
+ ">>UNUSED_354<<",
175
+ ">>UNUSED_355<<",
176
+ ">>UNUSED_356<<",
177
+ ">>UNUSED_357<<",
178
+ ">>UNUSED_358<<",
179
+ ">>UNUSED_359<<",
180
+ ">>UNUSED_360<<",
181
+ ">>UNUSED_361<<",
182
+ ">>UNUSED_362<<",
183
+ ">>UNUSED_363<<",
184
+ ">>UNUSED_364<<",
185
+ ">>UNUSED_365<<",
186
+ ">>UNUSED_366<<",
187
+ ">>UNUSED_367<<",
188
+ ">>UNUSED_368<<",
189
+ ">>UNUSED_369<<",
190
+ ">>UNUSED_370<<",
191
+ ">>UNUSED_371<<",
192
+ ">>UNUSED_372<<",
193
+ ">>UNUSED_373<<",
194
+ ">>UNUSED_374<<",
195
+ ">>UNUSED_375<<",
196
+ ">>UNUSED_376<<",
197
+ ">>UNUSED_377<<",
198
+ ">>UNUSED_378<<",
199
+ ">>UNUSED_379<<",
200
+ ">>UNUSED_380<<",
201
+ ">>UNUSED_381<<",
202
+ ">>UNUSED_382<<",
203
+ ">>UNUSED_383<<",
204
+ ">>UNUSED_384<<",
205
+ ">>UNUSED_385<<",
206
+ ">>UNUSED_386<<",
207
+ ">>UNUSED_387<<",
208
+ ">>UNUSED_388<<",
209
+ ">>UNUSED_389<<",
210
+ ">>UNUSED_390<<",
211
+ ">>UNUSED_391<<",
212
+ ">>UNUSED_392<<",
213
+ ">>UNUSED_393<<",
214
+ ">>UNUSED_394<<",
215
+ ">>UNUSED_395<<",
216
+ ">>UNUSED_396<<",
217
+ ">>UNUSED_397<<",
218
+ ">>UNUSED_398<<",
219
+ ">>UNUSED_399<<",
220
+ ">>UNUSED_400<<",
221
+ ">>UNUSED_401<<",
222
+ ">>UNUSED_402<<",
223
+ ">>UNUSED_403<<",
224
+ ">>UNUSED_404<<",
225
+ ">>UNUSED_405<<",
226
+ ">>UNUSED_406<<",
227
+ ">>UNUSED_407<<",
228
+ ">>UNUSED_408<<",
229
+ ">>UNUSED_409<<",
230
+ ">>UNUSED_410<<",
231
+ ">>UNUSED_411<<",
232
+ ">>UNUSED_412<<",
233
+ ">>UNUSED_413<<",
234
+ ">>UNUSED_414<<",
235
+ ">>UNUSED_415<<",
236
+ ">>UNUSED_416<<",
237
+ ">>UNUSED_417<<",
238
+ ">>UNUSED_418<<",
239
+ ">>UNUSED_419<<",
240
+ ">>UNUSED_420<<",
241
+ ">>UNUSED_421<<",
242
+ ">>UNUSED_422<<",
243
+ ">>UNUSED_423<<",
244
+ ">>UNUSED_424<<",
245
+ ">>UNUSED_425<<",
246
+ ">>UNUSED_426<<",
247
+ ">>UNUSED_427<<",
248
+ ">>UNUSED_428<<",
249
+ ">>UNUSED_429<<",
250
+ ">>UNUSED_430<<",
251
+ ">>UNUSED_431<<",
252
+ ">>UNUSED_432<<",
253
+ ">>UNUSED_433<<",
254
+ ">>UNUSED_434<<",
255
+ ">>UNUSED_435<<",
256
+ ">>UNUSED_436<<",
257
+ ">>UNUSED_437<<",
258
+ ">>UNUSED_438<<",
259
+ ">>UNUSED_439<<",
260
+ ">>UNUSED_440<<",
261
+ ">>UNUSED_441<<",
262
+ ">>UNUSED_442<<",
263
+ ">>UNUSED_443<<",
264
+ ">>UNUSED_444<<",
265
+ ">>UNUSED_445<<",
266
+ ">>UNUSED_446<<",
267
+ ">>UNUSED_447<<",
268
+ ">>UNUSED_448<<",
269
+ ">>UNUSED_449<<",
270
+ ">>UNUSED_450<<",
271
+ ">>UNUSED_451<<",
272
+ ">>UNUSED_452<<",
273
+ ">>UNUSED_453<<",
274
+ ">>UNUSED_454<<",
275
+ ">>UNUSED_455<<",
276
+ ">>UNUSED_456<<",
277
+ ">>UNUSED_457<<",
278
+ ">>UNUSED_458<<",
279
+ ">>UNUSED_459<<",
280
+ ">>UNUSED_460<<",
281
+ ">>UNUSED_461<<",
282
+ ">>UNUSED_462<<",
283
+ ">>UNUSED_463<<",
284
+ ">>UNUSED_464<<",
285
+ ">>UNUSED_465<<",
286
+ ">>UNUSED_466<<",
287
+ ">>UNUSED_467<<",
288
+ ">>UNUSED_468<<",
289
+ ">>UNUSED_469<<",
290
+ ">>UNUSED_470<<",
291
+ ">>UNUSED_471<<",
292
+ ">>UNUSED_472<<",
293
+ ">>UNUSED_473<<",
294
+ ">>UNUSED_474<<",
295
+ ">>UNUSED_475<<",
296
+ ">>UNUSED_476<<",
297
+ ">>UNUSED_477<<",
298
+ ">>UNUSED_478<<",
299
+ ">>UNUSED_479<<",
300
+ ">>UNUSED_480<<",
301
+ ">>UNUSED_481<<",
302
+ ">>UNUSED_482<<",
303
+ ">>UNUSED_483<<",
304
+ ">>UNUSED_484<<",
305
+ ">>UNUSED_485<<",
306
+ ">>UNUSED_486<<",
307
+ ">>UNUSED_487<<",
308
+ ">>UNUSED_488<<",
309
+ ">>UNUSED_489<<",
310
+ ">>UNUSED_490<<",
311
+ ">>UNUSED_491<<",
312
+ ">>UNUSED_492<<",
313
+ ">>UNUSED_493<<",
314
+ ">>UNUSED_494<<",
315
+ ">>UNUSED_495<<",
316
+ ">>UNUSED_496<<",
317
+ ">>UNUSED_497<<",
318
+ ">>UNUSED_498<<",
319
+ ">>UNUSED_499<<",
320
+ ">>UNUSED_500<<",
321
+ ">>UNUSED_501<<",
322
+ ">>UNUSED_502<<",
323
+ ">>UNUSED_503<<",
324
+ ">>UNUSED_504<<",
325
+ ">>UNUSED_505<<",
326
+ ">>UNUSED_506<<",
327
+ ">>UNUSED_507<<",
328
+ ">>UNUSED_508<<",
329
+ ">>UNUSED_509<<",
330
+ ">>UNUSED_510<<",
331
+ ">>UNUSED_511<<"
332
+ ],
333
+ "bos_token": {
334
+ "content": "<|begin_of_text|>",
335
+ "lstrip": false,
336
+ "normalized": false,
337
+ "rstrip": false,
338
+ "single_word": false
339
+ },
340
+ "eos_token": {
341
+ "content": "<|end_of_text|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false
346
+ },
347
+ "pad_token": {
348
+ "content": "<|pad|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false
353
+ }
354
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff