tensorlink-dev committed on
Commit
ab5fe31
·
verified ·
1 Parent(s): d84db49

Upload initial model version

Browse files
Files changed (2) hide show
  1. config.json +767 -0
  2. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,767 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "return_dict": true,
3
+ "torchscript": false,
4
+ "torch_dtype": null,
5
+ "use_bfloat16": false,
6
+ "tf_legacy_loss": false,
7
+ "pruned_heads": {},
8
+ "tie_word_embeddings": true,
9
+ "chunk_size_feed_forward": 0,
10
+ "is_encoder_decoder": false,
11
+ "is_decoder": true,
12
+ "cross_attention_hidden_size": null,
13
+ "add_cross_attention": false,
14
+ "tie_encoder_decoder": false,
15
+ "max_length": 20,
16
+ "min_length": 0,
17
+ "do_sample": false,
18
+ "early_stopping": false,
19
+ "num_beams": 1,
20
+ "num_beam_groups": 1,
21
+ "diversity_penalty": 0.0,
22
+ "temperature": 1.0,
23
+ "top_k": 50,
24
+ "top_p": 1.0,
25
+ "typical_p": 1.0,
26
+ "repetition_penalty": 1.0,
27
+ "length_penalty": 1.0,
28
+ "no_repeat_ngram_size": 0,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "bad_words_ids": null,
31
+ "num_return_sequences": 1,
32
+ "output_scores": false,
33
+ "return_dict_in_generate": false,
34
+ "forced_bos_token_id": null,
35
+ "forced_eos_token_id": null,
36
+ "remove_invalid_values": false,
37
+ "exponential_decay_length_penalty": null,
38
+ "suppress_tokens": null,
39
+ "begin_suppress_tokens": null,
40
+ "architectures": null,
41
+ "finetuning_task": null,
42
+ "id2label": {
43
+ "0": "LABEL_0",
44
+ "1": "LABEL_1"
45
+ },
46
+ "label2id": {
47
+ "LABEL_0": 0,
48
+ "LABEL_1": 1
49
+ },
50
+ "tokenizer_class": null,
51
+ "prefix": null,
52
+ "bos_token_id": null,
53
+ "pad_token_id": null,
54
+ "eos_token_id": null,
55
+ "sep_token_id": null,
56
+ "task_specific_params": null,
57
+ "problem_type": null,
58
+ "_name_or_path": "",
59
+ "_attn_implementation_autoset": false,
60
+ "transformers_version": "4.51.3",
61
+ "target_dim": 1,
62
+ "static_dim": 0,
63
+ "dynamic_dim": 0,
64
+ "past_dynamic_dim": 0,
65
+ "static_cardinalities": null,
66
+ "dynamic_cardinalities": null,
67
+ "past_dynamic_cardinalities": null,
68
+ "static_embedding_dim": null,
69
+ "dynamic_embedding_dim": null,
70
+ "past_dynamic_embedding_dim": null,
71
+ "time_features": null,
72
+ "scaling": true,
73
+ "decoder_start_token_value": 0.0,
74
+ "feature_size": 1,
75
+ "context_length": 1024,
76
+ "prediction_length": 256,
77
+ "quantiles": [
78
+ 0.005,
79
+ 0.015,
80
+ 0.025,
81
+ 0.034999999999999996,
82
+ 0.045,
83
+ 0.055,
84
+ 0.065,
85
+ 0.07500000000000001,
86
+ 0.085,
87
+ 0.095,
88
+ 0.10500000000000001,
89
+ 0.115,
90
+ 0.125,
91
+ 0.135,
92
+ 0.14500000000000002,
93
+ 0.155,
94
+ 0.165,
95
+ 0.17500000000000002,
96
+ 0.185,
97
+ 0.195,
98
+ 0.20500000000000002,
99
+ 0.215,
100
+ 0.225,
101
+ 0.23500000000000001,
102
+ 0.245,
103
+ 0.255,
104
+ 0.265,
105
+ 0.275,
106
+ 0.28500000000000003,
107
+ 0.295,
108
+ 0.305,
109
+ 0.315,
110
+ 0.325,
111
+ 0.335,
112
+ 0.34500000000000003,
113
+ 0.35500000000000004,
114
+ 0.365,
115
+ 0.375,
116
+ 0.385,
117
+ 0.395,
118
+ 0.405,
119
+ 0.41500000000000004,
120
+ 0.425,
121
+ 0.435,
122
+ 0.445,
123
+ 0.455,
124
+ 0.465,
125
+ 0.47500000000000003,
126
+ 0.485,
127
+ 0.495,
128
+ 0.505,
129
+ 0.515,
130
+ 0.525,
131
+ 0.535,
132
+ 0.545,
133
+ 0.555,
134
+ 0.5650000000000001,
135
+ 0.5750000000000001,
136
+ 0.585,
137
+ 0.595,
138
+ 0.605,
139
+ 0.615,
140
+ 0.625,
141
+ 0.635,
142
+ 0.645,
143
+ 0.655,
144
+ 0.665,
145
+ 0.675,
146
+ 0.685,
147
+ 0.6950000000000001,
148
+ 0.7050000000000001,
149
+ 0.715,
150
+ 0.725,
151
+ 0.735,
152
+ 0.745,
153
+ 0.755,
154
+ 0.765,
155
+ 0.775,
156
+ 0.785,
157
+ 0.795,
158
+ 0.805,
159
+ 0.8150000000000001,
160
+ 0.8250000000000001,
161
+ 0.8350000000000001,
162
+ 0.845,
163
+ 0.855,
164
+ 0.865,
165
+ 0.875,
166
+ 0.885,
167
+ 0.895,
168
+ 0.905,
169
+ 0.915,
170
+ 0.925,
171
+ 0.935,
172
+ 0.9450000000000001,
173
+ 0.9550000000000001,
174
+ 0.965,
175
+ 0.975,
176
+ 0.985,
177
+ 0.995
178
+ ],
179
+ "output_token_lengths": 1,
180
+ "loss_type": "quantile",
181
+ "use_dynamic_features": false,
182
+ "use_static_features": false,
183
+ "autoregressive": true,
184
+ "gradient_checkpointing": true,
185
+ "model_type": "transformer",
186
+ "d_model": 768,
187
+ "hidden_dropout_prob": 0.1,
188
+ "max_position_embeddings": 4096,
189
+ "architecture": {
190
+ "layout": "decoder",
191
+ "num_encoder_layers": 0,
192
+ "num_decoder_layers": 16,
193
+ "share_weights": false
194
+ },
195
+ "value_embedding_config": {
196
+ "type": "value",
197
+ "dropout": 0.1,
198
+ "embedding_dim": null,
199
+ "kwargs": {
200
+ "feature_size": 1,
201
+ "d_model": 768
202
+ }
203
+ },
204
+ "positional_embedding_config": {
205
+ "type": "stacked_embedding",
206
+ "dropout": 0.1,
207
+ "embedding_dim": null,
208
+ "kwargs": {
209
+ "embedding_configs": [
210
+ {
211
+ "type": "fourier",
212
+ "args": {
213
+ "feature_size": 64
214
+ }
215
+ }
216
+ ],
217
+ "max_seq_len": 4096
218
+ }
219
+ },
220
+ "encoder_blocks": null,
221
+ "decoder_blocks": [
222
+ {
223
+ "block_type": "default_decoder",
224
+ "attention_config": {
225
+ "attention_type": "full",
226
+ "num_heads": 12,
227
+ "dropout": 0.1,
228
+ "bias": true,
229
+ "use_rope": true,
230
+ "use_alibi": true,
231
+ "rope_base": 10000,
232
+ "kwargs": {}
233
+ },
234
+ "cross_attention_config": null,
235
+ "ffn_config": {
236
+ "type": "standard",
237
+ "intermediate_size": 3072,
238
+ "activation": "gelu",
239
+ "dropout": 0.1,
240
+ "bias": true,
241
+ "num_experts": null,
242
+ "top_k": null,
243
+ "expert_intermediate_size": null,
244
+ "load_balancing_coef": 0.01,
245
+ "kwargs": {}
246
+ },
247
+ "norm_config": {
248
+ "norm_type": "layer",
249
+ "eps": 1e-05,
250
+ "kwargs": {}
251
+ },
252
+ "kwargs": {}
253
+ },
254
+ {
255
+ "block_type": "default_decoder",
256
+ "attention_config": {
257
+ "attention_type": "full",
258
+ "num_heads": 12,
259
+ "dropout": 0.1,
260
+ "bias": true,
261
+ "use_rope": true,
262
+ "use_alibi": true,
263
+ "rope_base": 10000,
264
+ "kwargs": {}
265
+ },
266
+ "cross_attention_config": null,
267
+ "ffn_config": {
268
+ "type": "standard",
269
+ "intermediate_size": 3072,
270
+ "activation": "gelu",
271
+ "dropout": 0.1,
272
+ "bias": true,
273
+ "num_experts": null,
274
+ "top_k": null,
275
+ "expert_intermediate_size": null,
276
+ "load_balancing_coef": 0.01,
277
+ "kwargs": {}
278
+ },
279
+ "norm_config": {
280
+ "norm_type": "layer",
281
+ "eps": 1e-05,
282
+ "kwargs": {}
283
+ },
284
+ "kwargs": {}
285
+ },
286
+ {
287
+ "block_type": "default_decoder",
288
+ "attention_config": {
289
+ "attention_type": "full",
290
+ "num_heads": 12,
291
+ "dropout": 0.1,
292
+ "bias": true,
293
+ "use_rope": true,
294
+ "use_alibi": true,
295
+ "rope_base": 10000,
296
+ "kwargs": {}
297
+ },
298
+ "cross_attention_config": null,
299
+ "ffn_config": {
300
+ "type": "standard",
301
+ "intermediate_size": 3072,
302
+ "activation": "gelu",
303
+ "dropout": 0.1,
304
+ "bias": true,
305
+ "num_experts": null,
306
+ "top_k": null,
307
+ "expert_intermediate_size": null,
308
+ "load_balancing_coef": 0.01,
309
+ "kwargs": {}
310
+ },
311
+ "norm_config": {
312
+ "norm_type": "layer",
313
+ "eps": 1e-05,
314
+ "kwargs": {}
315
+ },
316
+ "kwargs": {}
317
+ },
318
+ {
319
+ "block_type": "default_decoder",
320
+ "attention_config": {
321
+ "attention_type": "full",
322
+ "num_heads": 12,
323
+ "dropout": 0.1,
324
+ "bias": true,
325
+ "use_rope": true,
326
+ "use_alibi": true,
327
+ "rope_base": 10000,
328
+ "kwargs": {}
329
+ },
330
+ "cross_attention_config": null,
331
+ "ffn_config": {
332
+ "type": "standard",
333
+ "intermediate_size": 3072,
334
+ "activation": "gelu",
335
+ "dropout": 0.1,
336
+ "bias": true,
337
+ "num_experts": null,
338
+ "top_k": null,
339
+ "expert_intermediate_size": null,
340
+ "load_balancing_coef": 0.01,
341
+ "kwargs": {}
342
+ },
343
+ "norm_config": {
344
+ "norm_type": "layer",
345
+ "eps": 1e-05,
346
+ "kwargs": {}
347
+ },
348
+ "kwargs": {}
349
+ },
350
+ {
351
+ "block_type": "default_decoder",
352
+ "attention_config": {
353
+ "attention_type": "full",
354
+ "num_heads": 12,
355
+ "dropout": 0.1,
356
+ "bias": true,
357
+ "use_rope": true,
358
+ "use_alibi": true,
359
+ "rope_base": 10000,
360
+ "kwargs": {}
361
+ },
362
+ "cross_attention_config": null,
363
+ "ffn_config": {
364
+ "type": "standard",
365
+ "intermediate_size": 3072,
366
+ "activation": "gelu",
367
+ "dropout": 0.1,
368
+ "bias": true,
369
+ "num_experts": null,
370
+ "top_k": null,
371
+ "expert_intermediate_size": null,
372
+ "load_balancing_coef": 0.01,
373
+ "kwargs": {}
374
+ },
375
+ "norm_config": {
376
+ "norm_type": "layer",
377
+ "eps": 1e-05,
378
+ "kwargs": {}
379
+ },
380
+ "kwargs": {}
381
+ },
382
+ {
383
+ "block_type": "default_decoder",
384
+ "attention_config": {
385
+ "attention_type": "full",
386
+ "num_heads": 12,
387
+ "dropout": 0.1,
388
+ "bias": true,
389
+ "use_rope": true,
390
+ "use_alibi": true,
391
+ "rope_base": 10000,
392
+ "kwargs": {}
393
+ },
394
+ "cross_attention_config": null,
395
+ "ffn_config": {
396
+ "type": "standard",
397
+ "intermediate_size": 3072,
398
+ "activation": "gelu",
399
+ "dropout": 0.1,
400
+ "bias": true,
401
+ "num_experts": null,
402
+ "top_k": null,
403
+ "expert_intermediate_size": null,
404
+ "load_balancing_coef": 0.01,
405
+ "kwargs": {}
406
+ },
407
+ "norm_config": {
408
+ "norm_type": "layer",
409
+ "eps": 1e-05,
410
+ "kwargs": {}
411
+ },
412
+ "kwargs": {}
413
+ },
414
+ {
415
+ "block_type": "default_decoder",
416
+ "attention_config": {
417
+ "attention_type": "full",
418
+ "num_heads": 12,
419
+ "dropout": 0.1,
420
+ "bias": true,
421
+ "use_rope": true,
422
+ "use_alibi": true,
423
+ "rope_base": 10000,
424
+ "kwargs": {}
425
+ },
426
+ "cross_attention_config": null,
427
+ "ffn_config": {
428
+ "type": "standard",
429
+ "intermediate_size": 3072,
430
+ "activation": "gelu",
431
+ "dropout": 0.1,
432
+ "bias": true,
433
+ "num_experts": null,
434
+ "top_k": null,
435
+ "expert_intermediate_size": null,
436
+ "load_balancing_coef": 0.01,
437
+ "kwargs": {}
438
+ },
439
+ "norm_config": {
440
+ "norm_type": "layer",
441
+ "eps": 1e-05,
442
+ "kwargs": {}
443
+ },
444
+ "kwargs": {}
445
+ },
446
+ {
447
+ "block_type": "default_decoder",
448
+ "attention_config": {
449
+ "attention_type": "full",
450
+ "num_heads": 12,
451
+ "dropout": 0.1,
452
+ "bias": true,
453
+ "use_rope": true,
454
+ "use_alibi": true,
455
+ "rope_base": 10000,
456
+ "kwargs": {}
457
+ },
458
+ "cross_attention_config": null,
459
+ "ffn_config": {
460
+ "type": "standard",
461
+ "intermediate_size": 3072,
462
+ "activation": "gelu",
463
+ "dropout": 0.1,
464
+ "bias": true,
465
+ "num_experts": null,
466
+ "top_k": null,
467
+ "expert_intermediate_size": null,
468
+ "load_balancing_coef": 0.01,
469
+ "kwargs": {}
470
+ },
471
+ "norm_config": {
472
+ "norm_type": "layer",
473
+ "eps": 1e-05,
474
+ "kwargs": {}
475
+ },
476
+ "kwargs": {}
477
+ },
478
+ {
479
+ "block_type": "default_decoder",
480
+ "attention_config": {
481
+ "attention_type": "full",
482
+ "num_heads": 12,
483
+ "dropout": 0.1,
484
+ "bias": true,
485
+ "use_rope": true,
486
+ "use_alibi": true,
487
+ "rope_base": 10000,
488
+ "kwargs": {}
489
+ },
490
+ "cross_attention_config": null,
491
+ "ffn_config": {
492
+ "type": "standard",
493
+ "intermediate_size": 3072,
494
+ "activation": "gelu",
495
+ "dropout": 0.1,
496
+ "bias": true,
497
+ "num_experts": null,
498
+ "top_k": null,
499
+ "expert_intermediate_size": null,
500
+ "load_balancing_coef": 0.01,
501
+ "kwargs": {}
502
+ },
503
+ "norm_config": {
504
+ "norm_type": "layer",
505
+ "eps": 1e-05,
506
+ "kwargs": {}
507
+ },
508
+ "kwargs": {}
509
+ },
510
+ {
511
+ "block_type": "default_decoder",
512
+ "attention_config": {
513
+ "attention_type": "full",
514
+ "num_heads": 12,
515
+ "dropout": 0.1,
516
+ "bias": true,
517
+ "use_rope": true,
518
+ "use_alibi": true,
519
+ "rope_base": 10000,
520
+ "kwargs": {}
521
+ },
522
+ "cross_attention_config": null,
523
+ "ffn_config": {
524
+ "type": "standard",
525
+ "intermediate_size": 3072,
526
+ "activation": "gelu",
527
+ "dropout": 0.1,
528
+ "bias": true,
529
+ "num_experts": null,
530
+ "top_k": null,
531
+ "expert_intermediate_size": null,
532
+ "load_balancing_coef": 0.01,
533
+ "kwargs": {}
534
+ },
535
+ "norm_config": {
536
+ "norm_type": "layer",
537
+ "eps": 1e-05,
538
+ "kwargs": {}
539
+ },
540
+ "kwargs": {}
541
+ },
542
+ {
543
+ "block_type": "default_decoder",
544
+ "attention_config": {
545
+ "attention_type": "full",
546
+ "num_heads": 12,
547
+ "dropout": 0.1,
548
+ "bias": true,
549
+ "use_rope": true,
550
+ "use_alibi": true,
551
+ "rope_base": 10000,
552
+ "kwargs": {}
553
+ },
554
+ "cross_attention_config": null,
555
+ "ffn_config": {
556
+ "type": "standard",
557
+ "intermediate_size": 3072,
558
+ "activation": "gelu",
559
+ "dropout": 0.1,
560
+ "bias": true,
561
+ "num_experts": null,
562
+ "top_k": null,
563
+ "expert_intermediate_size": null,
564
+ "load_balancing_coef": 0.01,
565
+ "kwargs": {}
566
+ },
567
+ "norm_config": {
568
+ "norm_type": "layer",
569
+ "eps": 1e-05,
570
+ "kwargs": {}
571
+ },
572
+ "kwargs": {}
573
+ },
574
+ {
575
+ "block_type": "default_decoder",
576
+ "attention_config": {
577
+ "attention_type": "full",
578
+ "num_heads": 12,
579
+ "dropout": 0.1,
580
+ "bias": true,
581
+ "use_rope": true,
582
+ "use_alibi": true,
583
+ "rope_base": 10000,
584
+ "kwargs": {}
585
+ },
586
+ "cross_attention_config": null,
587
+ "ffn_config": {
588
+ "type": "standard",
589
+ "intermediate_size": 3072,
590
+ "activation": "gelu",
591
+ "dropout": 0.1,
592
+ "bias": true,
593
+ "num_experts": null,
594
+ "top_k": null,
595
+ "expert_intermediate_size": null,
596
+ "load_balancing_coef": 0.01,
597
+ "kwargs": {}
598
+ },
599
+ "norm_config": {
600
+ "norm_type": "layer",
601
+ "eps": 1e-05,
602
+ "kwargs": {}
603
+ },
604
+ "kwargs": {}
605
+ },
606
+ {
607
+ "block_type": "default_decoder",
608
+ "attention_config": {
609
+ "attention_type": "full",
610
+ "num_heads": 12,
611
+ "dropout": 0.1,
612
+ "bias": true,
613
+ "use_rope": true,
614
+ "use_alibi": true,
615
+ "rope_base": 10000,
616
+ "kwargs": {}
617
+ },
618
+ "cross_attention_config": null,
619
+ "ffn_config": {
620
+ "type": "standard",
621
+ "intermediate_size": 3072,
622
+ "activation": "gelu",
623
+ "dropout": 0.1,
624
+ "bias": true,
625
+ "num_experts": null,
626
+ "top_k": null,
627
+ "expert_intermediate_size": null,
628
+ "load_balancing_coef": 0.01,
629
+ "kwargs": {}
630
+ },
631
+ "norm_config": {
632
+ "norm_type": "layer",
633
+ "eps": 1e-05,
634
+ "kwargs": {}
635
+ },
636
+ "kwargs": {}
637
+ },
638
+ {
639
+ "block_type": "default_decoder",
640
+ "attention_config": {
641
+ "attention_type": "full",
642
+ "num_heads": 12,
643
+ "dropout": 0.1,
644
+ "bias": true,
645
+ "use_rope": true,
646
+ "use_alibi": true,
647
+ "rope_base": 10000,
648
+ "kwargs": {}
649
+ },
650
+ "cross_attention_config": null,
651
+ "ffn_config": {
652
+ "type": "standard",
653
+ "intermediate_size": 3072,
654
+ "activation": "gelu",
655
+ "dropout": 0.1,
656
+ "bias": true,
657
+ "num_experts": null,
658
+ "top_k": null,
659
+ "expert_intermediate_size": null,
660
+ "load_balancing_coef": 0.01,
661
+ "kwargs": {}
662
+ },
663
+ "norm_config": {
664
+ "norm_type": "layer",
665
+ "eps": 1e-05,
666
+ "kwargs": {}
667
+ },
668
+ "kwargs": {}
669
+ },
670
+ {
671
+ "block_type": "default_decoder",
672
+ "attention_config": {
673
+ "attention_type": "full",
674
+ "num_heads": 12,
675
+ "dropout": 0.1,
676
+ "bias": true,
677
+ "use_rope": true,
678
+ "use_alibi": true,
679
+ "rope_base": 10000,
680
+ "kwargs": {}
681
+ },
682
+ "cross_attention_config": null,
683
+ "ffn_config": {
684
+ "type": "standard",
685
+ "intermediate_size": 3072,
686
+ "activation": "gelu",
687
+ "dropout": 0.1,
688
+ "bias": true,
689
+ "num_experts": null,
690
+ "top_k": null,
691
+ "expert_intermediate_size": null,
692
+ "load_balancing_coef": 0.01,
693
+ "kwargs": {}
694
+ },
695
+ "norm_config": {
696
+ "norm_type": "layer",
697
+ "eps": 1e-05,
698
+ "kwargs": {}
699
+ },
700
+ "kwargs": {}
701
+ },
702
+ {
703
+ "block_type": "default_decoder",
704
+ "attention_config": {
705
+ "attention_type": "full",
706
+ "num_heads": 12,
707
+ "dropout": 0.1,
708
+ "bias": true,
709
+ "use_rope": true,
710
+ "use_alibi": true,
711
+ "rope_base": 10000,
712
+ "kwargs": {}
713
+ },
714
+ "cross_attention_config": null,
715
+ "ffn_config": {
716
+ "type": "standard",
717
+ "intermediate_size": 3072,
718
+ "activation": "gelu",
719
+ "dropout": 0.1,
720
+ "bias": true,
721
+ "num_experts": null,
722
+ "top_k": null,
723
+ "expert_intermediate_size": null,
724
+ "load_balancing_coef": 0.01,
725
+ "kwargs": {}
726
+ },
727
+ "norm_config": {
728
+ "norm_type": "layer",
729
+ "eps": 1e-05,
730
+ "kwargs": {}
731
+ },
732
+ "kwargs": {}
733
+ }
734
+ ],
735
+ "output_head_config": {
736
+ "type": "distpred",
737
+ "output_size": 100,
738
+ "kwargs": {
739
+ "num_outputs": 100,
740
+ "feature_size": 1
741
+ }
742
+ },
743
+ "norm_config": {
744
+ "norm_type": "layer",
745
+ "eps": 1e-05,
746
+ "kwargs": {}
747
+ },
748
+ "head_agg_config": {
749
+ "type": "mean",
750
+ "kwargs": {}
751
+ },
752
+ "loss_config": {
753
+ "type": "crps",
754
+ "kwargs": {
755
+ "scaling_type": "minmax",
756
+ "scaling_dim": 1,
757
+ "scaling_eps": 1e-08
758
+ }
759
+ },
760
+ "output_attentions": false,
761
+ "output_hidden_states": false,
762
+ "use_teacher_forcing": true,
763
+ "quantizer_config": null,
764
+ "vocab_size": null,
765
+ "decoder_start_token_id": null,
766
+ "num_quantiles": 100
767
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:384311e5de374e80bd3fc7e573d70842b23a77ca4da4620d7dfa2730f64200df
3
+ size 454072768