tensorlink-dev committed on
Commit
71dbd15
·
verified ·
1 Parent(s): f021c6e

Upload initial model version

Browse files
Files changed (2) hide show
  1. config.json +251 -164
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,35 +1,205 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "architecture": {
3
- "layout": "encoder-decoder",
4
- "num_encoder_layers": 2,
5
  "num_decoder_layers": 2,
6
  "share_weights": false
7
  },
8
- "attention_blocks": {
9
- "encoder_attention": {
10
- "attention_type": "full",
11
- "num_heads": 4,
12
- "dropout": 0.1,
13
- "kwargs": {}
14
- },
15
- "decoder_attention": {
16
- "attention_type": "full",
17
- "num_heads": 4,
18
- "dropout": 0.1,
19
- "kwargs": {}
20
- },
21
- "decoder_cross_attention": {
22
- "attention_type": "full",
23
- "num_heads": 4,
24
- "dropout": 0.1,
25
- "kwargs": {}
26
- }
27
- },
28
  "value_embedding_config": {
29
  "type": "value",
30
  "dropout": 0.1,
31
  "embedding_dim": null,
32
- "kwargs": {}
 
 
 
 
33
  },
34
  "positional_embedding_config": {
35
  "type": "stacked_embedding",
@@ -38,111 +208,90 @@
38
  "kwargs": {
39
  "embedding_configs": [
40
  {
41
- "type": "sinusoidal",
42
- "args": {
43
- "max_seq_len": 128
44
- }
45
- },
46
- {
47
- "type": "timedelta",
48
  "args": {
49
- "hidden_dim": 16
50
- }
51
- },
52
- {
53
- "type": "learned_abs",
54
- "args": {
55
- "max_seq_len": 128
56
  }
57
  }
58
  ],
59
- "max_seq_len": 4096
60
  }
61
  },
62
- "feedforward_config": {
63
- "type": "standard",
64
- "intermediate_size": 256,
65
- "activation": "gelu",
66
- "dropout": 0.1,
67
- "kwargs": {}
68
- },
69
- "output_head_config": {
70
- "type": "linear",
71
- "output_size": 1,
72
- "kwargs": {}
73
- },
74
- "encoder_blocks": [
75
  {
76
- "block_type": "default_encoder",
77
  "attention_config": {
78
  "attention_type": "full",
79
  "num_heads": 2,
80
  "dropout": 0.1,
 
 
 
 
81
  "kwargs": {}
82
  },
 
83
  "ffn_config": {
84
  "type": "standard",
85
- "intermediate_size": 64,
86
  "activation": "gelu",
87
  "dropout": 0.1,
 
 
 
 
 
88
  "kwargs": {}
89
  },
90
- "kwargs": {}
91
- },
92
- {
93
- "block_type": "default_encoder",
94
- "attention_config": {
95
- "attention_type": "full",
96
- "num_heads": 2,
97
- "dropout": 0.1,
98
- "kwargs": {}
99
- },
100
- "ffn_config": {
101
- "type": "standard",
102
- "intermediate_size": 64,
103
- "activation": "gelu",
104
- "dropout": 0.1,
105
  "kwargs": {}
106
  },
107
  "kwargs": {}
108
- }
109
- ],
110
- "decoder_blocks": [
111
  {
112
  "block_type": "default_decoder",
113
  "attention_config": {
114
  "attention_type": "full",
115
  "num_heads": 2,
116
  "dropout": 0.1,
 
 
 
 
117
  "kwargs": {}
118
  },
 
119
  "ffn_config": {
120
  "type": "standard",
121
- "intermediate_size": 64,
122
  "activation": "gelu",
123
  "dropout": 0.1,
 
 
 
 
 
124
  "kwargs": {}
125
  },
126
- "kwargs": {}
127
- },
128
- {
129
- "block_type": "default_decoder",
130
- "attention_config": {
131
- "attention_type": "full",
132
- "num_heads": 2,
133
- "dropout": 0.1,
134
- "kwargs": {}
135
- },
136
- "ffn_config": {
137
- "type": "standard",
138
- "intermediate_size": 64,
139
- "activation": "gelu",
140
- "dropout": 0.1,
141
  "kwargs": {}
142
  },
143
  "kwargs": {}
144
  }
145
  ],
 
 
 
 
 
 
 
 
146
  "norm_config": {
147
  "norm_type": "layer",
148
  "eps": 1e-05,
@@ -152,84 +301,22 @@
152
  "type": "mean",
153
  "kwargs": {}
154
  },
155
- "hidden_size": 32,
156
- "num_quantiles": 3,
 
 
 
 
 
 
 
 
 
157
  "output_attentions": false,
158
  "output_hidden_states": false,
159
  "use_teacher_forcing": true,
160
- "hidden_dropout_prob": 0.1,
161
- "return_dict": true,
162
- "torchscript": false,
163
- "torch_dtype": null,
164
- "use_bfloat16": false,
165
- "tf_legacy_loss": false,
166
- "pruned_heads": {},
167
- "tie_word_embeddings": true,
168
- "chunk_size_feed_forward": 0,
169
- "is_encoder_decoder": false,
170
- "is_decoder": false,
171
- "cross_attention_hidden_size": null,
172
- "add_cross_attention": false,
173
- "tie_encoder_decoder": false,
174
- "max_length": 20,
175
- "min_length": 0,
176
- "do_sample": false,
177
- "early_stopping": false,
178
- "num_beams": 1,
179
- "num_beam_groups": 1,
180
- "diversity_penalty": 0.0,
181
- "temperature": 1.0,
182
- "top_k": 50,
183
- "top_p": 1.0,
184
- "typical_p": 1.0,
185
- "repetition_penalty": 1.0,
186
- "length_penalty": 1.0,
187
- "no_repeat_ngram_size": 0,
188
- "encoder_no_repeat_ngram_size": 0,
189
- "bad_words_ids": null,
190
- "num_return_sequences": 1,
191
- "output_scores": false,
192
- "return_dict_in_generate": false,
193
- "forced_bos_token_id": null,
194
- "forced_eos_token_id": null,
195
- "remove_invalid_values": false,
196
- "exponential_decay_length_penalty": null,
197
- "suppress_tokens": null,
198
- "begin_suppress_tokens": null,
199
- "architectures": null,
200
- "finetuning_task": null,
201
- "id2label": {
202
- "0": "LABEL_0",
203
- "1": "LABEL_1"
204
- },
205
- "label2id": {
206
- "LABEL_0": 0,
207
- "LABEL_1": 1
208
- },
209
- "tokenizer_class": null,
210
- "prefix": null,
211
- "bos_token_id": null,
212
- "pad_token_id": null,
213
- "eos_token_id": null,
214
- "sep_token_id": null,
215
- "decoder_start_token_id": 3.0,
216
- "task_specific_params": null,
217
- "problem_type": null,
218
- "_name_or_path": "",
219
- "_attn_implementation_autoset": false,
220
- "transformers_version": "4.51.3",
221
- "model_type": "transformer",
222
- "feature_size": 1,
223
- "context_length": 16,
224
- "prediction_length": 4,
225
- "quantiles": [
226
- 0.1,
227
- 0.5,
228
- 0.9
229
- ],
230
- "output_token_lengths": 1,
231
- "loss_type": "quantile",
232
- "use_dynamic_features": false,
233
- "use_static_features": false,
234
- "autoregressive": false
235
  }
 
1
  {
2
+ "return_dict": true,
3
+ "torchscript": false,
4
+ "torch_dtype": null,
5
+ "use_bfloat16": false,
6
+ "tf_legacy_loss": false,
7
+ "pruned_heads": {},
8
+ "tie_word_embeddings": true,
9
+ "chunk_size_feed_forward": 0,
10
+ "is_encoder_decoder": false,
11
+ "is_decoder": true,
12
+ "cross_attention_hidden_size": null,
13
+ "add_cross_attention": false,
14
+ "tie_encoder_decoder": false,
15
+ "max_length": 20,
16
+ "min_length": 0,
17
+ "do_sample": false,
18
+ "early_stopping": false,
19
+ "num_beams": 1,
20
+ "num_beam_groups": 1,
21
+ "diversity_penalty": 0.0,
22
+ "temperature": 1.0,
23
+ "top_k": 50,
24
+ "top_p": 1.0,
25
+ "typical_p": 1.0,
26
+ "repetition_penalty": 1.0,
27
+ "length_penalty": 1.0,
28
+ "no_repeat_ngram_size": 0,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "bad_words_ids": null,
31
+ "num_return_sequences": 1,
32
+ "output_scores": false,
33
+ "return_dict_in_generate": false,
34
+ "forced_bos_token_id": null,
35
+ "forced_eos_token_id": null,
36
+ "remove_invalid_values": false,
37
+ "exponential_decay_length_penalty": null,
38
+ "suppress_tokens": null,
39
+ "begin_suppress_tokens": null,
40
+ "architectures": null,
41
+ "finetuning_task": null,
42
+ "id2label": {
43
+ "0": "LABEL_0",
44
+ "1": "LABEL_1"
45
+ },
46
+ "label2id": {
47
+ "LABEL_0": 0,
48
+ "LABEL_1": 1
49
+ },
50
+ "tokenizer_class": null,
51
+ "prefix": null,
52
+ "bos_token_id": null,
53
+ "pad_token_id": null,
54
+ "eos_token_id": null,
55
+ "sep_token_id": null,
56
+ "task_specific_params": null,
57
+ "problem_type": null,
58
+ "_name_or_path": "",
59
+ "transformers_version": "4.52.2",
60
+ "target_dim": 1,
61
+ "static_dim": 0,
62
+ "dynamic_dim": 0,
63
+ "past_dynamic_dim": 0,
64
+ "static_cardinalities": null,
65
+ "dynamic_cardinalities": null,
66
+ "past_dynamic_cardinalities": null,
67
+ "static_embedding_dim": null,
68
+ "dynamic_embedding_dim": null,
69
+ "past_dynamic_embedding_dim": null,
70
+ "time_features": null,
71
+ "scaling": true,
72
+ "decoder_start_token_value": 0.0,
73
+ "feature_size": 1,
74
+ "context_length": 1024,
75
+ "prediction_length": 256,
76
+ "quantiles": [
77
+ 0.005,
78
+ 0.015,
79
+ 0.025,
80
+ 0.034999999999999996,
81
+ 0.045,
82
+ 0.055,
83
+ 0.065,
84
+ 0.07500000000000001,
85
+ 0.085,
86
+ 0.095,
87
+ 0.10500000000000001,
88
+ 0.115,
89
+ 0.125,
90
+ 0.135,
91
+ 0.14500000000000002,
92
+ 0.155,
93
+ 0.165,
94
+ 0.17500000000000002,
95
+ 0.185,
96
+ 0.195,
97
+ 0.20500000000000002,
98
+ 0.215,
99
+ 0.225,
100
+ 0.23500000000000001,
101
+ 0.245,
102
+ 0.255,
103
+ 0.265,
104
+ 0.275,
105
+ 0.28500000000000003,
106
+ 0.295,
107
+ 0.305,
108
+ 0.315,
109
+ 0.325,
110
+ 0.335,
111
+ 0.34500000000000003,
112
+ 0.35500000000000004,
113
+ 0.365,
114
+ 0.375,
115
+ 0.385,
116
+ 0.395,
117
+ 0.405,
118
+ 0.41500000000000004,
119
+ 0.425,
120
+ 0.435,
121
+ 0.445,
122
+ 0.455,
123
+ 0.465,
124
+ 0.47500000000000003,
125
+ 0.485,
126
+ 0.495,
127
+ 0.505,
128
+ 0.515,
129
+ 0.525,
130
+ 0.535,
131
+ 0.545,
132
+ 0.555,
133
+ 0.5650000000000001,
134
+ 0.5750000000000001,
135
+ 0.585,
136
+ 0.595,
137
+ 0.605,
138
+ 0.615,
139
+ 0.625,
140
+ 0.635,
141
+ 0.645,
142
+ 0.655,
143
+ 0.665,
144
+ 0.675,
145
+ 0.685,
146
+ 0.6950000000000001,
147
+ 0.7050000000000001,
148
+ 0.715,
149
+ 0.725,
150
+ 0.735,
151
+ 0.745,
152
+ 0.755,
153
+ 0.765,
154
+ 0.775,
155
+ 0.785,
156
+ 0.795,
157
+ 0.805,
158
+ 0.8150000000000001,
159
+ 0.8250000000000001,
160
+ 0.8350000000000001,
161
+ 0.845,
162
+ 0.855,
163
+ 0.865,
164
+ 0.875,
165
+ 0.885,
166
+ 0.895,
167
+ 0.905,
168
+ 0.915,
169
+ 0.925,
170
+ 0.935,
171
+ 0.9450000000000001,
172
+ 0.9550000000000001,
173
+ 0.965,
174
+ 0.975,
175
+ 0.985,
176
+ 0.995
177
+ ],
178
+ "output_token_lengths": 1,
179
+ "loss_type": "quantile",
180
+ "use_dynamic_features": false,
181
+ "use_static_features": false,
182
+ "autoregressive": true,
183
+ "gradient_checkpointing": true,
184
+ "model_type": "transformer",
185
+ "d_model": 32,
186
+ "hidden_dropout_prob": 0.1,
187
+ "max_position_embeddings": 4096,
188
  "architecture": {
189
+ "layout": "decoder",
190
+ "num_encoder_layers": 0,
191
  "num_decoder_layers": 2,
192
  "share_weights": false
193
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  "value_embedding_config": {
195
  "type": "value",
196
  "dropout": 0.1,
197
  "embedding_dim": null,
198
+ "kwargs": {
199
+ "feature_size": 1,
200
+ "d_model": 32,
201
+ "use_layer_norm": true
202
+ }
203
  },
204
  "positional_embedding_config": {
205
  "type": "stacked_embedding",
 
208
  "kwargs": {
209
  "embedding_configs": [
210
  {
211
+ "type": "fourier",
 
 
 
 
 
 
212
  "args": {
213
+ "feature_size": 64
 
 
 
 
 
 
214
  }
215
  }
216
  ],
217
+ "max_seq_len": 1000
218
  }
219
  },
220
+ "encoder_blocks": null,
221
+ "decoder_blocks": [
 
 
 
 
 
 
 
 
 
 
 
222
  {
223
+ "block_type": "default_decoder",
224
  "attention_config": {
225
  "attention_type": "full",
226
  "num_heads": 2,
227
  "dropout": 0.1,
228
+ "bias": true,
229
+ "use_rope": true,
230
+ "use_alibi": true,
231
+ "rope_base": 10000,
232
  "kwargs": {}
233
  },
234
+ "cross_attention_config": null,
235
  "ffn_config": {
236
  "type": "standard",
237
+ "intermediate_size": 128,
238
  "activation": "gelu",
239
  "dropout": 0.1,
240
+ "bias": true,
241
+ "num_experts": null,
242
+ "top_k": null,
243
+ "expert_intermediate_size": null,
244
+ "load_balancing_coef": 0.01,
245
  "kwargs": {}
246
  },
247
+ "norm_config": {
248
+ "norm_type": "layer",
249
+ "eps": 1e-05,
 
 
 
 
 
 
 
 
 
 
 
 
250
  "kwargs": {}
251
  },
252
  "kwargs": {}
253
+ },
 
 
254
  {
255
  "block_type": "default_decoder",
256
  "attention_config": {
257
  "attention_type": "full",
258
  "num_heads": 2,
259
  "dropout": 0.1,
260
+ "bias": true,
261
+ "use_rope": true,
262
+ "use_alibi": true,
263
+ "rope_base": 10000,
264
  "kwargs": {}
265
  },
266
+ "cross_attention_config": null,
267
  "ffn_config": {
268
  "type": "standard",
269
+ "intermediate_size": 128,
270
  "activation": "gelu",
271
  "dropout": 0.1,
272
+ "bias": true,
273
+ "num_experts": null,
274
+ "top_k": null,
275
+ "expert_intermediate_size": null,
276
+ "load_balancing_coef": 0.01,
277
  "kwargs": {}
278
  },
279
+ "norm_config": {
280
+ "norm_type": "layer",
281
+ "eps": 1e-05,
 
 
 
 
 
 
 
 
 
 
 
 
282
  "kwargs": {}
283
  },
284
  "kwargs": {}
285
  }
286
  ],
287
+ "output_head_config": {
288
+ "type": "distpred",
289
+ "output_size": 100,
290
+ "kwargs": {
291
+ "num_outputs": 100,
292
+ "feature_size": 1
293
+ }
294
+ },
295
  "norm_config": {
296
  "norm_type": "layer",
297
  "eps": 1e-05,
 
301
  "type": "mean",
302
  "kwargs": {}
303
  },
304
+ "loss_config": {
305
+ "type": "crps",
306
+ "kwargs": {
307
+ "reduction": "mean",
308
+ "estimator": "pwm",
309
+ "spread_lambda": 0.0,
310
+ "spread_penalty_type": "log",
311
+ "spread_penalty_epsilon": 0.001,
312
+ "scaling_type": "none"
313
+ }
314
+ },
315
  "output_attentions": false,
316
  "output_hidden_states": false,
317
  "use_teacher_forcing": true,
318
+ "quantizer_config": null,
319
+ "vocab_size": null,
320
+ "decoder_start_token_id": null,
321
+ "num_quantiles": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6e48b3d3c88b34881896a31683954e5f39b707926a3c8d9aee0b74657e18a8b
3
- size 252476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71fdb87ec51270c8919baa9db70d81b1cf6d5dd63eb5a87af162f6ba3cfd0efd
3
+ size 123656