tensorlink-dev committed on
Commit
954c95c
·
verified ·
1 Parent(s): d17d62c

Save model using custom save_hf

Browse files
Files changed (2) hide show
  1. config.json +285 -0
  2. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "use_cache": true,
3
+ "aux_loss_weight": 0.01,
4
+ "return_dict": true,
5
+ "torchscript": false,
6
+ "torch_dtype": null,
7
+ "use_bfloat16": false,
8
+ "tf_legacy_loss": false,
9
+ "pruned_heads": {},
10
+ "tie_word_embeddings": true,
11
+ "chunk_size_feed_forward": 0,
12
+ "is_encoder_decoder": false,
13
+ "is_decoder": true,
14
+ "cross_attention_hidden_size": null,
15
+ "add_cross_attention": false,
16
+ "tie_encoder_decoder": false,
17
+ "max_length": 20,
18
+ "min_length": 0,
19
+ "do_sample": false,
20
+ "early_stopping": false,
21
+ "num_beams": 1,
22
+ "num_beam_groups": 1,
23
+ "diversity_penalty": 0.0,
24
+ "temperature": 1.0,
25
+ "top_k": 50,
26
+ "top_p": 1.0,
27
+ "typical_p": 1.0,
28
+ "repetition_penalty": 1.0,
29
+ "length_penalty": 1.0,
30
+ "no_repeat_ngram_size": 0,
31
+ "encoder_no_repeat_ngram_size": 0,
32
+ "bad_words_ids": null,
33
+ "num_return_sequences": 1,
34
+ "output_scores": false,
35
+ "return_dict_in_generate": false,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
+ "remove_invalid_values": false,
39
+ "exponential_decay_length_penalty": null,
40
+ "suppress_tokens": null,
41
+ "begin_suppress_tokens": null,
42
+ "architectures": null,
43
+ "finetuning_task": null,
44
+ "id2label": {
45
+ "0": "LABEL_0",
46
+ "1": "LABEL_1"
47
+ },
48
+ "label2id": {
49
+ "LABEL_0": 0,
50
+ "LABEL_1": 1
51
+ },
52
+ "tokenizer_class": null,
53
+ "prefix": null,
54
+ "bos_token_id": null,
55
+ "pad_token_id": null,
56
+ "eos_token_id": null,
57
+ "sep_token_id": null,
58
+ "task_specific_params": null,
59
+ "problem_type": null,
60
+ "_name_or_path": "",
61
+ "transformers_version": "4.53.0",
62
+ "static_dim": 0,
63
+ "dynamic_dim": 0,
64
+ "past_dynamic_dim": 0,
65
+ "static_cardinalities": null,
66
+ "dynamic_cardinalities": null,
67
+ "past_dynamic_cardinalities": null,
68
+ "static_embedding_dim": null,
69
+ "dynamic_embedding_dim": null,
70
+ "past_dynamic_embedding_dim": null,
71
+ "time_features": null,
72
+ "scaling": true,
73
+ "decoder_start_token_value": 0.0,
74
+ "feature_size": 1,
75
+ "context_length": 1024,
76
+ "prediction_length": 256,
77
+ "quantiles": [
78
+ 0.005,
79
+ 0.015,
80
+ 0.025,
81
+ 0.034999999999999996,
82
+ 0.045,
83
+ 0.055,
84
+ 0.065,
85
+ 0.07500000000000001,
86
+ 0.085,
87
+ 0.095,
88
+ 0.10500000000000001,
89
+ 0.115,
90
+ 0.125,
91
+ 0.135,
92
+ 0.14500000000000002,
93
+ 0.155,
94
+ 0.165,
95
+ 0.17500000000000002,
96
+ 0.185,
97
+ 0.195,
98
+ 0.20500000000000002,
99
+ 0.215,
100
+ 0.225,
101
+ 0.23500000000000001,
102
+ 0.245,
103
+ 0.255,
104
+ 0.265,
105
+ 0.275,
106
+ 0.28500000000000003,
107
+ 0.295,
108
+ 0.305,
109
+ 0.315,
110
+ 0.325,
111
+ 0.335,
112
+ 0.34500000000000003,
113
+ 0.35500000000000004,
114
+ 0.365,
115
+ 0.375,
116
+ 0.385,
117
+ 0.395,
118
+ 0.405,
119
+ 0.41500000000000004,
120
+ 0.425,
121
+ 0.435,
122
+ 0.445,
123
+ 0.455,
124
+ 0.465,
125
+ 0.47500000000000003,
126
+ 0.485,
127
+ 0.495,
128
+ 0.505,
129
+ 0.515,
130
+ 0.525,
131
+ 0.535,
132
+ 0.545,
133
+ 0.555,
134
+ 0.5650000000000001,
135
+ 0.5750000000000001,
136
+ 0.585,
137
+ 0.595,
138
+ 0.605,
139
+ 0.615,
140
+ 0.625,
141
+ 0.635,
142
+ 0.645,
143
+ 0.655,
144
+ 0.665,
145
+ 0.675,
146
+ 0.685,
147
+ 0.6950000000000001,
148
+ 0.7050000000000001,
149
+ 0.715,
150
+ 0.725,
151
+ 0.735,
152
+ 0.745,
153
+ 0.755,
154
+ 0.765,
155
+ 0.775,
156
+ 0.785,
157
+ 0.795,
158
+ 0.805,
159
+ 0.8150000000000001,
160
+ 0.8250000000000001,
161
+ 0.8350000000000001,
162
+ 0.845,
163
+ 0.855,
164
+ 0.865,
165
+ 0.875,
166
+ 0.885,
167
+ 0.895,
168
+ 0.905,
169
+ 0.915,
170
+ 0.925,
171
+ 0.935,
172
+ 0.9450000000000001,
173
+ 0.9550000000000001,
174
+ 0.965,
175
+ 0.975,
176
+ 0.985,
177
+ 0.995
178
+ ],
179
+ "output_token_lengths": 1,
180
+ "loss_type": "quantile",
181
+ "use_dynamic_features": false,
182
+ "use_static_features": false,
183
+ "autoregressive": true,
184
+ "gradient_checkpointing": true,
185
+ "model_type": "transformer",
186
+ "d_model": 16,
187
+ "hidden_dropout_prob": 0.1,
188
+ "max_position_embeddings": 4096,
189
+ "architecture": {
190
+ "layout": "decoder",
191
+ "num_encoder_layers": 0,
192
+ "num_decoder_layers": 1,
193
+ "share_weights": false
194
+ },
195
+ "value_embedding_config": {
196
+ "type": "value",
197
+ "dropout": 0.1,
198
+ "embedding_dim": null,
199
+ "kwargs": {
200
+ "feature_size": 1,
201
+ "d_model": 16,
202
+ "use_layer_norm": true
203
+ }
204
+ },
205
+ "positional_embedding_config": {
206
+ "type": "stacked_embedding",
207
+ "dropout": 0.1,
208
+ "embedding_dim": null,
209
+ "kwargs": {
210
+ "embedding_configs": [],
211
+ "max_seq_len": 4096
212
+ }
213
+ },
214
+ "encoder_blocks": null,
215
+ "decoder_blocks": [
216
+ {
217
+ "block_type": "default_decoder",
218
+ "attention_config": {
219
+ "attention_type": "full",
220
+ "num_heads": 1,
221
+ "dropout": 0.1,
222
+ "bias": true,
223
+ "use_rope": true,
224
+ "use_alibi": false,
225
+ "rope_base": 10000,
226
+ "kwargs": {}
227
+ },
228
+ "cross_attention_config": null,
229
+ "ffn_config": {
230
+ "type": "standard",
231
+ "intermediate_size": 64,
232
+ "activation": "gelu",
233
+ "dropout": 0.1,
234
+ "bias": true,
235
+ "num_experts": null,
236
+ "top_k": null,
237
+ "expert_intermediate_size": null,
238
+ "load_balancing_coef": 0.01,
239
+ "kwargs": {}
240
+ },
241
+ "norm_config": {
242
+ "norm_type": "layer",
243
+ "eps": 1e-05,
244
+ "kwargs": {}
245
+ },
246
+ "kwargs": {}
247
+ }
248
+ ],
249
+ "output_head_config": {
250
+ "type": "distpred",
251
+ "output_size": 100,
252
+ "kwargs": {
253
+ "num_outputs": 100,
254
+ "feature_size": 1
255
+ }
256
+ },
257
+ "norm_config": {
258
+ "norm_type": "layer",
259
+ "eps": 1e-05,
260
+ "kwargs": {}
261
+ },
262
+ "head_agg_config": {
263
+ "type": "mean",
264
+ "kwargs": {}
265
+ },
266
+ "loss_config": {
267
+ "type": "crps",
268
+ "kwargs": {
269
+ "reduction": "mean",
270
+ "estimator": "pwm",
271
+ "spread_lambda": 0,
272
+ "spread_penalty_type": "symmetric_log",
273
+ "spread_penalty_epsilon": 0,
274
+ "scaling_type": "none",
275
+ "spread_target_spread": 0
276
+ }
277
+ },
278
+ "output_attentions": false,
279
+ "output_hidden_states": false,
280
+ "use_teacher_forcing": true,
281
+ "quantizer_config": null,
282
+ "vocab_size": null,
283
+ "decoder_start_token_id": null,
284
+ "num_quantiles": 100
285
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3243333515a27856205cb388032c37de0a1881c29904fc59eb00fa217cc734
3
+ size 22128