oiisa commited on
Commit
91cfcce
·
verified ·
1 Parent(s): efdc82e

Delete model

Browse files
model/config.json DELETED
@@ -1,61 +0,0 @@
1
- {
2
- "_name_or_path": "google-t5/t5-small",
3
- "architectures": [
4
- "T5ForConditionalGeneration"
5
- ],
6
- "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
- "d_kv": 64,
9
- "d_model": 512,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "relu",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "relu",
15
- "initializer_factor": 1.0,
16
- "is_encoder_decoder": true,
17
- "is_gated_act": false,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "t5",
20
- "n_positions": 512,
21
- "num_decoder_layers": 6,
22
- "num_heads": 8,
23
- "num_layers": 6,
24
- "output_past": true,
25
- "pad_token_id": 0,
26
- "relative_attention_max_distance": 128,
27
- "relative_attention_num_buckets": 32,
28
- "task_specific_params": {
29
- "summarization": {
30
- "early_stopping": true,
31
- "length_penalty": 2.0,
32
- "max_length": 200,
33
- "min_length": 30,
34
- "no_repeat_ngram_size": 3,
35
- "num_beams": 4,
36
- "prefix": "summarize: "
37
- },
38
- "translation_en_to_de": {
39
- "early_stopping": true,
40
- "max_length": 300,
41
- "num_beams": 4,
42
- "prefix": "translate English to German: "
43
- },
44
- "translation_en_to_fr": {
45
- "early_stopping": true,
46
- "max_length": 300,
47
- "num_beams": 4,
48
- "prefix": "translate English to French: "
49
- },
50
- "translation_en_to_ro": {
51
- "early_stopping": true,
52
- "max_length": 300,
53
- "num_beams": 4,
54
- "prefix": "translate English to Romanian: "
55
- }
56
- },
57
- "torch_dtype": "float32",
58
- "transformers_version": "4.47.0",
59
- "use_cache": true,
60
- "vocab_size": 32128
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.47.0"
7
- }
 
 
 
 
 
 
 
 
model/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:de52eaba2eee8309b67c8c4e2f976b32617abd28a08fa8761b00e1214711700c
3
- size 242041896
 
 
 
 
model/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:96d2f6417dab4121134db6c9a0a52d5ca647f6116469c0d746f9c1e2ffc1ab10
3
- size 14244
 
 
 
 
model/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe219c40d13942f8616d969af97cde95d76ae15a5c271bc32d6bd60e10cc69a
3
- size 1064
 
 
 
 
model/trainer_state.json DELETED
@@ -1,318 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
- "eval_steps": 100,
6
- "global_step": 1966,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.1017293997965412,
13
- "grad_norm": 1.07757568359375,
14
- "learning_rate": 4.985827402694507e-05,
15
- "loss": 1.4448,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 0.1017293997965412,
20
- "eval_loss": 0.5420752167701721,
21
- "eval_runtime": 81.2287,
22
- "eval_samples_per_second": 193.478,
23
- "eval_steps_per_second": 24.191,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 0.2034587995930824,
28
- "grad_norm": 0.9271150827407837,
29
- "learning_rate": 4.943470300789534e-05,
30
- "loss": 0.6104,
31
- "step": 200
32
- },
33
- {
34
- "epoch": 0.2034587995930824,
35
- "eval_loss": 0.39222481846809387,
36
- "eval_runtime": 80.7623,
37
- "eval_samples_per_second": 194.596,
38
- "eval_steps_per_second": 24.331,
39
- "step": 200
40
- },
41
- {
42
- "epoch": 0.3051881993896236,
43
- "grad_norm": 0.8333455324172974,
44
- "learning_rate": 4.873408942403743e-05,
45
- "loss": 0.4651,
46
- "step": 300
47
- },
48
- {
49
- "epoch": 0.3051881993896236,
50
- "eval_loss": 0.32220107316970825,
51
- "eval_runtime": 80.7592,
52
- "eval_samples_per_second": 194.603,
53
- "eval_steps_per_second": 24.332,
54
- "step": 300
55
- },
56
- {
57
- "epoch": 0.4069175991861648,
58
- "grad_norm": 0.8338972330093384,
59
- "learning_rate": 4.7764376886723956e-05,
60
- "loss": 0.4106,
61
- "step": 400
62
- },
63
- {
64
- "epoch": 0.4069175991861648,
65
- "eval_loss": 0.2739136815071106,
66
- "eval_runtime": 80.8876,
67
- "eval_samples_per_second": 194.294,
68
- "eval_steps_per_second": 24.293,
69
- "step": 400
70
- },
71
- {
72
- "epoch": 0.508646998982706,
73
- "grad_norm": 0.6324864029884338,
74
- "learning_rate": 4.6536560072189665e-05,
75
- "loss": 0.3699,
76
- "step": 500
77
- },
78
- {
79
- "epoch": 0.508646998982706,
80
- "eval_loss": 0.2465265691280365,
81
- "eval_runtime": 80.8298,
82
- "eval_samples_per_second": 194.433,
83
- "eval_steps_per_second": 24.31,
84
- "step": 500
85
- },
86
- {
87
- "epoch": 0.6103763987792472,
88
- "grad_norm": 0.7570372223854065,
89
- "learning_rate": 4.506456006305641e-05,
90
- "loss": 0.3256,
91
- "step": 600
92
- },
93
- {
94
- "epoch": 0.6103763987792472,
95
- "eval_loss": 0.22816891968250275,
96
- "eval_runtime": 80.8701,
97
- "eval_samples_per_second": 194.336,
98
- "eval_steps_per_second": 24.298,
99
- "step": 600
100
- },
101
- {
102
- "epoch": 0.7121057985757884,
103
- "grad_norm": 0.6379811763763428,
104
- "learning_rate": 4.336506651001469e-05,
105
- "loss": 0.3194,
106
- "step": 700
107
- },
108
- {
109
- "epoch": 0.7121057985757884,
110
- "eval_loss": 0.21390707790851593,
111
- "eval_runtime": 80.9268,
112
- "eval_samples_per_second": 194.2,
113
- "eval_steps_per_second": 24.281,
114
- "step": 700
115
- },
116
- {
117
- "epoch": 0.8138351983723296,
118
- "grad_norm": 0.8042012453079224,
119
- "learning_rate": 4.145734840326494e-05,
120
- "loss": 0.3032,
121
- "step": 800
122
- },
123
- {
124
- "epoch": 0.8138351983723296,
125
- "eval_loss": 0.20113198459148407,
126
- "eval_runtime": 80.8778,
127
- "eval_samples_per_second": 194.318,
128
- "eval_steps_per_second": 24.296,
129
- "step": 800
130
- },
131
- {
132
- "epoch": 0.9155645981688708,
133
- "grad_norm": 0.721193253993988,
134
- "learning_rate": 3.936303559920664e-05,
135
- "loss": 0.2787,
136
- "step": 900
137
- },
138
- {
139
- "epoch": 0.9155645981688708,
140
- "eval_loss": 0.19362396001815796,
141
- "eval_runtime": 80.735,
142
- "eval_samples_per_second": 194.662,
143
- "eval_steps_per_second": 24.339,
144
- "step": 900
145
- },
146
- {
147
- "epoch": 1.017293997965412,
148
- "grad_norm": 0.5696636438369751,
149
- "learning_rate": 3.710587357944272e-05,
150
- "loss": 0.2672,
151
- "step": 1000
152
- },
153
- {
154
- "epoch": 1.017293997965412,
155
- "eval_loss": 0.18811464309692383,
156
- "eval_runtime": 80.9806,
157
- "eval_samples_per_second": 194.071,
158
- "eval_steps_per_second": 24.265,
159
- "step": 1000
160
- },
161
- {
162
- "epoch": 1.1190233977619533,
163
- "grad_norm": 0.5195199847221375,
164
- "learning_rate": 3.471145422266069e-05,
165
- "loss": 0.2665,
166
- "step": 1100
167
- },
168
- {
169
- "epoch": 1.1190233977619533,
170
- "eval_loss": 0.18336237967014313,
171
- "eval_runtime": 81.0197,
172
- "eval_samples_per_second": 193.978,
173
- "eval_steps_per_second": 24.253,
174
- "step": 1100
175
- },
176
- {
177
- "epoch": 1.2207527975584944,
178
- "grad_norm": 0.5171718001365662,
179
- "learning_rate": 3.220692564191985e-05,
180
- "loss": 0.2597,
181
- "step": 1200
182
- },
183
- {
184
- "epoch": 1.2207527975584944,
185
- "eval_loss": 0.17948384582996368,
186
- "eval_runtime": 80.9251,
187
- "eval_samples_per_second": 194.204,
188
- "eval_steps_per_second": 24.282,
189
- "step": 1200
190
- },
191
- {
192
- "epoch": 1.3224821973550356,
193
- "grad_norm": 0.6012802124023438,
194
- "learning_rate": 2.9620684377232173e-05,
195
- "loss": 0.2457,
196
- "step": 1300
197
- },
198
- {
199
- "epoch": 1.3224821973550356,
200
- "eval_loss": 0.17490074038505554,
201
- "eval_runtime": 80.6967,
202
- "eval_samples_per_second": 194.754,
203
- "eval_steps_per_second": 24.35,
204
- "step": 1300
205
- },
206
- {
207
- "epoch": 1.4242115971515767,
208
- "grad_norm": 0.8013952374458313,
209
- "learning_rate": 2.6982053433381048e-05,
210
- "loss": 0.2446,
211
- "step": 1400
212
- },
213
- {
214
- "epoch": 1.4242115971515767,
215
- "eval_loss": 0.17289136350154877,
216
- "eval_runtime": 80.8694,
217
- "eval_samples_per_second": 194.338,
218
- "eval_steps_per_second": 24.298,
219
- "step": 1400
220
- },
221
- {
222
- "epoch": 1.5259409969481181,
223
- "grad_norm": 0.5540690422058105,
224
- "learning_rate": 2.4320949813410496e-05,
225
- "loss": 0.2417,
226
- "step": 1500
227
- },
228
- {
229
- "epoch": 1.5259409969481181,
230
- "eval_loss": 0.1698693186044693,
231
- "eval_runtime": 80.754,
232
- "eval_samples_per_second": 194.616,
233
- "eval_steps_per_second": 24.333,
234
- "step": 1500
235
- },
236
- {
237
- "epoch": 1.627670396744659,
238
- "grad_norm": 0.6591632962226868,
239
- "learning_rate": 2.166754531731575e-05,
240
- "loss": 0.2312,
241
- "step": 1600
242
- },
243
- {
244
- "epoch": 1.627670396744659,
245
- "eval_loss": 0.16674023866653442,
246
- "eval_runtime": 80.7008,
247
- "eval_samples_per_second": 194.744,
248
- "eval_steps_per_second": 24.349,
249
- "step": 1600
250
- },
251
- {
252
- "epoch": 1.7293997965412005,
253
- "grad_norm": 1.1154977083206177,
254
- "learning_rate": 1.90519244518262e-05,
255
- "loss": 0.2362,
256
- "step": 1700
257
- },
258
- {
259
- "epoch": 1.7293997965412005,
260
- "eval_loss": 0.16516855359077454,
261
- "eval_runtime": 80.7504,
262
- "eval_samples_per_second": 194.624,
263
- "eval_steps_per_second": 24.334,
264
- "step": 1700
265
- },
266
- {
267
- "epoch": 1.8311291963377416,
268
- "grad_norm": 0.6043060421943665,
269
- "learning_rate": 1.6503743329926174e-05,
270
- "loss": 0.2308,
271
- "step": 1800
272
- },
273
- {
274
- "epoch": 1.8311291963377416,
275
- "eval_loss": 0.16346405446529388,
276
- "eval_runtime": 80.6619,
277
- "eval_samples_per_second": 194.838,
278
- "eval_steps_per_second": 24.361,
279
- "step": 1800
280
- },
281
- {
282
- "epoch": 1.9328585961342828,
283
- "grad_norm": 0.6331253051757812,
284
- "learning_rate": 1.4051893427537416e-05,
285
- "loss": 0.2278,
286
- "step": 1900
287
- },
288
- {
289
- "epoch": 1.9328585961342828,
290
- "eval_loss": 0.16204476356506348,
291
- "eval_runtime": 80.7035,
292
- "eval_samples_per_second": 194.738,
293
- "eval_steps_per_second": 24.348,
294
- "step": 1900
295
- }
296
- ],
297
- "logging_steps": 100,
298
- "max_steps": 2949,
299
- "num_input_tokens_seen": 0,
300
- "num_train_epochs": 3,
301
- "save_steps": 500,
302
- "stateful_callbacks": {
303
- "TrainerControl": {
304
- "args": {
305
- "should_epoch_stop": false,
306
- "should_evaluate": false,
307
- "should_log": false,
308
- "should_save": true,
309
- "should_training_stop": false
310
- },
311
- "attributes": {}
312
- }
313
- },
314
- "total_flos": 8507720982331392.0,
315
- "train_batch_size": 64,
316
- "trial_name": null,
317
- "trial_params": null
318
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cd7d483f5b00f9b5d354c02efd4836ba470c8d241b371b36d550ab140ed4cb8
3
- size 5304