SANDEEPNADESAN commited on
Commit
6a0b7ee
·
verified ·
1 Parent(s): a1428e0

Initial model upload

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4.64951937687552e+17,
4
+ "train_loss": 0.3516346867879232,
5
+ "train_runtime": 232.2268,
6
+ "train_samples_per_second": 25.837,
7
+ "train_steps_per_second": 1.615
8
+ }
checkpoint-350/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "dtype": "float32",
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Normal",
13
+ "1": "Stroke"
14
+ },
15
+ "image_size": 224,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "Normal": "0",
20
+ "Stroke": "1"
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "model_type": "vit",
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "pooler_act": "tanh",
29
+ "pooler_output_size": 768,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "transformers_version": "4.56.1"
33
+ }
checkpoint-350/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cbf166d41eee416d013784388cb6329a093caa28fe2a65c40b639264d1cb02c
3
+ size 343223968
checkpoint-350/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbbe42e500d13c5b12208fbb0188e4a13c3ea5ab070c26df14185484b5d88f63
3
+ size 686572107
checkpoint-350/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-350/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17cd930da9783ca70bad4b9cdeee6a06c0acea8f34645a333c93341f487f66a3
3
+ size 14645
checkpoint-350/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a3b7c7f6ae3d4b9e2fcb0774d91cea73c152177e3f6818dfc11db3a9a74275
3
+ size 1383
checkpoint-350/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47cfe82f13a5be2b56585c903bfd0022928fc105dd431787360f5e72fbf89c56
3
+ size 1465
checkpoint-350/trainer_state.json ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 350,
3
+ "best_metric": 0.19462016224861145,
4
+ "best_model_checkpoint": "./vit-stroke-detector/checkpoint-350",
5
+ "epoch": 2.8,
6
+ "eval_steps": 50,
7
+ "global_step": 350,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08,
14
+ "grad_norm": 0.828596830368042,
15
+ "learning_rate": 0.0001952,
16
+ "loss": 0.6849,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.16,
21
+ "grad_norm": 1.6220552921295166,
22
+ "learning_rate": 0.00018986666666666668,
23
+ "loss": 0.6632,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.24,
28
+ "grad_norm": 1.931017279624939,
29
+ "learning_rate": 0.00018453333333333334,
30
+ "loss": 0.6737,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.32,
35
+ "grad_norm": 0.5325367450714111,
36
+ "learning_rate": 0.00017920000000000002,
37
+ "loss": 0.6441,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.4,
42
+ "grad_norm": 0.20815162360668182,
43
+ "learning_rate": 0.00017386666666666667,
44
+ "loss": 0.6959,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.4,
49
+ "eval_accuracy": 0.6007984031936128,
50
+ "eval_loss": 0.6659401059150696,
51
+ "eval_runtime": 12.6028,
52
+ "eval_samples_per_second": 39.753,
53
+ "eval_steps_per_second": 4.999,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 0.48,
58
+ "grad_norm": 0.6144075393676758,
59
+ "learning_rate": 0.00016853333333333336,
60
+ "loss": 0.6542,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 0.56,
65
+ "grad_norm": 0.3622696101665497,
66
+ "learning_rate": 0.0001632,
67
+ "loss": 0.6336,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 0.64,
72
+ "grad_norm": 1.7294650077819824,
73
+ "learning_rate": 0.00015786666666666666,
74
+ "loss": 0.6705,
75
+ "step": 80
76
+ },
77
+ {
78
+ "epoch": 0.72,
79
+ "grad_norm": 1.2596920728683472,
80
+ "learning_rate": 0.00015253333333333335,
81
+ "loss": 0.6144,
82
+ "step": 90
83
+ },
84
+ {
85
+ "epoch": 0.8,
86
+ "grad_norm": 2.432494640350342,
87
+ "learning_rate": 0.0001472,
88
+ "loss": 0.6038,
89
+ "step": 100
90
+ },
91
+ {
92
+ "epoch": 0.8,
93
+ "eval_accuracy": 0.624750499001996,
94
+ "eval_loss": 0.6349905133247375,
95
+ "eval_runtime": 4.8798,
96
+ "eval_samples_per_second": 102.668,
97
+ "eval_steps_per_second": 12.91,
98
+ "step": 100
99
+ },
100
+ {
101
+ "epoch": 0.88,
102
+ "grad_norm": 0.8303850293159485,
103
+ "learning_rate": 0.00014186666666666668,
104
+ "loss": 0.5625,
105
+ "step": 110
106
+ },
107
+ {
108
+ "epoch": 0.96,
109
+ "grad_norm": 1.7314858436584473,
110
+ "learning_rate": 0.00013653333333333334,
111
+ "loss": 0.4656,
112
+ "step": 120
113
+ },
114
+ {
115
+ "epoch": 1.04,
116
+ "grad_norm": 2.2635459899902344,
117
+ "learning_rate": 0.00013120000000000002,
118
+ "loss": 0.401,
119
+ "step": 130
120
+ },
121
+ {
122
+ "epoch": 1.12,
123
+ "grad_norm": 1.9528553485870361,
124
+ "learning_rate": 0.00012586666666666667,
125
+ "loss": 0.5874,
126
+ "step": 140
127
+ },
128
+ {
129
+ "epoch": 1.2,
130
+ "grad_norm": 2.2214958667755127,
131
+ "learning_rate": 0.00012053333333333334,
132
+ "loss": 0.4041,
133
+ "step": 150
134
+ },
135
+ {
136
+ "epoch": 1.2,
137
+ "eval_accuracy": 0.7904191616766467,
138
+ "eval_loss": 0.5236000418663025,
139
+ "eval_runtime": 6.2426,
140
+ "eval_samples_per_second": 80.255,
141
+ "eval_steps_per_second": 10.092,
142
+ "step": 150
143
+ },
144
+ {
145
+ "epoch": 1.28,
146
+ "grad_norm": 1.578341007232666,
147
+ "learning_rate": 0.0001152,
148
+ "loss": 0.4256,
149
+ "step": 160
150
+ },
151
+ {
152
+ "epoch": 1.3599999999999999,
153
+ "grad_norm": 0.521945059299469,
154
+ "learning_rate": 0.00010986666666666668,
155
+ "loss": 0.3168,
156
+ "step": 170
157
+ },
158
+ {
159
+ "epoch": 1.44,
160
+ "grad_norm": 1.1707364320755005,
161
+ "learning_rate": 0.00010453333333333333,
162
+ "loss": 0.4316,
163
+ "step": 180
164
+ },
165
+ {
166
+ "epoch": 1.52,
167
+ "grad_norm": 1.4519349336624146,
168
+ "learning_rate": 9.92e-05,
169
+ "loss": 0.2944,
170
+ "step": 190
171
+ },
172
+ {
173
+ "epoch": 1.6,
174
+ "grad_norm": 1.0237702131271362,
175
+ "learning_rate": 9.386666666666667e-05,
176
+ "loss": 0.3243,
177
+ "step": 200
178
+ },
179
+ {
180
+ "epoch": 1.6,
181
+ "eval_accuracy": 0.874251497005988,
182
+ "eval_loss": 0.3280556797981262,
183
+ "eval_runtime": 4.9813,
184
+ "eval_samples_per_second": 100.577,
185
+ "eval_steps_per_second": 12.647,
186
+ "step": 200
187
+ },
188
+ {
189
+ "epoch": 1.6800000000000002,
190
+ "grad_norm": 4.305712699890137,
191
+ "learning_rate": 8.853333333333333e-05,
192
+ "loss": 0.3118,
193
+ "step": 210
194
+ },
195
+ {
196
+ "epoch": 1.76,
197
+ "grad_norm": 1.6635700464248657,
198
+ "learning_rate": 8.32e-05,
199
+ "loss": 0.2534,
200
+ "step": 220
201
+ },
202
+ {
203
+ "epoch": 1.8399999999999999,
204
+ "grad_norm": 1.5301051139831543,
205
+ "learning_rate": 7.786666666666667e-05,
206
+ "loss": 0.2702,
207
+ "step": 230
208
+ },
209
+ {
210
+ "epoch": 1.92,
211
+ "grad_norm": 0.7002310156822205,
212
+ "learning_rate": 7.253333333333334e-05,
213
+ "loss": 0.1926,
214
+ "step": 240
215
+ },
216
+ {
217
+ "epoch": 2.0,
218
+ "grad_norm": 0.4949961304664612,
219
+ "learning_rate": 6.720000000000001e-05,
220
+ "loss": 0.2041,
221
+ "step": 250
222
+ },
223
+ {
224
+ "epoch": 2.0,
225
+ "eval_accuracy": 0.8922155688622755,
226
+ "eval_loss": 0.28237032890319824,
227
+ "eval_runtime": 4.9486,
228
+ "eval_samples_per_second": 101.242,
229
+ "eval_steps_per_second": 12.731,
230
+ "step": 250
231
+ },
232
+ {
233
+ "epoch": 2.08,
234
+ "grad_norm": 13.5546236038208,
235
+ "learning_rate": 6.186666666666668e-05,
236
+ "loss": 0.173,
237
+ "step": 260
238
+ },
239
+ {
240
+ "epoch": 2.16,
241
+ "grad_norm": 7.708662986755371,
242
+ "learning_rate": 5.6533333333333336e-05,
243
+ "loss": 0.1206,
244
+ "step": 270
245
+ },
246
+ {
247
+ "epoch": 2.24,
248
+ "grad_norm": 0.44631195068359375,
249
+ "learning_rate": 5.1200000000000004e-05,
250
+ "loss": 0.1615,
251
+ "step": 280
252
+ },
253
+ {
254
+ "epoch": 2.32,
255
+ "grad_norm": 1.9430702924728394,
256
+ "learning_rate": 4.5866666666666666e-05,
257
+ "loss": 0.1196,
258
+ "step": 290
259
+ },
260
+ {
261
+ "epoch": 2.4,
262
+ "grad_norm": 2.0821869373321533,
263
+ "learning_rate": 4.0533333333333334e-05,
264
+ "loss": 0.117,
265
+ "step": 300
266
+ },
267
+ {
268
+ "epoch": 2.4,
269
+ "eval_accuracy": 0.9161676646706587,
270
+ "eval_loss": 0.22871683537960052,
271
+ "eval_runtime": 4.8973,
272
+ "eval_samples_per_second": 102.302,
273
+ "eval_steps_per_second": 12.864,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 2.48,
278
+ "grad_norm": 9.634779930114746,
279
+ "learning_rate": 3.52e-05,
280
+ "loss": 0.0606,
281
+ "step": 310
282
+ },
283
+ {
284
+ "epoch": 2.56,
285
+ "grad_norm": 6.319236755371094,
286
+ "learning_rate": 2.986666666666667e-05,
287
+ "loss": 0.0774,
288
+ "step": 320
289
+ },
290
+ {
291
+ "epoch": 2.64,
292
+ "grad_norm": 9.121068000793457,
293
+ "learning_rate": 2.4533333333333334e-05,
294
+ "loss": 0.06,
295
+ "step": 330
296
+ },
297
+ {
298
+ "epoch": 2.7199999999999998,
299
+ "grad_norm": 3.4029245376586914,
300
+ "learning_rate": 1.9200000000000003e-05,
301
+ "loss": 0.0997,
302
+ "step": 340
303
+ },
304
+ {
305
+ "epoch": 2.8,
306
+ "grad_norm": 0.8972381353378296,
307
+ "learning_rate": 1.3866666666666667e-05,
308
+ "loss": 0.0353,
309
+ "step": 350
310
+ },
311
+ {
312
+ "epoch": 2.8,
313
+ "eval_accuracy": 0.9401197604790419,
314
+ "eval_loss": 0.19462016224861145,
315
+ "eval_runtime": 5.5189,
316
+ "eval_samples_per_second": 90.78,
317
+ "eval_steps_per_second": 11.415,
318
+ "step": 350
319
+ }
320
+ ],
321
+ "logging_steps": 10,
322
+ "max_steps": 375,
323
+ "num_input_tokens_seen": 0,
324
+ "num_train_epochs": 3,
325
+ "save_steps": 50,
326
+ "stateful_callbacks": {
327
+ "TrainerControl": {
328
+ "args": {
329
+ "should_epoch_stop": false,
330
+ "should_evaluate": false,
331
+ "should_log": false,
332
+ "should_save": true,
333
+ "should_training_stop": false
334
+ },
335
+ "attributes": {}
336
+ }
337
+ },
338
+ "total_flos": 4.339551418417152e+17,
339
+ "train_batch_size": 16,
340
+ "trial_name": null,
341
+ "trial_params": null
342
+ }
checkpoint-350/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8317ceaa60875571340e1a38d19c2d04367ed0d503c395ed80f409547047289
3
+ size 5777
checkpoint-375/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "dtype": "float32",
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Normal",
13
+ "1": "Stroke"
14
+ },
15
+ "image_size": 224,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "Normal": "0",
20
+ "Stroke": "1"
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "model_type": "vit",
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "pooler_act": "tanh",
29
+ "pooler_output_size": 768,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "transformers_version": "4.56.1"
33
+ }
checkpoint-375/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a067b33266d306c455b262c2efccd9cc3fdf3a1dff8cb6dcd5f5446c36d89ea
3
+ size 343223968
checkpoint-375/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8193b1640fc6ef97c9dbc111117e2e562c7212f79c0bc5e5c084a6ca5310ca9
3
+ size 686572107
checkpoint-375/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
checkpoint-375/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17cd930da9783ca70bad4b9cdeee6a06c0acea8f34645a333c93341f487f66a3
3
+ size 14645
checkpoint-375/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac58ef7454dc327cdd1497d560e3c1cadc5881183c106e69fda6701af10248e8
3
+ size 1383
checkpoint-375/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842c9cfc377474f3cc400aa68eb12f46fcbb5333ee717560db8d34b4443de63e
3
+ size 1465
checkpoint-375/trainer_state.json ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 350,
3
+ "best_metric": 0.19462016224861145,
4
+ "best_model_checkpoint": "./vit-stroke-detector/checkpoint-350",
5
+ "epoch": 3.0,
6
+ "eval_steps": 50,
7
+ "global_step": 375,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08,
14
+ "grad_norm": 0.828596830368042,
15
+ "learning_rate": 0.0001952,
16
+ "loss": 0.6849,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.16,
21
+ "grad_norm": 1.6220552921295166,
22
+ "learning_rate": 0.00018986666666666668,
23
+ "loss": 0.6632,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.24,
28
+ "grad_norm": 1.931017279624939,
29
+ "learning_rate": 0.00018453333333333334,
30
+ "loss": 0.6737,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.32,
35
+ "grad_norm": 0.5325367450714111,
36
+ "learning_rate": 0.00017920000000000002,
37
+ "loss": 0.6441,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.4,
42
+ "grad_norm": 0.20815162360668182,
43
+ "learning_rate": 0.00017386666666666667,
44
+ "loss": 0.6959,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.4,
49
+ "eval_accuracy": 0.6007984031936128,
50
+ "eval_loss": 0.6659401059150696,
51
+ "eval_runtime": 12.6028,
52
+ "eval_samples_per_second": 39.753,
53
+ "eval_steps_per_second": 4.999,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 0.48,
58
+ "grad_norm": 0.6144075393676758,
59
+ "learning_rate": 0.00016853333333333336,
60
+ "loss": 0.6542,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 0.56,
65
+ "grad_norm": 0.3622696101665497,
66
+ "learning_rate": 0.0001632,
67
+ "loss": 0.6336,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 0.64,
72
+ "grad_norm": 1.7294650077819824,
73
+ "learning_rate": 0.00015786666666666666,
74
+ "loss": 0.6705,
75
+ "step": 80
76
+ },
77
+ {
78
+ "epoch": 0.72,
79
+ "grad_norm": 1.2596920728683472,
80
+ "learning_rate": 0.00015253333333333335,
81
+ "loss": 0.6144,
82
+ "step": 90
83
+ },
84
+ {
85
+ "epoch": 0.8,
86
+ "grad_norm": 2.432494640350342,
87
+ "learning_rate": 0.0001472,
88
+ "loss": 0.6038,
89
+ "step": 100
90
+ },
91
+ {
92
+ "epoch": 0.8,
93
+ "eval_accuracy": 0.624750499001996,
94
+ "eval_loss": 0.6349905133247375,
95
+ "eval_runtime": 4.8798,
96
+ "eval_samples_per_second": 102.668,
97
+ "eval_steps_per_second": 12.91,
98
+ "step": 100
99
+ },
100
+ {
101
+ "epoch": 0.88,
102
+ "grad_norm": 0.8303850293159485,
103
+ "learning_rate": 0.00014186666666666668,
104
+ "loss": 0.5625,
105
+ "step": 110
106
+ },
107
+ {
108
+ "epoch": 0.96,
109
+ "grad_norm": 1.7314858436584473,
110
+ "learning_rate": 0.00013653333333333334,
111
+ "loss": 0.4656,
112
+ "step": 120
113
+ },
114
+ {
115
+ "epoch": 1.04,
116
+ "grad_norm": 2.2635459899902344,
117
+ "learning_rate": 0.00013120000000000002,
118
+ "loss": 0.401,
119
+ "step": 130
120
+ },
121
+ {
122
+ "epoch": 1.12,
123
+ "grad_norm": 1.9528553485870361,
124
+ "learning_rate": 0.00012586666666666667,
125
+ "loss": 0.5874,
126
+ "step": 140
127
+ },
128
+ {
129
+ "epoch": 1.2,
130
+ "grad_norm": 2.2214958667755127,
131
+ "learning_rate": 0.00012053333333333334,
132
+ "loss": 0.4041,
133
+ "step": 150
134
+ },
135
+ {
136
+ "epoch": 1.2,
137
+ "eval_accuracy": 0.7904191616766467,
138
+ "eval_loss": 0.5236000418663025,
139
+ "eval_runtime": 6.2426,
140
+ "eval_samples_per_second": 80.255,
141
+ "eval_steps_per_second": 10.092,
142
+ "step": 150
143
+ },
144
+ {
145
+ "epoch": 1.28,
146
+ "grad_norm": 1.578341007232666,
147
+ "learning_rate": 0.0001152,
148
+ "loss": 0.4256,
149
+ "step": 160
150
+ },
151
+ {
152
+ "epoch": 1.3599999999999999,
153
+ "grad_norm": 0.521945059299469,
154
+ "learning_rate": 0.00010986666666666668,
155
+ "loss": 0.3168,
156
+ "step": 170
157
+ },
158
+ {
159
+ "epoch": 1.44,
160
+ "grad_norm": 1.1707364320755005,
161
+ "learning_rate": 0.00010453333333333333,
162
+ "loss": 0.4316,
163
+ "step": 180
164
+ },
165
+ {
166
+ "epoch": 1.52,
167
+ "grad_norm": 1.4519349336624146,
168
+ "learning_rate": 9.92e-05,
169
+ "loss": 0.2944,
170
+ "step": 190
171
+ },
172
+ {
173
+ "epoch": 1.6,
174
+ "grad_norm": 1.0237702131271362,
175
+ "learning_rate": 9.386666666666667e-05,
176
+ "loss": 0.3243,
177
+ "step": 200
178
+ },
179
+ {
180
+ "epoch": 1.6,
181
+ "eval_accuracy": 0.874251497005988,
182
+ "eval_loss": 0.3280556797981262,
183
+ "eval_runtime": 4.9813,
184
+ "eval_samples_per_second": 100.577,
185
+ "eval_steps_per_second": 12.647,
186
+ "step": 200
187
+ },
188
+ {
189
+ "epoch": 1.6800000000000002,
190
+ "grad_norm": 4.305712699890137,
191
+ "learning_rate": 8.853333333333333e-05,
192
+ "loss": 0.3118,
193
+ "step": 210
194
+ },
195
+ {
196
+ "epoch": 1.76,
197
+ "grad_norm": 1.6635700464248657,
198
+ "learning_rate": 8.32e-05,
199
+ "loss": 0.2534,
200
+ "step": 220
201
+ },
202
+ {
203
+ "epoch": 1.8399999999999999,
204
+ "grad_norm": 1.5301051139831543,
205
+ "learning_rate": 7.786666666666667e-05,
206
+ "loss": 0.2702,
207
+ "step": 230
208
+ },
209
+ {
210
+ "epoch": 1.92,
211
+ "grad_norm": 0.7002310156822205,
212
+ "learning_rate": 7.253333333333334e-05,
213
+ "loss": 0.1926,
214
+ "step": 240
215
+ },
216
+ {
217
+ "epoch": 2.0,
218
+ "grad_norm": 0.4949961304664612,
219
+ "learning_rate": 6.720000000000001e-05,
220
+ "loss": 0.2041,
221
+ "step": 250
222
+ },
223
+ {
224
+ "epoch": 2.0,
225
+ "eval_accuracy": 0.8922155688622755,
226
+ "eval_loss": 0.28237032890319824,
227
+ "eval_runtime": 4.9486,
228
+ "eval_samples_per_second": 101.242,
229
+ "eval_steps_per_second": 12.731,
230
+ "step": 250
231
+ },
232
+ {
233
+ "epoch": 2.08,
234
+ "grad_norm": 13.5546236038208,
235
+ "learning_rate": 6.186666666666668e-05,
236
+ "loss": 0.173,
237
+ "step": 260
238
+ },
239
+ {
240
+ "epoch": 2.16,
241
+ "grad_norm": 7.708662986755371,
242
+ "learning_rate": 5.6533333333333336e-05,
243
+ "loss": 0.1206,
244
+ "step": 270
245
+ },
246
+ {
247
+ "epoch": 2.24,
248
+ "grad_norm": 0.44631195068359375,
249
+ "learning_rate": 5.1200000000000004e-05,
250
+ "loss": 0.1615,
251
+ "step": 280
252
+ },
253
+ {
254
+ "epoch": 2.32,
255
+ "grad_norm": 1.9430702924728394,
256
+ "learning_rate": 4.5866666666666666e-05,
257
+ "loss": 0.1196,
258
+ "step": 290
259
+ },
260
+ {
261
+ "epoch": 2.4,
262
+ "grad_norm": 2.0821869373321533,
263
+ "learning_rate": 4.0533333333333334e-05,
264
+ "loss": 0.117,
265
+ "step": 300
266
+ },
267
+ {
268
+ "epoch": 2.4,
269
+ "eval_accuracy": 0.9161676646706587,
270
+ "eval_loss": 0.22871683537960052,
271
+ "eval_runtime": 4.8973,
272
+ "eval_samples_per_second": 102.302,
273
+ "eval_steps_per_second": 12.864,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 2.48,
278
+ "grad_norm": 9.634779930114746,
279
+ "learning_rate": 3.52e-05,
280
+ "loss": 0.0606,
281
+ "step": 310
282
+ },
283
+ {
284
+ "epoch": 2.56,
285
+ "grad_norm": 6.319236755371094,
286
+ "learning_rate": 2.986666666666667e-05,
287
+ "loss": 0.0774,
288
+ "step": 320
289
+ },
290
+ {
291
+ "epoch": 2.64,
292
+ "grad_norm": 9.121068000793457,
293
+ "learning_rate": 2.4533333333333334e-05,
294
+ "loss": 0.06,
295
+ "step": 330
296
+ },
297
+ {
298
+ "epoch": 2.7199999999999998,
299
+ "grad_norm": 3.4029245376586914,
300
+ "learning_rate": 1.9200000000000003e-05,
301
+ "loss": 0.0997,
302
+ "step": 340
303
+ },
304
+ {
305
+ "epoch": 2.8,
306
+ "grad_norm": 0.8972381353378296,
307
+ "learning_rate": 1.3866666666666667e-05,
308
+ "loss": 0.0353,
309
+ "step": 350
310
+ },
311
+ {
312
+ "epoch": 2.8,
313
+ "eval_accuracy": 0.9401197604790419,
314
+ "eval_loss": 0.19462016224861145,
315
+ "eval_runtime": 5.5189,
316
+ "eval_samples_per_second": 90.78,
317
+ "eval_steps_per_second": 11.415,
318
+ "step": 350
319
+ },
320
+ {
321
+ "epoch": 2.88,
322
+ "grad_norm": 0.1985252946615219,
323
+ "learning_rate": 8.533333333333334e-06,
324
+ "loss": 0.0833,
325
+ "step": 360
326
+ },
327
+ {
328
+ "epoch": 2.96,
329
+ "grad_norm": 0.129100501537323,
330
+ "learning_rate": 3.2000000000000003e-06,
331
+ "loss": 0.0775,
332
+ "step": 370
333
+ }
334
+ ],
335
+ "logging_steps": 10,
336
+ "max_steps": 375,
337
+ "num_input_tokens_seen": 0,
338
+ "num_train_epochs": 3,
339
+ "save_steps": 50,
340
+ "stateful_callbacks": {
341
+ "TrainerControl": {
342
+ "args": {
343
+ "should_epoch_stop": false,
344
+ "should_evaluate": false,
345
+ "should_log": false,
346
+ "should_save": true,
347
+ "should_training_stop": true
348
+ },
349
+ "attributes": {}
350
+ }
351
+ },
352
+ "total_flos": 4.64951937687552e+17,
353
+ "train_batch_size": 16,
354
+ "trial_name": null,
355
+ "trial_params": null
356
+ }
checkpoint-375/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8317ceaa60875571340e1a38d19c2d04367ed0d503c395ed80f409547047289
3
+ size 5777
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "dtype": "float32",
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Normal",
13
+ "1": "Stroke"
14
+ },
15
+ "image_size": 224,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "Normal": "0",
20
+ "Stroke": "1"
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "model_type": "vit",
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "pooler_act": "tanh",
29
+ "pooler_output_size": 768,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "transformers_version": "4.56.1"
33
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cbf166d41eee416d013784388cb6329a093caa28fe2a65c40b639264d1cb02c
3
+ size 343223968
preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 224,
21
+ "width": 224
22
+ }
23
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 4.64951937687552e+17,
4
+ "train_loss": 0.3516346867879232,
5
+ "train_runtime": 232.2268,
6
+ "train_samples_per_second": 25.837,
7
+ "train_steps_per_second": 1.615
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 350,
3
+ "best_metric": 0.19462016224861145,
4
+ "best_model_checkpoint": "./vit-stroke-detector/checkpoint-350",
5
+ "epoch": 3.0,
6
+ "eval_steps": 50,
7
+ "global_step": 375,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08,
14
+ "grad_norm": 0.828596830368042,
15
+ "learning_rate": 0.0001952,
16
+ "loss": 0.6849,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.16,
21
+ "grad_norm": 1.6220552921295166,
22
+ "learning_rate": 0.00018986666666666668,
23
+ "loss": 0.6632,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.24,
28
+ "grad_norm": 1.931017279624939,
29
+ "learning_rate": 0.00018453333333333334,
30
+ "loss": 0.6737,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.32,
35
+ "grad_norm": 0.5325367450714111,
36
+ "learning_rate": 0.00017920000000000002,
37
+ "loss": 0.6441,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.4,
42
+ "grad_norm": 0.20815162360668182,
43
+ "learning_rate": 0.00017386666666666667,
44
+ "loss": 0.6959,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.4,
49
+ "eval_accuracy": 0.6007984031936128,
50
+ "eval_loss": 0.6659401059150696,
51
+ "eval_runtime": 12.6028,
52
+ "eval_samples_per_second": 39.753,
53
+ "eval_steps_per_second": 4.999,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 0.48,
58
+ "grad_norm": 0.6144075393676758,
59
+ "learning_rate": 0.00016853333333333336,
60
+ "loss": 0.6542,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 0.56,
65
+ "grad_norm": 0.3622696101665497,
66
+ "learning_rate": 0.0001632,
67
+ "loss": 0.6336,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 0.64,
72
+ "grad_norm": 1.7294650077819824,
73
+ "learning_rate": 0.00015786666666666666,
74
+ "loss": 0.6705,
75
+ "step": 80
76
+ },
77
+ {
78
+ "epoch": 0.72,
79
+ "grad_norm": 1.2596920728683472,
80
+ "learning_rate": 0.00015253333333333335,
81
+ "loss": 0.6144,
82
+ "step": 90
83
+ },
84
+ {
85
+ "epoch": 0.8,
86
+ "grad_norm": 2.432494640350342,
87
+ "learning_rate": 0.0001472,
88
+ "loss": 0.6038,
89
+ "step": 100
90
+ },
91
+ {
92
+ "epoch": 0.8,
93
+ "eval_accuracy": 0.624750499001996,
94
+ "eval_loss": 0.6349905133247375,
95
+ "eval_runtime": 4.8798,
96
+ "eval_samples_per_second": 102.668,
97
+ "eval_steps_per_second": 12.91,
98
+ "step": 100
99
+ },
100
+ {
101
+ "epoch": 0.88,
102
+ "grad_norm": 0.8303850293159485,
103
+ "learning_rate": 0.00014186666666666668,
104
+ "loss": 0.5625,
105
+ "step": 110
106
+ },
107
+ {
108
+ "epoch": 0.96,
109
+ "grad_norm": 1.7314858436584473,
110
+ "learning_rate": 0.00013653333333333334,
111
+ "loss": 0.4656,
112
+ "step": 120
113
+ },
114
+ {
115
+ "epoch": 1.04,
116
+ "grad_norm": 2.2635459899902344,
117
+ "learning_rate": 0.00013120000000000002,
118
+ "loss": 0.401,
119
+ "step": 130
120
+ },
121
+ {
122
+ "epoch": 1.12,
123
+ "grad_norm": 1.9528553485870361,
124
+ "learning_rate": 0.00012586666666666667,
125
+ "loss": 0.5874,
126
+ "step": 140
127
+ },
128
+ {
129
+ "epoch": 1.2,
130
+ "grad_norm": 2.2214958667755127,
131
+ "learning_rate": 0.00012053333333333334,
132
+ "loss": 0.4041,
133
+ "step": 150
134
+ },
135
+ {
136
+ "epoch": 1.2,
137
+ "eval_accuracy": 0.7904191616766467,
138
+ "eval_loss": 0.5236000418663025,
139
+ "eval_runtime": 6.2426,
140
+ "eval_samples_per_second": 80.255,
141
+ "eval_steps_per_second": 10.092,
142
+ "step": 150
143
+ },
144
+ {
145
+ "epoch": 1.28,
146
+ "grad_norm": 1.578341007232666,
147
+ "learning_rate": 0.0001152,
148
+ "loss": 0.4256,
149
+ "step": 160
150
+ },
151
+ {
152
+ "epoch": 1.3599999999999999,
153
+ "grad_norm": 0.521945059299469,
154
+ "learning_rate": 0.00010986666666666668,
155
+ "loss": 0.3168,
156
+ "step": 170
157
+ },
158
+ {
159
+ "epoch": 1.44,
160
+ "grad_norm": 1.1707364320755005,
161
+ "learning_rate": 0.00010453333333333333,
162
+ "loss": 0.4316,
163
+ "step": 180
164
+ },
165
+ {
166
+ "epoch": 1.52,
167
+ "grad_norm": 1.4519349336624146,
168
+ "learning_rate": 9.92e-05,
169
+ "loss": 0.2944,
170
+ "step": 190
171
+ },
172
+ {
173
+ "epoch": 1.6,
174
+ "grad_norm": 1.0237702131271362,
175
+ "learning_rate": 9.386666666666667e-05,
176
+ "loss": 0.3243,
177
+ "step": 200
178
+ },
179
+ {
180
+ "epoch": 1.6,
181
+ "eval_accuracy": 0.874251497005988,
182
+ "eval_loss": 0.3280556797981262,
183
+ "eval_runtime": 4.9813,
184
+ "eval_samples_per_second": 100.577,
185
+ "eval_steps_per_second": 12.647,
186
+ "step": 200
187
+ },
188
+ {
189
+ "epoch": 1.6800000000000002,
190
+ "grad_norm": 4.305712699890137,
191
+ "learning_rate": 8.853333333333333e-05,
192
+ "loss": 0.3118,
193
+ "step": 210
194
+ },
195
+ {
196
+ "epoch": 1.76,
197
+ "grad_norm": 1.6635700464248657,
198
+ "learning_rate": 8.32e-05,
199
+ "loss": 0.2534,
200
+ "step": 220
201
+ },
202
+ {
203
+ "epoch": 1.8399999999999999,
204
+ "grad_norm": 1.5301051139831543,
205
+ "learning_rate": 7.786666666666667e-05,
206
+ "loss": 0.2702,
207
+ "step": 230
208
+ },
209
+ {
210
+ "epoch": 1.92,
211
+ "grad_norm": 0.7002310156822205,
212
+ "learning_rate": 7.253333333333334e-05,
213
+ "loss": 0.1926,
214
+ "step": 240
215
+ },
216
+ {
217
+ "epoch": 2.0,
218
+ "grad_norm": 0.4949961304664612,
219
+ "learning_rate": 6.720000000000001e-05,
220
+ "loss": 0.2041,
221
+ "step": 250
222
+ },
223
+ {
224
+ "epoch": 2.0,
225
+ "eval_accuracy": 0.8922155688622755,
226
+ "eval_loss": 0.28237032890319824,
227
+ "eval_runtime": 4.9486,
228
+ "eval_samples_per_second": 101.242,
229
+ "eval_steps_per_second": 12.731,
230
+ "step": 250
231
+ },
232
+ {
233
+ "epoch": 2.08,
234
+ "grad_norm": 13.5546236038208,
235
+ "learning_rate": 6.186666666666668e-05,
236
+ "loss": 0.173,
237
+ "step": 260
238
+ },
239
+ {
240
+ "epoch": 2.16,
241
+ "grad_norm": 7.708662986755371,
242
+ "learning_rate": 5.6533333333333336e-05,
243
+ "loss": 0.1206,
244
+ "step": 270
245
+ },
246
+ {
247
+ "epoch": 2.24,
248
+ "grad_norm": 0.44631195068359375,
249
+ "learning_rate": 5.1200000000000004e-05,
250
+ "loss": 0.1615,
251
+ "step": 280
252
+ },
253
+ {
254
+ "epoch": 2.32,
255
+ "grad_norm": 1.9430702924728394,
256
+ "learning_rate": 4.5866666666666666e-05,
257
+ "loss": 0.1196,
258
+ "step": 290
259
+ },
260
+ {
261
+ "epoch": 2.4,
262
+ "grad_norm": 2.0821869373321533,
263
+ "learning_rate": 4.0533333333333334e-05,
264
+ "loss": 0.117,
265
+ "step": 300
266
+ },
267
+ {
268
+ "epoch": 2.4,
269
+ "eval_accuracy": 0.9161676646706587,
270
+ "eval_loss": 0.22871683537960052,
271
+ "eval_runtime": 4.8973,
272
+ "eval_samples_per_second": 102.302,
273
+ "eval_steps_per_second": 12.864,
274
+ "step": 300
275
+ },
276
+ {
277
+ "epoch": 2.48,
278
+ "grad_norm": 9.634779930114746,
279
+ "learning_rate": 3.52e-05,
280
+ "loss": 0.0606,
281
+ "step": 310
282
+ },
283
+ {
284
+ "epoch": 2.56,
285
+ "grad_norm": 6.319236755371094,
286
+ "learning_rate": 2.986666666666667e-05,
287
+ "loss": 0.0774,
288
+ "step": 320
289
+ },
290
+ {
291
+ "epoch": 2.64,
292
+ "grad_norm": 9.121068000793457,
293
+ "learning_rate": 2.4533333333333334e-05,
294
+ "loss": 0.06,
295
+ "step": 330
296
+ },
297
+ {
298
+ "epoch": 2.7199999999999998,
299
+ "grad_norm": 3.4029245376586914,
300
+ "learning_rate": 1.9200000000000003e-05,
301
+ "loss": 0.0997,
302
+ "step": 340
303
+ },
304
+ {
305
+ "epoch": 2.8,
306
+ "grad_norm": 0.8972381353378296,
307
+ "learning_rate": 1.3866666666666667e-05,
308
+ "loss": 0.0353,
309
+ "step": 350
310
+ },
311
+ {
312
+ "epoch": 2.8,
313
+ "eval_accuracy": 0.9401197604790419,
314
+ "eval_loss": 0.19462016224861145,
315
+ "eval_runtime": 5.5189,
316
+ "eval_samples_per_second": 90.78,
317
+ "eval_steps_per_second": 11.415,
318
+ "step": 350
319
+ },
320
+ {
321
+ "epoch": 2.88,
322
+ "grad_norm": 0.1985252946615219,
323
+ "learning_rate": 8.533333333333334e-06,
324
+ "loss": 0.0833,
325
+ "step": 360
326
+ },
327
+ {
328
+ "epoch": 2.96,
329
+ "grad_norm": 0.129100501537323,
330
+ "learning_rate": 3.2000000000000003e-06,
331
+ "loss": 0.0775,
332
+ "step": 370
333
+ },
334
+ {
335
+ "epoch": 3.0,
336
+ "step": 375,
337
+ "total_flos": 4.64951937687552e+17,
338
+ "train_loss": 0.3516346867879232,
339
+ "train_runtime": 232.2268,
340
+ "train_samples_per_second": 25.837,
341
+ "train_steps_per_second": 1.615
342
+ }
343
+ ],
344
+ "logging_steps": 10,
345
+ "max_steps": 375,
346
+ "num_input_tokens_seen": 0,
347
+ "num_train_epochs": 3,
348
+ "save_steps": 50,
349
+ "stateful_callbacks": {
350
+ "TrainerControl": {
351
+ "args": {
352
+ "should_epoch_stop": false,
353
+ "should_evaluate": false,
354
+ "should_log": false,
355
+ "should_save": true,
356
+ "should_training_stop": true
357
+ },
358
+ "attributes": {}
359
+ }
360
+ },
361
+ "total_flos": 4.64951937687552e+17,
362
+ "train_batch_size": 16,
363
+ "trial_name": null,
364
+ "trial_params": null
365
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8317ceaa60875571340e1a38d19c2d04367ed0d503c395ed80f409547047289
3
+ size 5777