Mattysmittttt committed on
Commit
41f96f5
·
verified ·
1 Parent(s): ec97dbb

prune intermediate checkpoint-581 (optimizer state, not needed for inference)

Browse files
checkpoint-581/config.json DELETED
@@ -1,134 +0,0 @@
1
- {
2
- "apply_layernorm": true,
3
- "architectures": [
4
- "Dinov2ForImageClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.0,
7
- "drop_path_rate": 0.0,
8
- "dtype": "float32",
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.0,
11
- "hidden_size": 384,
12
- "id2label": {
13
- "0": "us_erdl",
14
- "1": "us_m81_woodland",
15
- "2": "us_dcu_chocolate_chip",
16
- "3": "us_dcu_3color",
17
- "4": "us_marpat_woodland",
18
- "5": "us_marpat_desert",
19
- "6": "us_ucp",
20
- "7": "us_multicam",
21
- "8": "us_ocp_scorpion",
22
- "9": "us_aor1",
23
- "10": "us_aor2",
24
- "11": "us_tigerstripe",
25
- "12": "uk_dpm_woodland",
26
- "13": "uk_dpm_desert",
27
- "14": "uk_mtp",
28
- "15": "de_flecktarn",
29
- "16": "de_tropentarn",
30
- "17": "de_splittertarn",
31
- "18": "ru_klmk",
32
- "19": "ru_ttsko",
33
- "20": "ru_vsr_93",
34
- "21": "ru_emr_digital_flora",
35
- "22": "ru_surpat",
36
- "23": "ru_partizan",
37
- "24": "ca_cadpat_tw",
38
- "25": "ca_cadpat_ar",
39
- "26": "fr_cce",
40
- "27": "fr_daguet",
41
- "28": "it_vegetata",
42
- "29": "au_auscam",
43
- "30": "au_amcu",
44
- "31": "se_m90",
45
- "32": "ch_taz_90",
46
- "33": "no_m75",
47
- "34": "cn_type07_universal",
48
- "35": "cn_type07_desert",
49
- "36": "kr_granite",
50
- "37": "jp_jgsdf",
51
- "38": "commercial_kryptek_mandrake",
52
- "39": "commercial_atacs_au"
53
- },
54
- "image_size": 518,
55
- "initializer_range": 0.02,
56
- "label2id": {
57
- "au_amcu": 30,
58
- "au_auscam": 29,
59
- "ca_cadpat_ar": 25,
60
- "ca_cadpat_tw": 24,
61
- "ch_taz_90": 32,
62
- "cn_type07_desert": 35,
63
- "cn_type07_universal": 34,
64
- "commercial_atacs_au": 39,
65
- "commercial_kryptek_mandrake": 38,
66
- "de_flecktarn": 15,
67
- "de_splittertarn": 17,
68
- "de_tropentarn": 16,
69
- "fr_cce": 26,
70
- "fr_daguet": 27,
71
- "it_vegetata": 28,
72
- "jp_jgsdf": 37,
73
- "kr_granite": 36,
74
- "no_m75": 33,
75
- "ru_emr_digital_flora": 21,
76
- "ru_klmk": 18,
77
- "ru_partizan": 23,
78
- "ru_surpat": 22,
79
- "ru_ttsko": 19,
80
- "ru_vsr_93": 20,
81
- "se_m90": 31,
82
- "uk_dpm_desert": 13,
83
- "uk_dpm_woodland": 12,
84
- "uk_mtp": 14,
85
- "us_aor1": 9,
86
- "us_aor2": 10,
87
- "us_dcu_3color": 3,
88
- "us_dcu_chocolate_chip": 2,
89
- "us_erdl": 0,
90
- "us_m81_woodland": 1,
91
- "us_marpat_desert": 5,
92
- "us_marpat_woodland": 4,
93
- "us_multicam": 7,
94
- "us_ocp_scorpion": 8,
95
- "us_tigerstripe": 11,
96
- "us_ucp": 6
97
- },
98
- "layer_norm_eps": 1e-06,
99
- "layerscale_value": 1.0,
100
- "mlp_ratio": 4,
101
- "model_type": "dinov2",
102
- "num_attention_heads": 6,
103
- "num_channels": 3,
104
- "num_hidden_layers": 12,
105
- "out_features": [
106
- "stage12"
107
- ],
108
- "out_indices": [
109
- 12
110
- ],
111
- "patch_size": 14,
112
- "problem_type": "single_label_classification",
113
- "qkv_bias": true,
114
- "reshape_hidden_states": true,
115
- "stage_names": [
116
- "stem",
117
- "stage1",
118
- "stage2",
119
- "stage3",
120
- "stage4",
121
- "stage5",
122
- "stage6",
123
- "stage7",
124
- "stage8",
125
- "stage9",
126
- "stage10",
127
- "stage11",
128
- "stage12"
129
- ],
130
- "transformers_version": "5.7.0",
131
- "use_cache": false,
132
- "use_mask_token": true,
133
- "use_swiglu_ffn": false
134
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-581/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e70c3561255a6355fb8b3f2bf9f1908dc6f0ffe450a79fc7d9de81ef48dcbd05
3
- size 88374736
 
 
 
 
checkpoint-581/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c530284e3d9d25bc8c2a0cdb085de1f36b99f3bc1de3f53881f0e6bd631ec2a
3
- size 176876939
 
 
 
 
checkpoint-581/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f13fa096c82c4698939c19f78b0766bd5c2a2142c49acdf650aabfa34b99d7f1
3
- size 14455
 
 
 
 
checkpoint-581/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b31335693ae80680c90999bc6cfab87b4543a49278f1c79cd67beaba0cc4cd1
3
- size 1465
 
 
 
 
checkpoint-581/trainer_state.json DELETED
@@ -1,307 +0,0 @@
1
- {
2
- "best_global_step": 581,
3
- "best_metric": 0.7325842696629213,
4
- "best_model_checkpoint": "checkpoints/camonet/checkpoint-581",
5
- "epoch": 7.0,
6
- "eval_steps": 500,
7
- "global_step": 581,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.24096385542168675,
14
- "grad_norm": 111.35398864746094,
15
- "learning_rate": 1.417910447761194e-05,
16
- "loss": 4.043815612792969,
17
- "step": 20
18
- },
19
- {
20
- "epoch": 0.4819277108433735,
21
- "grad_norm": 71.05435943603516,
22
- "learning_rate": 2.91044776119403e-05,
23
- "loss": 3.5163707733154297,
24
- "step": 40
25
- },
26
- {
27
- "epoch": 0.7228915662650602,
28
- "grad_norm": 60.80617141723633,
29
- "learning_rate": 4.402985074626866e-05,
30
- "loss": 3.0165132522583007,
31
- "step": 60
32
- },
33
- {
34
- "epoch": 0.963855421686747,
35
- "grad_norm": 64.55865478515625,
36
- "learning_rate": 4.9950171333287335e-05,
37
- "loss": 2.4458030700683593,
38
- "step": 80
39
- },
40
- {
41
- "epoch": 1.0,
42
- "eval_loss": 2.536519765853882,
43
- "eval_runtime": 6.8652,
44
- "eval_samples_per_second": 64.82,
45
- "eval_steps_per_second": 2.039,
46
- "eval_top1": 0.3325842696629214,
47
- "eval_top3": 0.5438202247191011,
48
- "step": 83
49
- },
50
- {
51
- "epoch": 1.2048192771084336,
52
- "grad_norm": 60.52080535888672,
53
- "learning_rate": 4.964638184169378e-05,
54
- "loss": 2.153371238708496,
55
- "step": 100
56
- },
57
- {
58
- "epoch": 1.4457831325301205,
59
- "grad_norm": 70.68125915527344,
60
- "learning_rate": 4.906984324751821e-05,
61
- "loss": 1.828770637512207,
62
- "step": 120
63
- },
64
- {
65
- "epoch": 1.6867469879518073,
66
- "grad_norm": 60.982425689697266,
67
- "learning_rate": 4.822693581319326e-05,
68
- "loss": 1.7083017349243164,
69
- "step": 140
70
- },
71
- {
72
- "epoch": 1.927710843373494,
73
- "grad_norm": 58.39655303955078,
74
- "learning_rate": 4.712698757103414e-05,
75
- "loss": 1.712203025817871,
76
- "step": 160
77
- },
78
- {
79
- "epoch": 2.0,
80
- "eval_loss": 1.7305916547775269,
81
- "eval_runtime": 7.5482,
82
- "eval_samples_per_second": 58.954,
83
- "eval_steps_per_second": 1.855,
84
- "eval_top1": 0.5415730337078651,
85
- "eval_top3": 0.7146067415730337,
86
- "step": 166
87
- },
88
- {
89
- "epoch": 2.1686746987951806,
90
- "grad_norm": 60.08979415893555,
91
- "learning_rate": 4.5782171094600005e-05,
92
- "loss": 1.3887232780456542,
93
- "step": 180
94
- },
95
- {
96
- "epoch": 2.4096385542168672,
97
- "grad_norm": 64.29576873779297,
98
- "learning_rate": 4.420736879094927e-05,
99
- "loss": 1.3437091827392578,
100
- "step": 200
101
- },
102
- {
103
- "epoch": 2.6506024096385543,
104
- "grad_norm": 62.950355529785156,
105
- "learning_rate": 4.242000820453141e-05,
106
- "loss": 1.139847469329834,
107
- "step": 220
108
- },
109
- {
110
- "epoch": 2.891566265060241,
111
- "grad_norm": 43.11043167114258,
112
- "learning_rate": 4.043986915532434e-05,
113
- "loss": 1.2316542625427247,
114
- "step": 240
115
- },
116
- {
117
- "epoch": 3.0,
118
- "eval_loss": 1.4789479970932007,
119
- "eval_runtime": 7.5421,
120
- "eval_samples_per_second": 59.002,
121
- "eval_steps_per_second": 1.856,
122
- "eval_top1": 0.6269662921348315,
123
- "eval_top3": 0.7842696629213484,
124
- "step": 249
125
- },
126
- {
127
- "epoch": 3.1325301204819276,
128
- "grad_norm": 66.58777618408203,
129
- "learning_rate": 3.828886484552254e-05,
130
- "loss": 1.033012580871582,
131
- "step": 260
132
- },
133
- {
134
- "epoch": 3.3734939759036147,
135
- "grad_norm": 53.87052536010742,
136
- "learning_rate": 3.5990799357157864e-05,
137
- "loss": 0.8578171730041504,
138
- "step": 280
139
- },
140
- {
141
- "epoch": 3.6144578313253013,
142
- "grad_norm": 72.20587921142578,
143
- "learning_rate": 3.357110422430506e-05,
144
- "loss": 0.8009697914123535,
145
- "step": 300
146
- },
147
- {
148
- "epoch": 3.855421686746988,
149
- "grad_norm": 47.637420654296875,
150
- "learning_rate": 3.105655699509458e-05,
151
- "loss": 0.8424944877624512,
152
- "step": 320
153
- },
154
- {
155
- "epoch": 4.0,
156
- "eval_loss": 1.3673882484436035,
157
- "eval_runtime": 8.6387,
158
- "eval_samples_per_second": 51.512,
159
- "eval_steps_per_second": 1.621,
160
- "eval_top1": 0.6202247191011236,
161
- "eval_top3": 0.8112359550561797,
162
- "step": 332
163
- },
164
- {
165
- "epoch": 4.096385542168675,
166
- "grad_norm": 44.70951461791992,
167
- "learning_rate": 2.8474984898065406e-05,
168
- "loss": 0.7181183815002441,
169
- "step": 340
170
- },
171
- {
172
- "epoch": 4.337349397590361,
173
- "grad_norm": 40.632354736328125,
174
- "learning_rate": 2.5854956892233006e-05,
175
- "loss": 0.6570661544799805,
176
- "step": 360
177
- },
178
- {
179
- "epoch": 4.578313253012048,
180
- "grad_norm": 41.45029067993164,
181
- "learning_rate": 2.32254675087996e-05,
182
- "loss": 0.5137750148773194,
183
- "step": 380
184
- },
185
- {
186
- "epoch": 4.8192771084337345,
187
- "grad_norm": 53.68215560913086,
188
- "learning_rate": 2.061561598327112e-05,
189
- "loss": 0.5405148029327392,
190
- "step": 400
191
- },
192
- {
193
- "epoch": 5.0,
194
- "eval_loss": 1.1706817150115967,
195
- "eval_runtime": 8.8354,
196
- "eval_samples_per_second": 50.365,
197
- "eval_steps_per_second": 1.585,
198
- "eval_top1": 0.6831460674157304,
199
- "eval_top3": 0.849438202247191,
200
- "step": 415
201
- },
202
- {
203
- "epoch": 5.0602409638554215,
204
- "grad_norm": 75.12175750732422,
205
- "learning_rate": 1.8054284228864366e-05,
206
- "loss": 0.49901180267333983,
207
- "step": 420
208
- },
209
- {
210
- "epoch": 5.301204819277109,
211
- "grad_norm": 18.631591796875,
212
- "learning_rate": 1.55698172149106e-05,
213
- "loss": 0.29165282249450686,
214
- "step": 440
215
- },
216
- {
217
- "epoch": 5.542168674698795,
218
- "grad_norm": 10.731932640075684,
219
- "learning_rate": 1.3189709287346813e-05,
220
- "loss": 0.3043174982070923,
221
- "step": 460
222
- },
223
- {
224
- "epoch": 5.783132530120482,
225
- "grad_norm": 47.11884689331055,
226
- "learning_rate": 1.0940299902627938e-05,
227
- "loss": 0.41364297866821287,
228
- "step": 480
229
- },
230
- {
231
- "epoch": 6.0,
232
- "eval_loss": 1.1097288131713867,
233
- "eval_runtime": 8.2234,
234
- "eval_samples_per_second": 54.114,
235
- "eval_steps_per_second": 1.702,
236
- "eval_top1": 0.7258426966292135,
237
- "eval_top3": 0.8674157303370786,
238
- "step": 498
239
- },
240
- {
241
- "epoch": 6.024096385542169,
242
- "grad_norm": 20.367740631103516,
243
- "learning_rate": 8.846482142219752e-06,
244
- "loss": 0.34232077598571775,
245
- "step": 500
246
- },
247
- {
248
- "epoch": 6.265060240963855,
249
- "grad_norm": 29.076051712036133,
250
- "learning_rate": 6.931427233395696e-06,
251
- "loss": 0.18118011951446533,
252
- "step": 520
253
- },
254
- {
255
- "epoch": 6.506024096385542,
256
- "grad_norm": 21.859106063842773,
257
- "learning_rate": 5.21632812492738e-06,
258
- "loss": 0.204935622215271,
259
- "step": 540
260
- },
261
- {
262
- "epoch": 6.746987951807229,
263
- "grad_norm": 24.722841262817383,
264
- "learning_rate": 3.7201649553876366e-06,
265
- "loss": 0.21130716800689697,
266
- "step": 560
267
- },
268
- {
269
- "epoch": 6.9879518072289155,
270
- "grad_norm": 27.248981475830078,
271
- "learning_rate": 2.4594950095101513e-06,
272
- "loss": 0.2202404260635376,
273
- "step": 580
274
- },
275
- {
276
- "epoch": 7.0,
277
- "eval_loss": 1.105672001838684,
278
- "eval_runtime": 8.1718,
279
- "eval_samples_per_second": 54.455,
280
- "eval_steps_per_second": 1.713,
281
- "eval_top1": 0.7325842696629213,
282
- "eval_top3": 0.8741573033707866,
283
- "step": 581
284
- }
285
- ],
286
- "logging_steps": 20,
287
- "max_steps": 664,
288
- "num_input_tokens_seen": 0,
289
- "num_train_epochs": 8,
290
- "save_steps": 500,
291
- "stateful_callbacks": {
292
- "TrainerControl": {
293
- "args": {
294
- "should_epoch_stop": false,
295
- "should_evaluate": false,
296
- "should_log": false,
297
- "should_save": true,
298
- "should_training_stop": false
299
- },
300
- "attributes": {}
301
- }
302
- },
303
- "total_flos": 4.8058967165239296e+17,
304
- "train_batch_size": 32,
305
- "trial_name": null,
306
- "trial_params": null
307
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-581/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6afabf8f636fefd143058269d2e6c5383477744d5a9e8c2dc666c57c59670119
3
- size 5265