johnomeara committed on
Commit
b7936e5
·
verified ·
1 Parent(s): 92ddf35

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. config.json +2 -2
  2. model.safetensors +1 -1
  3. optimizer.pt +2 -2
  4. rng_state.pth +2 -2
  5. scheduler.pt +2 -2
  6. trainer_state.json +252 -728
  7. training_args.bin +2 -2
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "facebook/dinov2-large",
3
  "apply_layernorm": true,
4
  "architectures": [
5
  "Dinov2ForImageClassification"
@@ -64,6 +63,7 @@
64
  "stage24"
65
  ],
66
  "torch_dtype": "float32",
67
- "transformers_version": "4.47.1",
 
68
  "use_swiglu_ffn": false
69
  }
 
1
  {
 
2
  "apply_layernorm": true,
3
  "architectures": [
4
  "Dinov2ForImageClassification"
 
63
  "stage24"
64
  ],
65
  "torch_dtype": "float32",
66
+ "transformers_version": "4.55.0",
67
+ "use_mask_token": true,
68
  "use_swiglu_ffn": false
69
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4db45e1375a111330f445f36615ec59a496219282a40ba73885298d28e14472
3
  size 1217542512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0008d0b179f0af6309b99b8c6fabf134a11ed6bdd66ded13444541a4a3e6fbcb
3
  size 1217542512
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d60761ac8c8b5a161ea9b8f512bb1924d67f201c99d9adca3a338bf88aac23f
3
- size 2435341946
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48abcb40e932df8c553badeafb3b0a18b0f7d781ab7f8395c3cf20c85c189a66
3
+ size 2435342411
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660ccec1688742a08ac50147863109a12b04472562e401ba83f158155084b971
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4395a530497f268c762a08cd4fcee96c37463e64496c3f6e4a0c83bb1f5337f6
3
+ size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fc364201fbceac17ee28f0c25f6cd2003904f59d3a8ddb2469ba7bfdd346578
3
- size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2e6312b4ddd707deada4d923ee63434e3e31f0333703f16ac94f13036d755de
3
+ size 1465
trainer_state.json CHANGED
@@ -1,858 +1,382 @@
1
  {
2
- "best_metric": 0.9417692129092176,
3
- "best_model_checkpoint": "Crosswalk/dinov2/checkpoint-924",
4
- "epoch": 22.0,
 
5
  "eval_steps": 500,
6
- "global_step": 924,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.24242424242424243,
13
- "grad_norm": 1809.6781005859375,
14
- "learning_rate": 9.70873786407767e-07,
15
- "loss": 4.7087,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.48484848484848486,
20
- "grad_norm": 190.5601806640625,
21
- "learning_rate": 1.941747572815534e-06,
22
- "loss": 3.034,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.7272727272727273,
27
- "grad_norm": 76.29146575927734,
28
- "learning_rate": 2.912621359223301e-06,
29
- "loss": 2.0024,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.9696969696969697,
34
- "grad_norm": 75.1009750366211,
35
- "learning_rate": 3.883495145631068e-06,
36
- "loss": 1.4019,
 
 
 
 
 
 
 
 
 
37
  "step": 40
38
  },
39
  {
40
- "epoch": 1.0,
41
- "eval_loss": 0.2978227138519287,
42
- "eval_macro_f1": 0.8815037150933147,
43
- "eval_runtime": 7.5247,
44
- "eval_samples_per_second": 43.856,
45
- "eval_steps_per_second": 5.582,
46
- "step": 42
47
- },
48
- {
49
- "epoch": 1.1939393939393939,
50
- "grad_norm": 86.90376281738281,
51
- "learning_rate": 4.854368932038836e-06,
52
- "loss": 0.7179,
53
  "step": 50
54
  },
55
  {
56
- "epoch": 1.4363636363636363,
57
- "grad_norm": 118.144287109375,
58
- "learning_rate": 5.825242718446602e-06,
59
- "loss": 0.9737,
60
  "step": 60
61
  },
62
  {
63
- "epoch": 1.6787878787878787,
64
- "grad_norm": 174.987548828125,
65
- "learning_rate": 6.79611650485437e-06,
66
- "loss": 1.3976,
67
  "step": 70
68
  },
69
  {
70
- "epoch": 1.9212121212121214,
71
- "grad_norm": 35.86643981933594,
72
- "learning_rate": 7.766990291262136e-06,
73
- "loss": 0.967,
 
 
 
 
 
 
 
 
 
74
  "step": 80
75
  },
76
  {
77
- "epoch": 2.0,
78
- "eval_loss": 0.22084859013557434,
79
- "eval_macro_f1": 0.9229339286881953,
80
- "eval_runtime": 5.8751,
81
- "eval_samples_per_second": 56.169,
82
- "eval_steps_per_second": 7.149,
83
- "step": 84
84
- },
85
- {
86
- "epoch": 2.1454545454545455,
87
- "grad_norm": 232.57371520996094,
88
- "learning_rate": 8.737864077669904e-06,
89
- "loss": 1.323,
90
  "step": 90
91
  },
92
  {
93
- "epoch": 2.3878787878787877,
94
- "grad_norm": 72.19599914550781,
95
- "learning_rate": 9.708737864077671e-06,
96
- "loss": 1.005,
97
  "step": 100
98
  },
99
  {
100
- "epoch": 2.6303030303030304,
101
- "grad_norm": 55.63515853881836,
102
- "learning_rate": 9.924078091106291e-06,
103
- "loss": 0.6989,
 
 
 
 
 
 
 
 
 
104
  "step": 110
105
  },
106
  {
107
- "epoch": 2.8727272727272726,
108
- "grad_norm": 56.631591796875,
109
- "learning_rate": 9.815618221258135e-06,
110
- "loss": 0.7527,
111
  "step": 120
112
  },
113
  {
114
- "epoch": 3.0,
115
- "eval_loss": 0.3025018870830536,
116
- "eval_macro_f1": 0.9009343690194753,
117
- "eval_runtime": 6.0811,
118
- "eval_samples_per_second": 54.266,
119
- "eval_steps_per_second": 6.907,
120
- "step": 126
121
- },
122
- {
123
- "epoch": 3.096969696969697,
124
- "grad_norm": 40.3350715637207,
125
- "learning_rate": 9.70715835140998e-06,
126
- "loss": 0.6221,
127
  "step": 130
128
  },
129
  {
130
- "epoch": 3.3393939393939394,
131
- "grad_norm": 95.85454559326172,
132
- "learning_rate": 9.598698481561823e-06,
133
- "loss": 0.9071,
 
 
 
 
 
 
 
 
 
134
  "step": 140
135
  },
136
  {
137
- "epoch": 3.581818181818182,
138
- "grad_norm": 94.18701934814453,
139
- "learning_rate": 9.490238611713667e-06,
140
- "loss": 0.7042,
141
  "step": 150
142
  },
143
  {
144
- "epoch": 3.824242424242424,
145
- "grad_norm": 477.8069763183594,
146
- "learning_rate": 9.38177874186551e-06,
147
- "loss": 0.635,
148
  "step": 160
149
  },
150
  {
151
- "epoch": 4.0,
152
- "eval_loss": 0.22365985810756683,
153
- "eval_macro_f1": 0.9049918736939866,
154
- "eval_runtime": 5.9294,
155
- "eval_samples_per_second": 55.655,
156
- "eval_steps_per_second": 7.083,
157
- "step": 168
158
- },
159
- {
160
- "epoch": 4.048484848484849,
161
- "grad_norm": 110.27919006347656,
162
- "learning_rate": 9.273318872017354e-06,
163
- "loss": 1.1381,
164
  "step": 170
165
  },
166
  {
167
- "epoch": 4.290909090909091,
168
- "grad_norm": 58.735958099365234,
169
- "learning_rate": 9.1648590021692e-06,
170
- "loss": 0.7225,
 
 
 
 
 
 
 
 
 
171
  "step": 180
172
  },
173
  {
174
- "epoch": 4.533333333333333,
175
- "grad_norm": 75.20926666259766,
176
- "learning_rate": 9.056399132321042e-06,
177
- "loss": 0.4634,
178
  "step": 190
179
  },
180
  {
181
- "epoch": 4.775757575757575,
182
- "grad_norm": 18.876136779785156,
183
- "learning_rate": 8.947939262472886e-06,
184
- "loss": 0.6293,
185
  "step": 200
186
  },
187
  {
188
- "epoch": 5.0,
189
- "grad_norm": 8.281109809875488,
190
- "learning_rate": 8.83947939262473e-06,
191
- "loss": 0.6632,
192
  "step": 210
193
  },
194
  {
195
- "epoch": 5.0,
196
- "eval_loss": 0.2299780696630478,
197
- "eval_macro_f1": 0.9176304185040354,
198
- "eval_runtime": 6.0281,
199
- "eval_samples_per_second": 54.744,
200
- "eval_steps_per_second": 6.967,
201
  "step": 210
202
  },
203
  {
204
- "epoch": 5.242424242424242,
205
- "grad_norm": 13.756321907043457,
206
- "learning_rate": 8.731019522776574e-06,
207
- "loss": 0.4708,
208
  "step": 220
209
  },
210
  {
211
- "epoch": 5.484848484848484,
212
- "grad_norm": 59.22605895996094,
213
- "learning_rate": 8.622559652928418e-06,
214
- "loss": 0.7127,
215
  "step": 230
216
  },
217
  {
218
- "epoch": 5.7272727272727275,
219
- "grad_norm": 32.43043899536133,
220
- "learning_rate": 8.514099783080262e-06,
221
- "loss": 0.5682,
222
  "step": 240
223
  },
224
  {
225
- "epoch": 5.96969696969697,
226
- "grad_norm": 54.722599029541016,
227
- "learning_rate": 8.405639913232104e-06,
228
- "loss": 0.8667,
 
 
 
 
 
 
 
 
 
229
  "step": 250
230
  },
231
  {
232
- "epoch": 6.0,
233
- "eval_loss": 0.2767850160598755,
234
- "eval_macro_f1": 0.9210700618192522,
235
- "eval_runtime": 6.2004,
236
- "eval_samples_per_second": 53.223,
237
- "eval_steps_per_second": 6.774,
238
- "step": 252
239
- },
240
- {
241
- "epoch": 6.193939393939394,
242
- "grad_norm": 18.175823211669922,
243
- "learning_rate": 8.29718004338395e-06,
244
- "loss": 0.6752,
245
  "step": 260
246
  },
247
  {
248
- "epoch": 6.4363636363636365,
249
- "grad_norm": 56.679988861083984,
250
- "learning_rate": 8.188720173535792e-06,
251
- "loss": 0.3727,
252
  "step": 270
253
  },
254
  {
255
- "epoch": 6.678787878787879,
256
- "grad_norm": 57.3917236328125,
257
- "learning_rate": 8.080260303687636e-06,
258
- "loss": 1.0167,
 
 
 
 
 
 
 
 
 
259
  "step": 280
260
  },
261
  {
262
- "epoch": 6.921212121212121,
263
- "grad_norm": 42.186038970947266,
264
- "learning_rate": 7.97180043383948e-06,
265
- "loss": 0.9377,
266
  "step": 290
267
  },
268
  {
269
- "epoch": 7.0,
270
- "eval_loss": 0.29274508357048035,
271
- "eval_macro_f1": 0.9138863000931967,
272
- "eval_runtime": 6.1213,
273
- "eval_samples_per_second": 53.91,
274
- "eval_steps_per_second": 6.861,
275
- "step": 294
276
- },
277
- {
278
- "epoch": 7.1454545454545455,
279
- "grad_norm": 41.31782531738281,
280
- "learning_rate": 7.863340563991324e-06,
281
- "loss": 0.3818,
282
  "step": 300
283
  },
284
  {
285
- "epoch": 7.387878787878788,
286
- "grad_norm": 4.223178863525391,
287
- "learning_rate": 7.754880694143168e-06,
288
- "loss": 0.4503,
289
  "step": 310
290
  },
291
  {
292
- "epoch": 7.63030303030303,
293
- "grad_norm": 35.64258575439453,
294
- "learning_rate": 7.646420824295012e-06,
295
- "loss": 0.6038,
 
 
 
 
 
 
 
 
 
296
  "step": 320
297
  },
298
  {
299
- "epoch": 7.872727272727273,
300
- "grad_norm": 37.91206359863281,
301
- "learning_rate": 7.537960954446856e-06,
302
- "loss": 0.5407,
303
  "step": 330
304
  },
305
  {
306
- "epoch": 8.0,
307
- "eval_loss": 0.20143219828605652,
308
- "eval_macro_f1": 0.9357970705676355,
309
- "eval_runtime": 5.9715,
310
- "eval_samples_per_second": 55.263,
311
- "eval_steps_per_second": 7.033,
312
- "step": 336
313
- },
314
- {
315
- "epoch": 8.096969696969698,
316
- "grad_norm": 9.571391105651855,
317
- "learning_rate": 7.429501084598699e-06,
318
- "loss": 0.3311,
319
  "step": 340
320
  },
321
- {
322
- "epoch": 8.33939393939394,
323
- "grad_norm": 30.14655876159668,
324
- "learning_rate": 7.321041214750543e-06,
325
- "loss": 0.5367,
326
- "step": 350
327
- },
328
- {
329
- "epoch": 8.581818181818182,
330
- "grad_norm": 125.38350677490234,
331
- "learning_rate": 7.212581344902386e-06,
332
- "loss": 0.4511,
333
- "step": 360
334
- },
335
- {
336
- "epoch": 8.824242424242424,
337
- "grad_norm": 283.20819091796875,
338
- "learning_rate": 7.104121475054231e-06,
339
- "loss": 0.5474,
340
- "step": 370
341
- },
342
- {
343
- "epoch": 9.0,
344
- "eval_loss": 0.329227477312088,
345
- "eval_macro_f1": 0.8817302125547928,
346
- "eval_runtime": 5.984,
347
- "eval_samples_per_second": 55.147,
348
- "eval_steps_per_second": 7.019,
349
- "step": 378
350
- },
351
- {
352
- "epoch": 9.048484848484849,
353
- "grad_norm": 31.927379608154297,
354
- "learning_rate": 6.995661605206075e-06,
355
- "loss": 0.3963,
356
- "step": 380
357
- },
358
- {
359
- "epoch": 9.290909090909091,
360
- "grad_norm": 10.10098934173584,
361
- "learning_rate": 6.887201735357918e-06,
362
- "loss": 0.445,
363
- "step": 390
364
- },
365
- {
366
- "epoch": 9.533333333333333,
367
- "grad_norm": 1.947770118713379,
368
- "learning_rate": 6.778741865509761e-06,
369
- "loss": 0.5947,
370
- "step": 400
371
- },
372
- {
373
- "epoch": 9.775757575757575,
374
- "grad_norm": 54.11802673339844,
375
- "learning_rate": 6.670281995661606e-06,
376
- "loss": 0.6001,
377
- "step": 410
378
- },
379
  {
380
  "epoch": 10.0,
381
- "grad_norm": 0.004518165718764067,
382
- "learning_rate": 6.56182212581345e-06,
383
- "loss": 0.412,
384
- "step": 420
385
  },
386
  {
387
  "epoch": 10.0,
388
- "eval_loss": 0.3594599962234497,
389
- "eval_macro_f1": 0.907735321528425,
390
- "eval_runtime": 6.0125,
391
- "eval_samples_per_second": 54.885,
392
- "eval_steps_per_second": 6.985,
393
- "step": 420
394
- },
395
- {
396
- "epoch": 10.242424242424242,
397
- "grad_norm": 0.22337216138839722,
398
- "learning_rate": 6.453362255965293e-06,
399
- "loss": 0.2798,
400
- "step": 430
401
  },
402
  {
403
- "epoch": 10.484848484848484,
404
- "grad_norm": 39.639984130859375,
405
- "learning_rate": 6.344902386117138e-06,
406
- "loss": 0.4377,
407
- "step": 440
408
  },
409
  {
410
- "epoch": 10.727272727272727,
411
- "grad_norm": 53.85198211669922,
412
- "learning_rate": 6.236442516268981e-06,
413
- "loss": 0.2063,
414
- "step": 450
415
  },
416
  {
417
- "epoch": 10.969696969696969,
418
- "grad_norm": 69.08942413330078,
419
- "learning_rate": 6.127982646420825e-06,
420
- "loss": 0.2884,
421
- "step": 460
422
  },
423
  {
424
  "epoch": 11.0,
425
- "eval_loss": 0.2930862307548523,
426
- "eval_macro_f1": 0.9380839806371721,
427
- "eval_runtime": 6.0147,
428
- "eval_samples_per_second": 54.866,
429
- "eval_steps_per_second": 6.983,
430
- "step": 462
431
- },
432
- {
433
- "epoch": 11.193939393939393,
434
- "grad_norm": 63.143218994140625,
435
- "learning_rate": 6.019522776572668e-06,
436
- "loss": 0.6075,
437
- "step": 470
438
- },
439
- {
440
- "epoch": 11.436363636363636,
441
- "grad_norm": 7.950187683105469,
442
- "learning_rate": 5.911062906724513e-06,
443
- "loss": 0.2654,
444
- "step": 480
445
- },
446
- {
447
- "epoch": 11.67878787878788,
448
- "grad_norm": 82.30758666992188,
449
- "learning_rate": 5.802603036876356e-06,
450
- "loss": 0.2474,
451
- "step": 490
452
- },
453
- {
454
- "epoch": 11.921212121212122,
455
- "grad_norm": 7.340689182281494,
456
- "learning_rate": 5.6941431670282e-06,
457
- "loss": 0.2405,
458
- "step": 500
459
- },
460
- {
461
- "epoch": 12.0,
462
- "eval_loss": 0.3316686451435089,
463
- "eval_macro_f1": 0.9209216589861751,
464
- "eval_runtime": 5.8916,
465
- "eval_samples_per_second": 56.012,
466
- "eval_steps_per_second": 7.129,
467
- "step": 504
468
- },
469
- {
470
- "epoch": 12.145454545454545,
471
- "grad_norm": 37.141632080078125,
472
- "learning_rate": 5.585683297180043e-06,
473
- "loss": 0.3349,
474
- "step": 510
475
- },
476
- {
477
- "epoch": 12.387878787878789,
478
- "grad_norm": 34.024383544921875,
479
- "learning_rate": 5.477223427331888e-06,
480
- "loss": 0.1742,
481
- "step": 520
482
- },
483
- {
484
- "epoch": 12.63030303030303,
485
- "grad_norm": 46.10781478881836,
486
- "learning_rate": 5.368763557483731e-06,
487
- "loss": 0.2115,
488
- "step": 530
489
- },
490
- {
491
- "epoch": 12.872727272727273,
492
- "grad_norm": 126.67015838623047,
493
- "learning_rate": 5.260303687635575e-06,
494
- "loss": 0.8788,
495
- "step": 540
496
- },
497
- {
498
- "epoch": 13.0,
499
- "eval_loss": 0.37741926312446594,
500
- "eval_macro_f1": 0.9058106453305834,
501
- "eval_runtime": 6.6329,
502
- "eval_samples_per_second": 49.752,
503
- "eval_steps_per_second": 6.332,
504
- "step": 546
505
- },
506
- {
507
- "epoch": 13.096969696969698,
508
- "grad_norm": 35.44662094116211,
509
- "learning_rate": 5.151843817787418e-06,
510
- "loss": 0.5591,
511
- "step": 550
512
- },
513
- {
514
- "epoch": 13.33939393939394,
515
- "grad_norm": 57.34544372558594,
516
- "learning_rate": 5.043383947939263e-06,
517
- "loss": 0.213,
518
- "step": 560
519
- },
520
- {
521
- "epoch": 13.581818181818182,
522
- "grad_norm": 15.285223960876465,
523
- "learning_rate": 4.934924078091107e-06,
524
- "loss": 0.203,
525
- "step": 570
526
- },
527
- {
528
- "epoch": 13.824242424242424,
529
- "grad_norm": 28.99003028869629,
530
- "learning_rate": 4.82646420824295e-06,
531
- "loss": 0.4163,
532
- "step": 580
533
- },
534
- {
535
- "epoch": 14.0,
536
- "eval_loss": 0.39865490794181824,
537
- "eval_macro_f1": 0.9196508840275697,
538
- "eval_runtime": 5.8701,
539
- "eval_samples_per_second": 56.217,
540
- "eval_steps_per_second": 7.155,
541
- "step": 588
542
- },
543
- {
544
- "epoch": 14.048484848484849,
545
- "grad_norm": 346.8255310058594,
546
- "learning_rate": 4.718004338394794e-06,
547
- "loss": 0.26,
548
- "step": 590
549
- },
550
- {
551
- "epoch": 14.290909090909091,
552
- "grad_norm": 38.04654312133789,
553
- "learning_rate": 4.609544468546638e-06,
554
- "loss": 0.3813,
555
- "step": 600
556
- },
557
- {
558
- "epoch": 14.533333333333333,
559
- "grad_norm": 34.71643829345703,
560
- "learning_rate": 4.501084598698482e-06,
561
- "loss": 0.0974,
562
- "step": 610
563
- },
564
- {
565
- "epoch": 14.775757575757575,
566
- "grad_norm": 34.031890869140625,
567
- "learning_rate": 4.392624728850326e-06,
568
- "loss": 0.4881,
569
- "step": 620
570
- },
571
- {
572
- "epoch": 15.0,
573
- "grad_norm": 0.0002771662548184395,
574
- "learning_rate": 4.284164859002169e-06,
575
- "loss": 0.4126,
576
- "step": 630
577
- },
578
- {
579
- "epoch": 15.0,
580
- "eval_loss": 0.35451531410217285,
581
- "eval_macro_f1": 0.9235679411519468,
582
- "eval_runtime": 6.0428,
583
- "eval_samples_per_second": 54.611,
584
- "eval_steps_per_second": 6.95,
585
- "step": 630
586
- },
587
- {
588
- "epoch": 15.242424242424242,
589
- "grad_norm": 82.48748779296875,
590
- "learning_rate": 4.175704989154013e-06,
591
- "loss": 0.4444,
592
- "step": 640
593
- },
594
- {
595
- "epoch": 15.484848484848484,
596
- "grad_norm": 0.2618753910064697,
597
- "learning_rate": 4.067245119305857e-06,
598
- "loss": 0.2083,
599
- "step": 650
600
- },
601
- {
602
- "epoch": 15.727272727272727,
603
- "grad_norm": 98.34405517578125,
604
- "learning_rate": 3.958785249457701e-06,
605
- "loss": 0.4785,
606
- "step": 660
607
- },
608
- {
609
- "epoch": 15.969696969696969,
610
- "grad_norm": 0.37142229080200195,
611
- "learning_rate": 3.8503253796095445e-06,
612
- "loss": 0.1583,
613
- "step": 670
614
- },
615
- {
616
- "epoch": 16.0,
617
- "eval_loss": 0.38117873668670654,
618
- "eval_macro_f1": 0.9268860086407444,
619
- "eval_runtime": 6.9311,
620
- "eval_samples_per_second": 47.612,
621
- "eval_steps_per_second": 6.06,
622
- "step": 672
623
- },
624
- {
625
- "epoch": 16.193939393939395,
626
- "grad_norm": 40.54330825805664,
627
- "learning_rate": 3.741865509761389e-06,
628
- "loss": 0.0774,
629
- "step": 680
630
- },
631
- {
632
- "epoch": 16.436363636363637,
633
- "grad_norm": 0.23675695061683655,
634
- "learning_rate": 3.6334056399132324e-06,
635
- "loss": 0.1639,
636
- "step": 690
637
- },
638
- {
639
- "epoch": 16.67878787878788,
640
- "grad_norm": 47.12529373168945,
641
- "learning_rate": 3.5249457700650764e-06,
642
- "loss": 0.306,
643
- "step": 700
644
- },
645
- {
646
- "epoch": 16.921212121212122,
647
- "grad_norm": 0.3993530571460724,
648
- "learning_rate": 3.41648590021692e-06,
649
- "loss": 0.2376,
650
- "step": 710
651
- },
652
- {
653
- "epoch": 17.0,
654
- "eval_loss": 0.4087267816066742,
655
- "eval_macro_f1": 0.9295990205081115,
656
- "eval_runtime": 6.1306,
657
- "eval_samples_per_second": 53.828,
658
- "eval_steps_per_second": 6.851,
659
- "step": 714
660
- },
661
- {
662
- "epoch": 17.145454545454545,
663
- "grad_norm": 0.34205177426338196,
664
- "learning_rate": 3.308026030368764e-06,
665
- "loss": 0.0332,
666
- "step": 720
667
- },
668
- {
669
- "epoch": 17.387878787878787,
670
- "grad_norm": 0.5112647414207458,
671
- "learning_rate": 3.1995661605206075e-06,
672
- "loss": 0.1332,
673
- "step": 730
674
- },
675
- {
676
- "epoch": 17.63030303030303,
677
- "grad_norm": 120.2950439453125,
678
- "learning_rate": 3.0911062906724515e-06,
679
- "loss": 0.2503,
680
- "step": 740
681
- },
682
- {
683
- "epoch": 17.87272727272727,
684
- "grad_norm": 223.04759216308594,
685
- "learning_rate": 2.982646420824295e-06,
686
- "loss": 0.2703,
687
- "step": 750
688
- },
689
- {
690
- "epoch": 18.0,
691
- "eval_loss": 0.43362897634506226,
692
- "eval_macro_f1": 0.9264924264924266,
693
- "eval_runtime": 5.861,
694
- "eval_samples_per_second": 56.305,
695
- "eval_steps_per_second": 7.166,
696
- "step": 756
697
- },
698
- {
699
- "epoch": 18.096969696969698,
700
- "grad_norm": 54.66193771362305,
701
- "learning_rate": 2.874186550976139e-06,
702
- "loss": 0.1274,
703
- "step": 760
704
- },
705
- {
706
- "epoch": 18.33939393939394,
707
- "grad_norm": 54.846466064453125,
708
- "learning_rate": 2.765726681127983e-06,
709
- "loss": 0.2751,
710
- "step": 770
711
- },
712
- {
713
- "epoch": 18.581818181818182,
714
- "grad_norm": 53.97863006591797,
715
- "learning_rate": 2.6572668112798266e-06,
716
- "loss": 0.359,
717
- "step": 780
718
- },
719
- {
720
- "epoch": 18.824242424242424,
721
- "grad_norm": 81.63549041748047,
722
- "learning_rate": 2.5488069414316706e-06,
723
- "loss": 0.1819,
724
- "step": 790
725
- },
726
- {
727
- "epoch": 19.0,
728
- "eval_loss": 0.3480012118816376,
729
- "eval_macro_f1": 0.9236528192931639,
730
- "eval_runtime": 7.0471,
731
- "eval_samples_per_second": 46.828,
732
- "eval_steps_per_second": 5.96,
733
- "step": 798
734
- },
735
- {
736
- "epoch": 19.048484848484847,
737
- "grad_norm": 41.54087448120117,
738
- "learning_rate": 2.440347071583514e-06,
739
- "loss": 0.6373,
740
- "step": 800
741
- },
742
- {
743
- "epoch": 19.29090909090909,
744
- "grad_norm": 9.001028060913086,
745
- "learning_rate": 2.331887201735358e-06,
746
- "loss": 0.2971,
747
- "step": 810
748
- },
749
- {
750
- "epoch": 19.533333333333335,
751
- "grad_norm": 114.6279525756836,
752
- "learning_rate": 2.2234273318872017e-06,
753
- "loss": 0.1943,
754
- "step": 820
755
- },
756
- {
757
- "epoch": 19.775757575757577,
758
- "grad_norm": 18.022676467895508,
759
- "learning_rate": 2.1149674620390457e-06,
760
- "loss": 0.1207,
761
- "step": 830
762
- },
763
- {
764
- "epoch": 20.0,
765
- "grad_norm": 0.0001882202341221273,
766
- "learning_rate": 2.0065075921908892e-06,
767
- "loss": 0.1324,
768
- "step": 840
769
- },
770
- {
771
- "epoch": 20.0,
772
- "eval_loss": 0.4493299424648285,
773
- "eval_macro_f1": 0.9384902143522833,
774
- "eval_runtime": 6.0147,
775
- "eval_samples_per_second": 54.865,
776
- "eval_steps_per_second": 6.983,
777
- "step": 840
778
- },
779
- {
780
- "epoch": 20.242424242424242,
781
- "grad_norm": 13.740226745605469,
782
- "learning_rate": 1.8980477223427332e-06,
783
- "loss": 0.294,
784
- "step": 850
785
- },
786
- {
787
- "epoch": 20.484848484848484,
788
- "grad_norm": 6.870513916015625,
789
- "learning_rate": 1.7895878524945772e-06,
790
- "loss": 0.1323,
791
- "step": 860
792
- },
793
- {
794
- "epoch": 20.727272727272727,
795
- "grad_norm": 2.74729585647583,
796
- "learning_rate": 1.681127982646421e-06,
797
- "loss": 0.019,
798
- "step": 870
799
- },
800
- {
801
- "epoch": 20.96969696969697,
802
- "grad_norm": 17.18338966369629,
803
- "learning_rate": 1.572668112798265e-06,
804
- "loss": 0.1312,
805
- "step": 880
806
- },
807
- {
808
- "epoch": 21.0,
809
- "eval_loss": 0.40448498725891113,
810
- "eval_macro_f1": 0.9384902143522833,
811
- "eval_runtime": 6.133,
812
- "eval_samples_per_second": 53.808,
813
- "eval_steps_per_second": 6.848,
814
- "step": 882
815
- },
816
- {
817
- "epoch": 21.193939393939395,
818
- "grad_norm": 108.9833755493164,
819
- "learning_rate": 1.4642082429501087e-06,
820
- "loss": 0.2499,
821
- "step": 890
822
- },
823
- {
824
- "epoch": 21.436363636363637,
825
- "grad_norm": 0.291847825050354,
826
- "learning_rate": 1.3557483731019525e-06,
827
- "loss": 0.1708,
828
- "step": 900
829
- },
830
- {
831
- "epoch": 21.67878787878788,
832
- "grad_norm": 35.08168029785156,
833
- "learning_rate": 1.2472885032537963e-06,
834
- "loss": 0.0802,
835
- "step": 910
836
- },
837
- {
838
- "epoch": 21.921212121212122,
839
- "grad_norm": 0.05401836335659027,
840
- "learning_rate": 1.13882863340564e-06,
841
- "loss": 0.1662,
842
- "step": 920
843
- },
844
- {
845
- "epoch": 22.0,
846
- "eval_loss": 0.3166828453540802,
847
- "eval_macro_f1": 0.9417692129092176,
848
- "eval_runtime": 6.0442,
849
- "eval_samples_per_second": 54.598,
850
- "eval_steps_per_second": 6.949,
851
- "step": 924
852
  }
853
  ],
854
  "logging_steps": 10,
855
- "max_steps": 1025,
856
  "num_input_tokens_seen": 0,
857
  "num_train_epochs": 25,
858
  "save_steps": 500,
@@ -868,7 +392,7 @@
868
  "attributes": {}
869
  }
870
  },
871
- "total_flos": 1.0371596050603966e+19,
872
  "train_batch_size": 8,
873
  "trial_name": null,
874
  "trial_params": null
 
1
  {
2
+ "best_global_step": 385,
3
+ "best_metric": 0.9489664082687339,
4
+ "best_model_checkpoint": "Crosswalk/dinov2/checkpoint-385",
5
+ "epoch": 11.0,
6
  "eval_steps": 500,
7
+ "global_step": 385,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.28776978417266186,
14
+ "grad_norm": 25.290170669555664,
15
+ "learning_rate": 1.0227272727272729e-06,
16
+ "loss": 0.7613,
17
  "step": 10
18
  },
19
  {
20
+ "epoch": 0.5755395683453237,
21
+ "grad_norm": 19.748838424682617,
22
+ "learning_rate": 2.1590909090909092e-06,
23
+ "loss": 0.4653,
24
  "step": 20
25
  },
26
  {
27
+ "epoch": 0.8633093525179856,
28
+ "grad_norm": 642.5625,
29
+ "learning_rate": 3.2954545454545456e-06,
30
+ "loss": 0.3536,
31
  "step": 30
32
  },
33
  {
34
+ "epoch": 1.0,
35
+ "eval_loss": 0.3255312740802765,
36
+ "eval_macro_f1": 0.8774975492380359,
37
+ "eval_runtime": 26.9331,
38
+ "eval_samples_per_second": 8.8,
39
+ "eval_steps_per_second": 1.114,
40
+ "step": 35
41
+ },
42
+ {
43
+ "epoch": 1.143884892086331,
44
+ "grad_norm": 18.897483825683594,
45
+ "learning_rate": 4.4318181818181824e-06,
46
+ "loss": 0.3086,
47
  "step": 40
48
  },
49
  {
50
+ "epoch": 1.4316546762589928,
51
+ "grad_norm": 7.919783115386963,
52
+ "learning_rate": 5.568181818181818e-06,
53
+ "loss": 0.1994,
 
 
 
 
 
 
 
 
 
54
  "step": 50
55
  },
56
  {
57
+ "epoch": 1.7194244604316546,
58
+ "grad_norm": 16.423315048217773,
59
+ "learning_rate": 6.704545454545454e-06,
60
+ "loss": 0.2462,
61
  "step": 60
62
  },
63
  {
64
+ "epoch": 2.0,
65
+ "grad_norm": 71.16481018066406,
66
+ "learning_rate": 7.840909090909091e-06,
67
+ "loss": 0.2856,
68
  "step": 70
69
  },
70
  {
71
+ "epoch": 2.0,
72
+ "eval_loss": 0.30485469102859497,
73
+ "eval_macro_f1": 0.8690491043579004,
74
+ "eval_runtime": 29.715,
75
+ "eval_samples_per_second": 7.976,
76
+ "eval_steps_per_second": 1.01,
77
+ "step": 70
78
+ },
79
+ {
80
+ "epoch": 2.287769784172662,
81
+ "grad_norm": 30.668006896972656,
82
+ "learning_rate": 8.977272727272727e-06,
83
+ "loss": 0.1576,
84
  "step": 80
85
  },
86
  {
87
+ "epoch": 2.5755395683453237,
88
+ "grad_norm": 7.689505100250244,
89
+ "learning_rate": 9.987293519695045e-06,
90
+ "loss": 0.212,
 
 
 
 
 
 
 
 
 
91
  "step": 90
92
  },
93
  {
94
+ "epoch": 2.8633093525179856,
95
+ "grad_norm": 30.23891830444336,
96
+ "learning_rate": 9.86022871664549e-06,
97
+ "loss": 0.3253,
98
  "step": 100
99
  },
100
  {
101
+ "epoch": 3.0,
102
+ "eval_loss": 0.2832421362400055,
103
+ "eval_macro_f1": 0.892023399486086,
104
+ "eval_runtime": 29.6551,
105
+ "eval_samples_per_second": 7.992,
106
+ "eval_steps_per_second": 1.012,
107
+ "step": 105
108
+ },
109
+ {
110
+ "epoch": 3.143884892086331,
111
+ "grad_norm": 18.362842559814453,
112
+ "learning_rate": 9.733163913595934e-06,
113
+ "loss": 0.2215,
114
  "step": 110
115
  },
116
  {
117
+ "epoch": 3.431654676258993,
118
+ "grad_norm": 44.57633590698242,
119
+ "learning_rate": 9.60609911054638e-06,
120
+ "loss": 0.1955,
121
  "step": 120
122
  },
123
  {
124
+ "epoch": 3.7194244604316546,
125
+ "grad_norm": 10.040678024291992,
126
+ "learning_rate": 9.479034307496824e-06,
127
+ "loss": 0.2095,
 
 
 
 
 
 
 
 
 
128
  "step": 130
129
  },
130
  {
131
+ "epoch": 4.0,
132
+ "grad_norm": 14.543230056762695,
133
+ "learning_rate": 9.35196950444727e-06,
134
+ "loss": 0.2134,
135
+ "step": 140
136
+ },
137
+ {
138
+ "epoch": 4.0,
139
+ "eval_loss": 0.28937143087387085,
140
+ "eval_macro_f1": 0.9314632735685368,
141
+ "eval_runtime": 30.5498,
142
+ "eval_samples_per_second": 7.758,
143
+ "eval_steps_per_second": 0.982,
144
  "step": 140
145
  },
146
  {
147
+ "epoch": 4.287769784172662,
148
+ "grad_norm": 41.635799407958984,
149
+ "learning_rate": 9.224904701397714e-06,
150
+ "loss": 0.0965,
151
  "step": 150
152
  },
153
  {
154
+ "epoch": 4.575539568345324,
155
+ "grad_norm": 21.393756866455078,
156
+ "learning_rate": 9.09783989834816e-06,
157
+ "loss": 0.1777,
158
  "step": 160
159
  },
160
  {
161
+ "epoch": 4.863309352517986,
162
+ "grad_norm": 18.873945236206055,
163
+ "learning_rate": 8.970775095298603e-06,
164
+ "loss": 0.2155,
 
 
 
 
 
 
 
 
 
165
  "step": 170
166
  },
167
  {
168
+ "epoch": 5.0,
169
+ "eval_loss": 0.26767683029174805,
170
+ "eval_macro_f1": 0.9277555631264681,
171
+ "eval_runtime": 26.6297,
172
+ "eval_samples_per_second": 8.9,
173
+ "eval_steps_per_second": 1.127,
174
+ "step": 175
175
+ },
176
+ {
177
+ "epoch": 5.143884892086331,
178
+ "grad_norm": 6.233393669128418,
179
+ "learning_rate": 8.843710292249047e-06,
180
+ "loss": 0.1474,
181
  "step": 180
182
  },
183
  {
184
+ "epoch": 5.431654676258993,
185
+ "grad_norm": 20.907960891723633,
186
+ "learning_rate": 8.716645489199493e-06,
187
+ "loss": 0.1279,
188
  "step": 190
189
  },
190
  {
191
+ "epoch": 5.719424460431655,
192
+ "grad_norm": 17.63884162902832,
193
+ "learning_rate": 8.589580686149937e-06,
194
+ "loss": 0.211,
195
  "step": 200
196
  },
197
  {
198
+ "epoch": 6.0,
199
+ "grad_norm": 20.169618606567383,
200
+ "learning_rate": 8.462515883100381e-06,
201
+ "loss": 0.108,
202
  "step": 210
203
  },
204
  {
205
+ "epoch": 6.0,
206
+ "eval_loss": 0.2715422213077545,
207
+ "eval_macro_f1": 0.9110821288835689,
208
+ "eval_runtime": 32.5783,
209
+ "eval_samples_per_second": 7.275,
210
+ "eval_steps_per_second": 0.921,
211
  "step": 210
212
  },
213
  {
214
+ "epoch": 6.287769784172662,
215
+ "grad_norm": 6.99655294418335,
216
+ "learning_rate": 8.335451080050827e-06,
217
+ "loss": 0.1142,
218
  "step": 220
219
  },
220
  {
221
+ "epoch": 6.575539568345324,
222
+ "grad_norm": 20.068387985229492,
223
+ "learning_rate": 8.20838627700127e-06,
224
+ "loss": 0.2136,
225
  "step": 230
226
  },
227
  {
228
+ "epoch": 6.863309352517986,
229
+ "grad_norm": 12.439704895019531,
230
+ "learning_rate": 8.081321473951716e-06,
231
+ "loss": 0.1461,
232
  "step": 240
233
  },
234
  {
235
+ "epoch": 7.0,
236
+ "eval_loss": 0.49200543761253357,
237
+ "eval_macro_f1": 0.8094617047505047,
238
+ "eval_runtime": 38.0217,
239
+ "eval_samples_per_second": 6.233,
240
+ "eval_steps_per_second": 0.789,
241
+ "step": 245
242
+ },
243
+ {
244
+ "epoch": 7.143884892086331,
245
+ "grad_norm": 4.405595302581787,
246
+ "learning_rate": 7.95425667090216e-06,
247
+ "loss": 0.1535,
248
  "step": 250
249
  },
250
  {
251
+ "epoch": 7.431654676258993,
252
+ "grad_norm": 7.879202365875244,
253
+ "learning_rate": 7.827191867852606e-06,
254
+ "loss": 0.2075,
 
 
 
 
 
 
 
 
 
255
  "step": 260
256
  },
257
  {
258
+ "epoch": 7.719424460431655,
259
+ "grad_norm": 4.8070268630981445,
260
+ "learning_rate": 7.70012706480305e-06,
261
+ "loss": 0.1155,
262
  "step": 270
263
  },
264
  {
265
+ "epoch": 8.0,
266
+ "grad_norm": 12.835691452026367,
267
+ "learning_rate": 7.573062261753494e-06,
268
+ "loss": 0.1726,
269
+ "step": 280
270
+ },
271
+ {
272
+ "epoch": 8.0,
273
+ "eval_loss": 0.2971098721027374,
274
+ "eval_macro_f1": 0.9273962481754455,
275
+ "eval_runtime": 30.7621,
276
+ "eval_samples_per_second": 7.704,
277
+ "eval_steps_per_second": 0.975,
278
  "step": 280
279
  },
280
  {
281
+ "epoch": 8.287769784172662,
282
+ "grad_norm": 10.0529146194458,
283
+ "learning_rate": 7.44599745870394e-06,
284
+ "loss": 0.0781,
285
  "step": 290
286
  },
287
  {
288
+ "epoch": 8.575539568345324,
289
+ "grad_norm": 8.834739685058594,
290
+ "learning_rate": 7.318932655654384e-06,
291
+ "loss": 0.2262,
 
 
 
 
 
 
 
 
 
292
  "step": 300
293
  },
294
  {
295
+ "epoch": 8.863309352517986,
296
+ "grad_norm": 8.839664459228516,
297
+ "learning_rate": 7.191867852604829e-06,
298
+ "loss": 0.082,
299
  "step": 310
300
  },
301
  {
302
+ "epoch": 9.0,
303
+ "eval_loss": 0.38130030035972595,
304
+ "eval_macro_f1": 0.8962043795620438,
305
+ "eval_runtime": 32.1353,
306
+ "eval_samples_per_second": 7.375,
307
+ "eval_steps_per_second": 0.934,
308
+ "step": 315
309
+ },
310
+ {
311
+ "epoch": 9.14388489208633,
312
+ "grad_norm": 14.185478210449219,
313
+ "learning_rate": 7.064803049555273e-06,
314
+ "loss": 0.1111,
315
  "step": 320
316
  },
317
  {
318
+ "epoch": 9.431654676258994,
319
+ "grad_norm": 2.165837049484253,
320
+ "learning_rate": 6.937738246505718e-06,
321
+ "loss": 0.0988,
322
  "step": 330
323
  },
324
  {
325
+ "epoch": 9.719424460431654,
326
+ "grad_norm": 1.7402962446212769,
327
+ "learning_rate": 6.810673443456163e-06,
328
+ "loss": 0.0941,
 
 
 
 
 
 
 
 
 
329
  "step": 340
330
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  {
332
  "epoch": 10.0,
333
+ "grad_norm": 35.40852737426758,
334
+ "learning_rate": 6.683608640406608e-06,
335
+ "loss": 0.0827,
336
+ "step": 350
337
  },
338
  {
339
  "epoch": 10.0,
340
+ "eval_loss": 0.34335753321647644,
341
+ "eval_macro_f1": 0.9318475916606758,
342
+ "eval_runtime": 34.461,
343
+ "eval_samples_per_second": 6.877,
344
+ "eval_steps_per_second": 0.871,
345
+ "step": 350
 
 
 
 
 
 
 
346
  },
347
  {
348
+ "epoch": 10.287769784172662,
349
+ "grad_norm": 21.371692657470703,
350
+ "learning_rate": 6.556543837357052e-06,
351
+ "loss": 0.0953,
352
+ "step": 360
353
  },
354
  {
355
+ "epoch": 10.575539568345324,
356
+ "grad_norm": 0.9700206518173218,
357
+ "learning_rate": 6.4294790343074975e-06,
358
+ "loss": 0.0855,
359
+ "step": 370
360
  },
361
  {
362
+ "epoch": 10.863309352517986,
363
+ "grad_norm": 11.236717224121094,
364
+ "learning_rate": 6.3024142312579415e-06,
365
+ "loss": 0.0853,
366
+ "step": 380
367
  },
368
  {
369
  "epoch": 11.0,
370
+ "eval_loss": 0.27990925312042236,
371
+ "eval_macro_f1": 0.9489664082687339,
372
+ "eval_runtime": 32.6676,
373
+ "eval_samples_per_second": 7.255,
374
+ "eval_steps_per_second": 0.918,
375
+ "step": 385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 10,
379
+ "max_steps": 875,
380
  "num_input_tokens_seen": 0,
381
  "num_train_epochs": 25,
382
  "save_steps": 500,
 
392
  "attributes": {}
393
  }
394
  },
395
+ "total_flos": 4.376134205662298e+18,
396
  "train_batch_size": 8,
397
  "trial_name": null,
398
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0b1d45c6c028d437456c5af083b3618770c1bf92ea996d351710771438e3073
3
- size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f044765f3dcdd375a956d71774ce12973156a4c62f7c3a5e1e5b0698ac8d8b9
3
+ size 5713