mgh6 commited on
Commit
bea8af8
·
verified ·
1 Parent(s): 76d9969

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2666dcba378eb5b127a4a1983c384939e975dc835db042728bc334acf524fa14
3
  size 2708740080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b59a3c142cb36e782f862ff97df3be4b268f2b82af56ea2048c0b22de8ab70ac
3
  size 2708740080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5de48057a79311083a4803dcbffa4614e68dd960590083425259e539b86a0774
3
  size 52521338
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15964537a2492ff389ce57f04434fce8c9aa401544dbb2ca53af890dab8c7424
3
  size 52521338
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b51b6a81efcba2b70403a0119b72ef5704041095ada44dd87580f143ef37aee
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9080aea5181d3066ab765d04bc9819f089e9674161d5e56c8bf2b7c839212160
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7523ba195d7b50babd25f66fb14b250cddfb481924c94f68c65428f7cfe6b30c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3706e3071c2c4cbd5f2e989bf126df210bdb1a4e75a8894205331c65155ec1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,780 +1,28 @@
1
  {
2
- "best_metric": 2.673060894012451,
3
- "best_model_checkpoint": "mgh6/HTH_biCLIP_mean/checkpoint-6063",
4
- "epoch": 48.0,
5
  "eval_steps": 500,
6
- "global_step": 6192,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.06122415140271187,
14
- "learning_rate": 9.83203125e-05,
15
- "loss": 2.7311,
16
  "step": 129
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_cosine_similarity": 0.28012484312057495,
21
- "eval_loss": 2.744464159011841,
22
- "eval_runtime": 18.8577,
23
- "eval_samples_per_second": 22.908,
24
- "eval_steps_per_second": 1.432,
25
  "step": 129
26
- },
27
- {
28
- "epoch": 2.0,
29
- "grad_norm": 0.03998406603932381,
30
- "learning_rate": 9.6640625e-05,
31
- "loss": 2.7174,
32
- "step": 258
33
- },
34
- {
35
- "epoch": 2.0,
36
- "eval_cosine_similarity": 0.35804858803749084,
37
- "eval_loss": 2.7347757816314697,
38
- "eval_runtime": 18.8518,
39
- "eval_samples_per_second": 22.916,
40
- "eval_steps_per_second": 1.432,
41
- "step": 258
42
- },
43
- {
44
- "epoch": 3.0,
45
- "grad_norm": 0.09510844200849533,
46
- "learning_rate": 9.496093750000001e-05,
47
- "loss": 2.712,
48
- "step": 387
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cosine_similarity": 0.31994006037712097,
53
- "eval_loss": 2.7384214401245117,
54
- "eval_runtime": 18.8684,
55
- "eval_samples_per_second": 22.895,
56
- "eval_steps_per_second": 1.431,
57
- "step": 387
58
- },
59
- {
60
- "epoch": 4.0,
61
- "grad_norm": 0.09453984349966049,
62
- "learning_rate": 9.328125000000001e-05,
63
- "loss": 2.7083,
64
- "step": 516
65
- },
66
- {
67
- "epoch": 4.0,
68
- "eval_cosine_similarity": 0.396742582321167,
69
- "eval_loss": 2.7257983684539795,
70
- "eval_runtime": 18.8397,
71
- "eval_samples_per_second": 22.93,
72
- "eval_steps_per_second": 1.433,
73
- "step": 516
74
- },
75
- {
76
- "epoch": 5.0,
77
- "grad_norm": 0.1036021038889885,
78
- "learning_rate": 9.160156250000001e-05,
79
- "loss": 2.7041,
80
- "step": 645
81
- },
82
- {
83
- "epoch": 5.0,
84
- "eval_cosine_similarity": 0.4167896807193756,
85
- "eval_loss": 2.7295186519622803,
86
- "eval_runtime": 18.8549,
87
- "eval_samples_per_second": 22.912,
88
- "eval_steps_per_second": 1.432,
89
- "step": 645
90
- },
91
- {
92
- "epoch": 6.0,
93
- "grad_norm": 0.10563068836927414,
94
- "learning_rate": 8.9921875e-05,
95
- "loss": 2.701,
96
- "step": 774
97
- },
98
- {
99
- "epoch": 6.0,
100
- "eval_cosine_similarity": 0.4463934600353241,
101
- "eval_loss": 2.722810983657837,
102
- "eval_runtime": 18.8344,
103
- "eval_samples_per_second": 22.937,
104
- "eval_steps_per_second": 1.434,
105
- "step": 774
106
- },
107
- {
108
- "epoch": 7.0,
109
- "grad_norm": 0.3786303400993347,
110
- "learning_rate": 8.824218750000001e-05,
111
- "loss": 2.6981,
112
- "step": 903
113
- },
114
- {
115
- "epoch": 7.0,
116
- "eval_cosine_similarity": 0.3459477722644806,
117
- "eval_loss": 2.7271432876586914,
118
- "eval_runtime": 18.854,
119
- "eval_samples_per_second": 22.913,
120
- "eval_steps_per_second": 1.432,
121
- "step": 903
122
- },
123
- {
124
- "epoch": 8.0,
125
- "grad_norm": 0.39843541383743286,
126
- "learning_rate": 8.65625e-05,
127
- "loss": 2.695,
128
- "step": 1032
129
- },
130
- {
131
- "epoch": 8.0,
132
- "eval_cosine_similarity": 0.44728702306747437,
133
- "eval_loss": 2.717395544052124,
134
- "eval_runtime": 18.8516,
135
- "eval_samples_per_second": 22.916,
136
- "eval_steps_per_second": 1.432,
137
- "step": 1032
138
- },
139
- {
140
- "epoch": 9.0,
141
- "grad_norm": 0.061124324798583984,
142
- "learning_rate": 8.488281250000001e-05,
143
- "loss": 2.6921,
144
- "step": 1161
145
- },
146
- {
147
- "epoch": 9.0,
148
- "eval_cosine_similarity": 0.4983077645301819,
149
- "eval_loss": 2.7137577533721924,
150
- "eval_runtime": 18.8363,
151
- "eval_samples_per_second": 22.934,
152
- "eval_steps_per_second": 1.433,
153
- "step": 1161
154
- },
155
- {
156
- "epoch": 10.0,
157
- "grad_norm": 0.12580373883247375,
158
- "learning_rate": 8.3203125e-05,
159
- "loss": 2.6898,
160
- "step": 1290
161
- },
162
- {
163
- "epoch": 10.0,
164
- "eval_cosine_similarity": 0.5014428496360779,
165
- "eval_loss": 2.7119338512420654,
166
- "eval_runtime": 18.8721,
167
- "eval_samples_per_second": 22.891,
168
- "eval_steps_per_second": 1.431,
169
- "step": 1290
170
- },
171
- {
172
- "epoch": 11.0,
173
- "grad_norm": 0.12061749398708344,
174
- "learning_rate": 8.15234375e-05,
175
- "loss": 2.6859,
176
- "step": 1419
177
- },
178
- {
179
- "epoch": 11.0,
180
- "eval_cosine_similarity": 0.48711416125297546,
181
- "eval_loss": 2.714085578918457,
182
- "eval_runtime": 18.834,
183
- "eval_samples_per_second": 22.937,
184
- "eval_steps_per_second": 1.434,
185
- "step": 1419
186
- },
187
- {
188
- "epoch": 12.0,
189
- "grad_norm": 0.18075203895568848,
190
- "learning_rate": 7.984375e-05,
191
- "loss": 2.6843,
192
- "step": 1548
193
- },
194
- {
195
- "epoch": 12.0,
196
- "eval_cosine_similarity": 0.5338874459266663,
197
- "eval_loss": 2.707056760787964,
198
- "eval_runtime": 18.8491,
199
- "eval_samples_per_second": 22.919,
200
- "eval_steps_per_second": 1.432,
201
- "step": 1548
202
- },
203
- {
204
- "epoch": 13.0,
205
- "grad_norm": 0.22249284386634827,
206
- "learning_rate": 7.81640625e-05,
207
- "loss": 2.6829,
208
- "step": 1677
209
- },
210
- {
211
- "epoch": 13.0,
212
- "eval_cosine_similarity": 0.5601667761802673,
213
- "eval_loss": 2.705392837524414,
214
- "eval_runtime": 18.8352,
215
- "eval_samples_per_second": 22.936,
216
- "eval_steps_per_second": 1.433,
217
- "step": 1677
218
- },
219
- {
220
- "epoch": 14.0,
221
- "grad_norm": 0.16165713965892792,
222
- "learning_rate": 7.648437500000001e-05,
223
- "loss": 2.6799,
224
- "step": 1806
225
- },
226
- {
227
- "epoch": 14.0,
228
- "eval_cosine_similarity": 0.5292511582374573,
229
- "eval_loss": 2.7065181732177734,
230
- "eval_runtime": 18.8446,
231
- "eval_samples_per_second": 22.924,
232
- "eval_steps_per_second": 1.433,
233
- "step": 1806
234
- },
235
- {
236
- "epoch": 15.0,
237
- "grad_norm": 0.1596257984638214,
238
- "learning_rate": 7.48046875e-05,
239
- "loss": 2.678,
240
- "step": 1935
241
- },
242
- {
243
- "epoch": 15.0,
244
- "eval_cosine_similarity": 0.5406491756439209,
245
- "eval_loss": 2.7067458629608154,
246
- "eval_runtime": 18.8328,
247
- "eval_samples_per_second": 22.939,
248
- "eval_steps_per_second": 1.434,
249
- "step": 1935
250
- },
251
- {
252
- "epoch": 16.0,
253
- "grad_norm": 0.17212730646133423,
254
- "learning_rate": 7.3125e-05,
255
- "loss": 2.6762,
256
- "step": 2064
257
- },
258
- {
259
- "epoch": 16.0,
260
- "eval_cosine_similarity": 0.5383115410804749,
261
- "eval_loss": 2.7063181400299072,
262
- "eval_runtime": 18.8441,
263
- "eval_samples_per_second": 22.925,
264
- "eval_steps_per_second": 1.433,
265
- "step": 2064
266
- },
267
- {
268
- "epoch": 17.0,
269
- "grad_norm": 0.16676370799541473,
270
- "learning_rate": 7.14453125e-05,
271
- "loss": 2.6748,
272
- "step": 2193
273
- },
274
- {
275
- "epoch": 17.0,
276
- "eval_cosine_similarity": 0.5793458223342896,
277
- "eval_loss": 2.7020437717437744,
278
- "eval_runtime": 18.8795,
279
- "eval_samples_per_second": 22.882,
280
- "eval_steps_per_second": 1.43,
281
- "step": 2193
282
- },
283
- {
284
- "epoch": 18.0,
285
- "grad_norm": 0.08970298618078232,
286
- "learning_rate": 6.9765625e-05,
287
- "loss": 2.6722,
288
- "step": 2322
289
- },
290
- {
291
- "epoch": 18.0,
292
- "eval_cosine_similarity": 0.608212411403656,
293
- "eval_loss": 2.6979143619537354,
294
- "eval_runtime": 18.8361,
295
- "eval_samples_per_second": 22.935,
296
- "eval_steps_per_second": 1.433,
297
- "step": 2322
298
- },
299
- {
300
- "epoch": 19.0,
301
- "grad_norm": 0.46823370456695557,
302
- "learning_rate": 6.80859375e-05,
303
- "loss": 2.6707,
304
- "step": 2451
305
- },
306
- {
307
- "epoch": 19.0,
308
- "eval_cosine_similarity": 0.5827838182449341,
309
- "eval_loss": 2.699484348297119,
310
- "eval_runtime": 18.8456,
311
- "eval_samples_per_second": 22.923,
312
- "eval_steps_per_second": 1.433,
313
- "step": 2451
314
- },
315
- {
316
- "epoch": 20.0,
317
- "grad_norm": 0.22482483088970184,
318
- "learning_rate": 6.640625e-05,
319
- "loss": 2.6687,
320
- "step": 2580
321
- },
322
- {
323
- "epoch": 20.0,
324
- "eval_cosine_similarity": 0.6316117644309998,
325
- "eval_loss": 2.6942453384399414,
326
- "eval_runtime": 18.8317,
327
- "eval_samples_per_second": 22.94,
328
- "eval_steps_per_second": 1.434,
329
- "step": 2580
330
- },
331
- {
332
- "epoch": 21.0,
333
- "grad_norm": 0.45786142349243164,
334
- "learning_rate": 6.472656249999999e-05,
335
- "loss": 2.6661,
336
- "step": 2709
337
- },
338
- {
339
- "epoch": 21.0,
340
- "eval_cosine_similarity": 0.6281163692474365,
341
- "eval_loss": 2.6956567764282227,
342
- "eval_runtime": 18.8424,
343
- "eval_samples_per_second": 22.927,
344
- "eval_steps_per_second": 1.433,
345
- "step": 2709
346
- },
347
- {
348
- "epoch": 22.0,
349
- "grad_norm": 0.5357567667961121,
350
- "learning_rate": 6.3046875e-05,
351
- "loss": 2.6654,
352
- "step": 2838
353
- },
354
- {
355
- "epoch": 22.0,
356
- "eval_cosine_similarity": 0.639909029006958,
357
- "eval_loss": 2.6937482357025146,
358
- "eval_runtime": 18.8423,
359
- "eval_samples_per_second": 22.927,
360
- "eval_steps_per_second": 1.433,
361
- "step": 2838
362
- },
363
- {
364
- "epoch": 23.0,
365
- "grad_norm": 0.5274462103843689,
366
- "learning_rate": 6.136718750000001e-05,
367
- "loss": 2.6634,
368
- "step": 2967
369
- },
370
- {
371
- "epoch": 23.0,
372
- "eval_cosine_similarity": 0.6498256325721741,
373
- "eval_loss": 2.691450834274292,
374
- "eval_runtime": 18.8402,
375
- "eval_samples_per_second": 22.93,
376
- "eval_steps_per_second": 1.433,
377
- "step": 2967
378
- },
379
- {
380
- "epoch": 24.0,
381
- "grad_norm": 0.7012160420417786,
382
- "learning_rate": 5.968750000000001e-05,
383
- "loss": 2.6617,
384
- "step": 3096
385
- },
386
- {
387
- "epoch": 24.0,
388
- "eval_cosine_similarity": 0.6656497120857239,
389
- "eval_loss": 2.691051959991455,
390
- "eval_runtime": 18.8635,
391
- "eval_samples_per_second": 22.901,
392
- "eval_steps_per_second": 1.431,
393
- "step": 3096
394
- },
395
- {
396
- "epoch": 25.0,
397
- "grad_norm": 0.07700519263744354,
398
- "learning_rate": 5.80078125e-05,
399
- "loss": 2.6605,
400
- "step": 3225
401
- },
402
- {
403
- "epoch": 25.0,
404
- "eval_cosine_similarity": 0.6613298058509827,
405
- "eval_loss": 2.689277410507202,
406
- "eval_runtime": 18.8365,
407
- "eval_samples_per_second": 22.934,
408
- "eval_steps_per_second": 1.433,
409
- "step": 3225
410
- },
411
- {
412
- "epoch": 26.0,
413
- "grad_norm": 0.15677490830421448,
414
- "learning_rate": 5.6328124999999995e-05,
415
- "loss": 2.6599,
416
- "step": 3354
417
- },
418
- {
419
- "epoch": 26.0,
420
- "eval_cosine_similarity": 0.6886489391326904,
421
- "eval_loss": 2.686530590057373,
422
- "eval_runtime": 18.8402,
423
- "eval_samples_per_second": 22.93,
424
- "eval_steps_per_second": 1.433,
425
- "step": 3354
426
- },
427
- {
428
- "epoch": 27.0,
429
- "grad_norm": 0.3259325623512268,
430
- "learning_rate": 5.46484375e-05,
431
- "loss": 2.6573,
432
- "step": 3483
433
- },
434
- {
435
- "epoch": 27.0,
436
- "eval_cosine_similarity": 0.6656365394592285,
437
- "eval_loss": 2.689530849456787,
438
- "eval_runtime": 18.8427,
439
- "eval_samples_per_second": 22.927,
440
- "eval_steps_per_second": 1.433,
441
- "step": 3483
442
- },
443
- {
444
- "epoch": 28.0,
445
- "grad_norm": 0.29011014103889465,
446
- "learning_rate": 5.2968750000000003e-05,
447
- "loss": 2.6562,
448
- "step": 3612
449
- },
450
- {
451
- "epoch": 28.0,
452
- "eval_cosine_similarity": 0.6862850189208984,
453
- "eval_loss": 2.687514305114746,
454
- "eval_runtime": 18.8416,
455
- "eval_samples_per_second": 22.928,
456
- "eval_steps_per_second": 1.433,
457
- "step": 3612
458
- },
459
- {
460
- "epoch": 29.0,
461
- "grad_norm": 0.5012183785438538,
462
- "learning_rate": 5.1289062500000004e-05,
463
- "loss": 2.6549,
464
- "step": 3741
465
- },
466
- {
467
- "epoch": 29.0,
468
- "eval_cosine_similarity": 0.6950795650482178,
469
- "eval_loss": 2.6848597526550293,
470
- "eval_runtime": 18.871,
471
- "eval_samples_per_second": 22.892,
472
- "eval_steps_per_second": 1.431,
473
- "step": 3741
474
- },
475
- {
476
- "epoch": 30.0,
477
- "grad_norm": 0.2342006117105484,
478
- "learning_rate": 4.9609375000000005e-05,
479
- "loss": 2.6543,
480
- "step": 3870
481
- },
482
- {
483
- "epoch": 30.0,
484
- "eval_cosine_similarity": 0.6870617270469666,
485
- "eval_loss": 2.6858551502227783,
486
- "eval_runtime": 18.8336,
487
- "eval_samples_per_second": 22.938,
488
- "eval_steps_per_second": 1.434,
489
- "step": 3870
490
- },
491
- {
492
- "epoch": 31.0,
493
- "grad_norm": 0.3240242600440979,
494
- "learning_rate": 4.7929687500000005e-05,
495
- "loss": 2.653,
496
- "step": 3999
497
- },
498
- {
499
- "epoch": 31.0,
500
- "eval_cosine_similarity": 0.6926249861717224,
501
- "eval_loss": 2.6856954097747803,
502
- "eval_runtime": 18.8636,
503
- "eval_samples_per_second": 22.901,
504
- "eval_steps_per_second": 1.431,
505
- "step": 3999
506
- },
507
- {
508
- "epoch": 32.0,
509
- "grad_norm": 1.058250069618225,
510
- "learning_rate": 4.6250000000000006e-05,
511
- "loss": 2.6513,
512
- "step": 4128
513
- },
514
- {
515
- "epoch": 32.0,
516
- "eval_cosine_similarity": 0.7236000895500183,
517
- "eval_loss": 2.682251214981079,
518
- "eval_runtime": 18.8353,
519
- "eval_samples_per_second": 22.936,
520
- "eval_steps_per_second": 1.433,
521
- "step": 4128
522
- },
523
- {
524
- "epoch": 33.0,
525
- "grad_norm": 0.1793142408132553,
526
- "learning_rate": 4.45703125e-05,
527
- "loss": 2.6501,
528
- "step": 4257
529
- },
530
- {
531
- "epoch": 33.0,
532
- "eval_cosine_similarity": 0.6964766979217529,
533
- "eval_loss": 2.6835083961486816,
534
- "eval_runtime": 18.8683,
535
- "eval_samples_per_second": 22.896,
536
- "eval_steps_per_second": 1.431,
537
- "step": 4257
538
- },
539
- {
540
- "epoch": 34.0,
541
- "grad_norm": 0.12666787207126617,
542
- "learning_rate": 4.2890625e-05,
543
- "loss": 2.6492,
544
- "step": 4386
545
- },
546
- {
547
- "epoch": 34.0,
548
- "eval_cosine_similarity": 0.7385993003845215,
549
- "eval_loss": 2.6802611351013184,
550
- "eval_runtime": 18.8352,
551
- "eval_samples_per_second": 22.936,
552
- "eval_steps_per_second": 1.433,
553
- "step": 4386
554
- },
555
- {
556
- "epoch": 35.0,
557
- "grad_norm": 0.28480854630470276,
558
- "learning_rate": 4.12109375e-05,
559
- "loss": 2.6477,
560
- "step": 4515
561
- },
562
- {
563
- "epoch": 35.0,
564
- "eval_cosine_similarity": 0.7121383547782898,
565
- "eval_loss": 2.6818628311157227,
566
- "eval_runtime": 18.8567,
567
- "eval_samples_per_second": 22.91,
568
- "eval_steps_per_second": 1.432,
569
- "step": 4515
570
- },
571
- {
572
- "epoch": 36.0,
573
- "grad_norm": 0.13797371089458466,
574
- "learning_rate": 3.953125e-05,
575
- "loss": 2.6475,
576
- "step": 4644
577
- },
578
- {
579
- "epoch": 36.0,
580
- "eval_cosine_similarity": 0.7440741658210754,
581
- "eval_loss": 2.6793031692504883,
582
- "eval_runtime": 18.8323,
583
- "eval_samples_per_second": 22.939,
584
- "eval_steps_per_second": 1.434,
585
- "step": 4644
586
- },
587
- {
588
- "epoch": 37.0,
589
- "grad_norm": 0.3601125180721283,
590
- "learning_rate": 3.78515625e-05,
591
- "loss": 2.6472,
592
- "step": 4773
593
- },
594
- {
595
- "epoch": 37.0,
596
- "eval_cosine_similarity": 0.7328048348426819,
597
- "eval_loss": 2.682004690170288,
598
- "eval_runtime": 18.8457,
599
- "eval_samples_per_second": 22.923,
600
- "eval_steps_per_second": 1.433,
601
- "step": 4773
602
- },
603
- {
604
- "epoch": 38.0,
605
- "grad_norm": 0.49930471181869507,
606
- "learning_rate": 3.6171875e-05,
607
- "loss": 2.6452,
608
- "step": 4902
609
- },
610
- {
611
- "epoch": 38.0,
612
- "eval_cosine_similarity": 0.7482897639274597,
613
- "eval_loss": 2.678091287612915,
614
- "eval_runtime": 18.8365,
615
- "eval_samples_per_second": 22.934,
616
- "eval_steps_per_second": 1.433,
617
- "step": 4902
618
- },
619
- {
620
- "epoch": 39.0,
621
- "grad_norm": 0.34699180722236633,
622
- "learning_rate": 3.44921875e-05,
623
- "loss": 2.6442,
624
- "step": 5031
625
- },
626
- {
627
- "epoch": 39.0,
628
- "eval_cosine_similarity": 0.7534282803535461,
629
- "eval_loss": 2.678204298019409,
630
- "eval_runtime": 18.8443,
631
- "eval_samples_per_second": 22.925,
632
- "eval_steps_per_second": 1.433,
633
- "step": 5031
634
- },
635
- {
636
- "epoch": 40.0,
637
- "grad_norm": 0.36323243379592896,
638
- "learning_rate": 3.2812500000000005e-05,
639
- "loss": 2.6437,
640
- "step": 5160
641
- },
642
- {
643
- "epoch": 40.0,
644
- "eval_cosine_similarity": 0.7704055905342102,
645
- "eval_loss": 2.676304817199707,
646
- "eval_runtime": 18.8549,
647
- "eval_samples_per_second": 22.912,
648
- "eval_steps_per_second": 1.432,
649
- "step": 5160
650
- },
651
- {
652
- "epoch": 41.0,
653
- "grad_norm": 0.4448683261871338,
654
- "learning_rate": 3.1132812500000005e-05,
655
- "loss": 2.6429,
656
- "step": 5289
657
- },
658
- {
659
- "epoch": 41.0,
660
- "eval_cosine_similarity": 0.7627379894256592,
661
- "eval_loss": 2.67691707611084,
662
- "eval_runtime": 18.836,
663
- "eval_samples_per_second": 22.935,
664
- "eval_steps_per_second": 1.433,
665
- "step": 5289
666
- },
667
- {
668
- "epoch": 42.0,
669
- "grad_norm": 0.3076535165309906,
670
- "learning_rate": 2.9453125000000003e-05,
671
- "loss": 2.6428,
672
- "step": 5418
673
- },
674
- {
675
- "epoch": 42.0,
676
- "eval_cosine_similarity": 0.7730197310447693,
677
- "eval_loss": 2.675767660140991,
678
- "eval_runtime": 18.8744,
679
- "eval_samples_per_second": 22.888,
680
- "eval_steps_per_second": 1.431,
681
- "step": 5418
682
- },
683
- {
684
- "epoch": 43.0,
685
- "grad_norm": 0.3055357336997986,
686
- "learning_rate": 2.77734375e-05,
687
- "loss": 2.6415,
688
- "step": 5547
689
- },
690
- {
691
- "epoch": 43.0,
692
- "eval_cosine_similarity": 0.7752830982208252,
693
- "eval_loss": 2.6751718521118164,
694
- "eval_runtime": 18.8283,
695
- "eval_samples_per_second": 22.944,
696
- "eval_steps_per_second": 1.434,
697
- "step": 5547
698
- },
699
- {
700
- "epoch": 44.0,
701
- "grad_norm": 0.4585816562175751,
702
- "learning_rate": 2.609375e-05,
703
- "loss": 2.6408,
704
- "step": 5676
705
- },
706
- {
707
- "epoch": 44.0,
708
- "eval_cosine_similarity": 0.7713160514831543,
709
- "eval_loss": 2.675260305404663,
710
- "eval_runtime": 18.863,
711
- "eval_samples_per_second": 22.902,
712
- "eval_steps_per_second": 1.431,
713
- "step": 5676
714
- },
715
- {
716
- "epoch": 45.0,
717
- "grad_norm": 0.1793605536222458,
718
- "learning_rate": 2.44140625e-05,
719
- "loss": 2.6401,
720
- "step": 5805
721
- },
722
- {
723
- "epoch": 45.0,
724
- "eval_cosine_similarity": 0.7710201144218445,
725
- "eval_loss": 2.675137519836426,
726
- "eval_runtime": 18.8392,
727
- "eval_samples_per_second": 22.931,
728
- "eval_steps_per_second": 1.433,
729
- "step": 5805
730
- },
731
- {
732
- "epoch": 46.0,
733
- "grad_norm": 0.2033383995294571,
734
- "learning_rate": 2.2734375000000002e-05,
735
- "loss": 2.6396,
736
- "step": 5934
737
- },
738
- {
739
- "epoch": 46.0,
740
- "eval_cosine_similarity": 0.7817992568016052,
741
- "eval_loss": 2.6738462448120117,
742
- "eval_runtime": 18.8424,
743
- "eval_samples_per_second": 22.927,
744
- "eval_steps_per_second": 1.433,
745
- "step": 5934
746
- },
747
- {
748
- "epoch": 47.0,
749
- "grad_norm": 0.3535211384296417,
750
- "learning_rate": 2.10546875e-05,
751
- "loss": 2.6388,
752
- "step": 6063
753
- },
754
- {
755
- "epoch": 47.0,
756
- "eval_cosine_similarity": 0.7922196984291077,
757
- "eval_loss": 2.673060894012451,
758
- "eval_runtime": 18.8542,
759
- "eval_samples_per_second": 22.913,
760
- "eval_steps_per_second": 1.432,
761
- "step": 6063
762
- },
763
- {
764
- "epoch": 48.0,
765
- "grad_norm": 0.29340553283691406,
766
- "learning_rate": 1.9375e-05,
767
- "loss": 2.6383,
768
- "step": 6192
769
- },
770
- {
771
- "epoch": 48.0,
772
- "eval_cosine_similarity": 0.7896454334259033,
773
- "eval_loss": 2.6733312606811523,
774
- "eval_runtime": 18.8372,
775
- "eval_samples_per_second": 22.933,
776
- "eval_steps_per_second": 1.433,
777
- "step": 6192
778
  }
779
  ],
780
  "logging_steps": 500,
@@ -789,7 +37,7 @@
789
  "early_stopping_threshold": 0.0
790
  },
791
  "attributes": {
792
- "early_stopping_patience_counter": 1
793
  }
794
  },
795
  "TrainerControl": {
 
1
  {
2
+ "best_metric": 2.754194974899292,
3
+ "best_model_checkpoint": "mgh6/HTH_biCLIP_mean/checkpoint-129",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 129,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.009723234921693802,
14
+ "learning_rate": 0.000983203125,
15
+ "loss": 2.7359,
16
  "step": 129
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_cosine_similarity": 0.15642696619033813,
21
+ "eval_loss": 2.754194974899292,
22
+ "eval_runtime": 18.9326,
23
+ "eval_samples_per_second": 22.818,
24
+ "eval_steps_per_second": 1.426,
25
  "step": 129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "logging_steps": 500,
 
37
  "early_stopping_threshold": 0.0
38
  },
39
  "attributes": {
40
+ "early_stopping_patience_counter": 0
41
  }
42
  },
43
  "TrainerControl": {
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9390ca3eced6230918734dc8de4ad7f836138495f13c3d1b134d07c6eaa4db21
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456d4f7d71f44316c3344e7b1dd2abd8cad0993cbcf9e11348a97ca882a75f92
3
  size 5368