DumbledoreWiz committed on
Commit
77c23a1
·
verified ·
1 Parent(s): a52f6e7

Upload 4 files

Browse files
Files changed (3) hide show
  1. config.json +12 -22
  2. model.safetensors +2 -2
  3. trainer_state.json +136 -760
config.json CHANGED
@@ -9,33 +9,23 @@
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
- "0": "round",
13
- "1": "vneck",
14
- "2": "collared",
15
- "3": "straight",
16
- "4": "highneck",
17
- "5": "hoodie",
18
- "6": "henley",
19
- "7": "halter",
20
- "8": "sweetheart",
21
- "9": "polo",
22
- "10": "asymmetrical"
23
  },
24
  "image_size": 224,
25
  "initializer_range": 0.02,
26
  "intermediate_size": 3072,
27
  "label2id": {
28
- "asymmetrical": 10,
29
- "collared": 2,
30
- "halter": 7,
31
- "henley": 6,
32
- "highneck": 4,
33
- "hoodie": 5,
34
- "polo": 9,
35
- "round": 0,
36
- "straight": 3,
37
- "sweetheart": 8,
38
- "vneck": 1
39
  },
40
  "layer_norm_eps": 1e-12,
41
  "model_type": "vit",
 
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
+ "0": "Crop",
13
+ "1": "Regular",
14
+ "2": "Hip",
15
+ "3": "Knee",
16
+ "4": "Maxi",
17
+ "5": "Bodysuit"
 
 
 
 
 
18
  },
19
  "image_size": 224,
20
  "initializer_range": 0.02,
21
  "intermediate_size": 3072,
22
  "label2id": {
23
+ "Bodysuit": 5,
24
+ "Crop": 0,
25
+ "Hip": 2,
26
+ "Knee": 3,
27
+ "Maxi": 4,
28
+ "Regular": 1
 
 
 
 
 
29
  },
30
  "layer_norm_eps": 1e-12,
31
  "model_type": "vit",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb3ba6832a1f8ea587c9e935a64a1337fb16be967c27b95e4e24aef18a4ce37
3
- size 343251660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:955e38edf95dd693c962e91cc18b87fede8c26c0d08cec97a97957f150c24477
3
+ size 343236280
trainer_state.json CHANGED
@@ -1,838 +1,214 @@
1
  {
2
- "best_metric": 0.7836363636363637,
3
- "best_model_checkpoint": "/content/drive/MyDrive/autoTaggingProject/ViT/General/Features/NeckLine/Results/model_2024-10-16_test/checkpoint-5192",
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 5192,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.1059322033898305,
13
- "grad_norm": 1.528609037399292,
14
- "learning_rate": 9.964689265536724e-06,
15
- "loss": 2.3665,
16
- "step": 50
17
- },
18
- {
19
- "epoch": 0.211864406779661,
20
- "grad_norm": 1.4840689897537231,
21
- "learning_rate": 9.929378531073447e-06,
22
- "loss": 2.2771,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 0.3177966101694915,
27
- "grad_norm": 1.6797162294387817,
28
- "learning_rate": 9.89406779661017e-06,
29
- "loss": 2.1584,
30
- "step": 150
31
- },
32
- {
33
- "epoch": 0.423728813559322,
34
- "grad_norm": 1.5869511365890503,
35
- "learning_rate": 9.858757062146892e-06,
36
- "loss": 2.0337,
37
  "step": 200
38
  },
39
  {
40
- "epoch": 0.5296610169491526,
41
- "grad_norm": 1.625272512435913,
42
- "learning_rate": 9.823446327683617e-06,
43
- "loss": 1.945,
44
- "step": 250
45
- },
46
- {
47
- "epoch": 0.635593220338983,
48
- "grad_norm": 1.918320655822754,
49
- "learning_rate": 9.788135593220339e-06,
50
- "loss": 1.8575,
 
 
51
  "step": 300
52
  },
53
  {
54
- "epoch": 0.7415254237288136,
55
- "grad_norm": 2.73588490486145,
56
- "learning_rate": 9.752824858757062e-06,
57
- "loss": 1.795,
58
- "step": 350
59
- },
60
- {
61
- "epoch": 0.847457627118644,
62
- "grad_norm": 1.978835105895996,
63
- "learning_rate": 9.717514124293787e-06,
64
- "loss": 1.7206,
65
  "step": 400
66
  },
67
  {
68
- "epoch": 0.9533898305084746,
69
- "grad_norm": 2.827810764312744,
70
- "learning_rate": 9.682203389830509e-06,
71
- "loss": 1.643,
72
- "step": 450
73
- },
74
- {
75
- "epoch": 1.0,
76
- "eval_accuracy": 0.5754545454545454,
77
- "eval_loss": 1.6228725910186768,
78
- "eval_runtime": 14.283,
79
- "eval_samples_per_second": 77.014,
80
- "eval_steps_per_second": 2.45,
81
- "step": 472
82
- },
83
- {
84
- "epoch": 1.0593220338983051,
85
- "grad_norm": 2.191129207611084,
86
- "learning_rate": 9.646892655367232e-06,
87
- "loss": 1.5876,
88
  "step": 500
89
  },
90
  {
91
- "epoch": 1.1652542372881356,
92
- "grad_norm": 2.16697359085083,
93
- "learning_rate": 9.611581920903955e-06,
94
- "loss": 1.5416,
 
 
95
  "step": 550
96
  },
97
  {
98
- "epoch": 1.271186440677966,
99
- "grad_norm": 2.1197969913482666,
100
- "learning_rate": 9.576271186440679e-06,
101
- "loss": 1.5015,
102
  "step": 600
103
  },
104
  {
105
- "epoch": 1.3771186440677967,
106
- "grad_norm": 2.465144634246826,
107
- "learning_rate": 9.540960451977402e-06,
108
- "loss": 1.4279,
109
- "step": 650
110
- },
111
- {
112
- "epoch": 1.4830508474576272,
113
- "grad_norm": 1.8128849267959595,
114
- "learning_rate": 9.505649717514125e-06,
115
- "loss": 1.4298,
116
  "step": 700
117
  },
118
  {
119
- "epoch": 1.5889830508474576,
120
- "grad_norm": 2.8659541606903076,
121
- "learning_rate": 9.470338983050848e-06,
122
- "loss": 1.3816,
123
- "step": 750
124
- },
125
- {
126
- "epoch": 1.694915254237288,
127
- "grad_norm": 2.582030773162842,
128
- "learning_rate": 9.435028248587572e-06,
129
- "loss": 1.3254,
130
  "step": 800
131
  },
132
  {
133
- "epoch": 1.8008474576271185,
134
- "grad_norm": 2.6557815074920654,
135
- "learning_rate": 9.399717514124295e-06,
136
- "loss": 1.2937,
137
- "step": 850
138
- },
139
- {
140
- "epoch": 1.9067796610169492,
141
- "grad_norm": 3.4831998348236084,
142
- "learning_rate": 9.364406779661017e-06,
143
- "loss": 1.2511,
 
 
144
  "step": 900
145
  },
146
  {
147
- "epoch": 2.0,
148
- "eval_accuracy": 0.6927272727272727,
149
- "eval_loss": 1.3029816150665283,
150
- "eval_runtime": 14.6712,
151
- "eval_samples_per_second": 74.977,
152
- "eval_steps_per_second": 2.386,
153
- "step": 944
154
- },
155
- {
156
- "epoch": 2.01271186440678,
157
- "grad_norm": 2.9632270336151123,
158
- "learning_rate": 9.329096045197742e-06,
159
- "loss": 1.2574,
160
- "step": 950
161
- },
162
- {
163
- "epoch": 2.1186440677966103,
164
- "grad_norm": 4.297321796417236,
165
- "learning_rate": 9.293785310734465e-06,
166
- "loss": 1.213,
167
  "step": 1000
168
  },
169
  {
170
- "epoch": 2.2245762711864407,
171
- "grad_norm": 5.289111614227295,
172
- "learning_rate": 9.258474576271187e-06,
173
- "loss": 1.2127,
174
- "step": 1050
175
- },
176
- {
177
- "epoch": 2.330508474576271,
178
- "grad_norm": 5.046447277069092,
179
- "learning_rate": 9.22316384180791e-06,
180
- "loss": 1.1403,
181
  "step": 1100
182
  },
183
  {
184
- "epoch": 2.4364406779661016,
185
- "grad_norm": 3.322784662246704,
186
- "learning_rate": 9.187853107344633e-06,
187
- "loss": 1.1244,
188
- "step": 1150
 
 
189
  },
190
  {
191
- "epoch": 2.542372881355932,
192
- "grad_norm": 2.8034584522247314,
193
- "learning_rate": 9.152542372881356e-06,
194
- "loss": 1.1052,
195
  "step": 1200
196
  },
197
  {
198
- "epoch": 2.648305084745763,
199
- "grad_norm": 2.822523593902588,
200
- "learning_rate": 9.11723163841808e-06,
201
- "loss": 1.0929,
202
- "step": 1250
203
- },
204
- {
205
- "epoch": 2.7542372881355934,
206
- "grad_norm": 4.488712787628174,
207
- "learning_rate": 9.081920903954803e-06,
208
- "loss": 1.0991,
209
  "step": 1300
210
  },
211
  {
212
- "epoch": 2.860169491525424,
213
- "grad_norm": 4.986589431762695,
214
- "learning_rate": 9.046610169491526e-06,
215
- "loss": 1.0464,
216
- "step": 1350
217
- },
218
- {
219
- "epoch": 2.9661016949152543,
220
- "grad_norm": 4.071073532104492,
221
- "learning_rate": 9.01129943502825e-06,
222
- "loss": 1.0532,
 
 
223
  "step": 1400
224
  },
225
  {
226
- "epoch": 3.0,
227
- "eval_accuracy": 0.7209090909090909,
228
- "eval_loss": 1.0850567817687988,
229
- "eval_runtime": 17.8307,
230
- "eval_samples_per_second": 61.691,
231
- "eval_steps_per_second": 1.963,
232
- "step": 1416
233
- },
234
- {
235
- "epoch": 3.0720338983050848,
236
- "grad_norm": 4.750606536865234,
237
- "learning_rate": 8.975988700564973e-06,
238
- "loss": 1.0453,
239
- "step": 1450
240
- },
241
- {
242
- "epoch": 3.1779661016949152,
243
- "grad_norm": 3.7604198455810547,
244
- "learning_rate": 8.940677966101694e-06,
245
- "loss": 1.0231,
246
  "step": 1500
247
  },
248
  {
249
- "epoch": 3.2838983050847457,
250
- "grad_norm": 5.222371578216553,
251
- "learning_rate": 8.90536723163842e-06,
252
- "loss": 0.9812,
253
- "step": 1550
254
- },
255
- {
256
- "epoch": 3.389830508474576,
257
- "grad_norm": 6.1730570793151855,
258
- "learning_rate": 8.870056497175143e-06,
259
- "loss": 0.9771,
260
  "step": 1600
261
  },
262
  {
263
- "epoch": 3.4957627118644066,
264
- "grad_norm": 5.553199291229248,
265
- "learning_rate": 8.834745762711864e-06,
266
- "loss": 0.9682,
 
 
267
  "step": 1650
268
  },
269
  {
270
- "epoch": 3.601694915254237,
271
- "grad_norm": 4.2731451988220215,
272
- "learning_rate": 8.79943502824859e-06,
273
- "loss": 1.0006,
274
  "step": 1700
275
  },
276
  {
277
- "epoch": 3.707627118644068,
278
- "grad_norm": 3.617053985595703,
279
- "learning_rate": 8.764124293785311e-06,
280
- "loss": 0.9154,
281
- "step": 1750
282
- },
283
- {
284
- "epoch": 3.8135593220338984,
285
- "grad_norm": 2.5833144187927246,
286
- "learning_rate": 8.728813559322034e-06,
287
- "loss": 0.935,
288
  "step": 1800
289
  },
290
  {
291
- "epoch": 3.919491525423729,
292
- "grad_norm": 3.1606085300445557,
293
- "learning_rate": 8.693502824858758e-06,
294
- "loss": 0.9216,
295
- "step": 1850
296
- },
297
- {
298
- "epoch": 4.0,
299
- "eval_accuracy": 0.74,
300
- "eval_loss": 0.9795950651168823,
301
- "eval_runtime": 16.0161,
302
- "eval_samples_per_second": 68.681,
303
- "eval_steps_per_second": 2.185,
304
- "step": 1888
305
- },
306
- {
307
- "epoch": 4.02542372881356,
308
- "grad_norm": 3.959052801132202,
309
- "learning_rate": 8.65819209039548e-06,
310
- "loss": 0.9288,
311
  "step": 1900
312
  },
313
- {
314
- "epoch": 4.13135593220339,
315
- "grad_norm": 3.6853768825531006,
316
- "learning_rate": 8.622881355932204e-06,
317
- "loss": 0.879,
318
- "step": 1950
319
- },
320
- {
321
- "epoch": 4.237288135593221,
322
- "grad_norm": 3.1763620376586914,
323
- "learning_rate": 8.587570621468927e-06,
324
- "loss": 0.8912,
325
- "step": 2000
326
- },
327
- {
328
- "epoch": 4.343220338983051,
329
- "grad_norm": 4.019489288330078,
330
- "learning_rate": 8.55225988700565e-06,
331
- "loss": 0.9077,
332
- "step": 2050
333
- },
334
- {
335
- "epoch": 4.4491525423728815,
336
- "grad_norm": 4.087663650512695,
337
- "learning_rate": 8.516949152542372e-06,
338
- "loss": 0.8812,
339
- "step": 2100
340
- },
341
- {
342
- "epoch": 4.555084745762712,
343
- "grad_norm": 3.6994051933288574,
344
- "learning_rate": 8.481638418079097e-06,
345
- "loss": 0.8344,
346
- "step": 2150
347
- },
348
- {
349
- "epoch": 4.661016949152542,
350
- "grad_norm": 2.284302234649658,
351
- "learning_rate": 8.44632768361582e-06,
352
- "loss": 0.8501,
353
- "step": 2200
354
- },
355
- {
356
- "epoch": 4.766949152542373,
357
- "grad_norm": 4.348343372344971,
358
- "learning_rate": 8.411016949152542e-06,
359
- "loss": 0.8712,
360
- "step": 2250
361
- },
362
- {
363
- "epoch": 4.872881355932203,
364
- "grad_norm": 4.996354103088379,
365
- "learning_rate": 8.375706214689267e-06,
366
- "loss": 0.8264,
367
- "step": 2300
368
- },
369
- {
370
- "epoch": 4.978813559322034,
371
- "grad_norm": 3.136770486831665,
372
- "learning_rate": 8.340395480225989e-06,
373
- "loss": 0.843,
374
- "step": 2350
375
- },
376
- {
377
- "epoch": 5.0,
378
- "eval_accuracy": 0.7390909090909091,
379
- "eval_loss": 0.9454855918884277,
380
- "eval_runtime": 15.0504,
381
- "eval_samples_per_second": 73.088,
382
- "eval_steps_per_second": 2.326,
383
- "step": 2360
384
- },
385
- {
386
- "epoch": 5.084745762711864,
387
- "grad_norm": 6.956203937530518,
388
- "learning_rate": 8.305084745762712e-06,
389
- "loss": 0.8094,
390
- "step": 2400
391
- },
392
- {
393
- "epoch": 5.190677966101695,
394
- "grad_norm": 6.91636323928833,
395
- "learning_rate": 8.269774011299437e-06,
396
- "loss": 0.8301,
397
- "step": 2450
398
- },
399
- {
400
- "epoch": 5.296610169491525,
401
- "grad_norm": 2.561798334121704,
402
- "learning_rate": 8.234463276836159e-06,
403
- "loss": 0.8562,
404
- "step": 2500
405
- },
406
- {
407
- "epoch": 5.4025423728813555,
408
- "grad_norm": 4.503079891204834,
409
- "learning_rate": 8.199152542372882e-06,
410
- "loss": 0.7487,
411
- "step": 2550
412
- },
413
- {
414
- "epoch": 5.508474576271187,
415
- "grad_norm": 3.560302257537842,
416
- "learning_rate": 8.163841807909605e-06,
417
- "loss": 0.8222,
418
- "step": 2600
419
- },
420
- {
421
- "epoch": 5.614406779661017,
422
- "grad_norm": 6.565722465515137,
423
- "learning_rate": 8.128531073446328e-06,
424
- "loss": 0.7917,
425
- "step": 2650
426
- },
427
- {
428
- "epoch": 5.720338983050848,
429
- "grad_norm": 7.790140151977539,
430
- "learning_rate": 8.093220338983052e-06,
431
- "loss": 0.7764,
432
- "step": 2700
433
- },
434
- {
435
- "epoch": 5.826271186440678,
436
- "grad_norm": 4.017592430114746,
437
- "learning_rate": 8.057909604519775e-06,
438
- "loss": 0.7718,
439
- "step": 2750
440
- },
441
- {
442
- "epoch": 5.932203389830509,
443
- "grad_norm": 6.110499382019043,
444
- "learning_rate": 8.022598870056498e-06,
445
- "loss": 0.7445,
446
- "step": 2800
447
- },
448
- {
449
- "epoch": 6.0,
450
- "eval_accuracy": 0.7527272727272727,
451
- "eval_loss": 0.8885732889175415,
452
- "eval_runtime": 15.0594,
453
- "eval_samples_per_second": 73.044,
454
- "eval_steps_per_second": 2.324,
455
- "step": 2832
456
- },
457
- {
458
- "epoch": 6.038135593220339,
459
- "grad_norm": 4.600073337554932,
460
- "learning_rate": 7.987288135593222e-06,
461
- "loss": 0.7767,
462
- "step": 2850
463
- },
464
- {
465
- "epoch": 6.1440677966101696,
466
- "grad_norm": 5.356403827667236,
467
- "learning_rate": 7.951977401129945e-06,
468
- "loss": 0.708,
469
- "step": 2900
470
- },
471
- {
472
- "epoch": 6.25,
473
- "grad_norm": 5.1397223472595215,
474
- "learning_rate": 7.917372881355932e-06,
475
- "loss": 0.7397,
476
- "step": 2950
477
- },
478
- {
479
- "epoch": 6.3559322033898304,
480
- "grad_norm": 6.384206771850586,
481
- "learning_rate": 7.882062146892657e-06,
482
- "loss": 0.7523,
483
- "step": 3000
484
- },
485
- {
486
- "epoch": 6.461864406779661,
487
- "grad_norm": 5.545274257659912,
488
- "learning_rate": 7.846751412429378e-06,
489
- "loss": 0.7011,
490
- "step": 3050
491
- },
492
- {
493
- "epoch": 6.567796610169491,
494
- "grad_norm": 9.406649589538574,
495
- "learning_rate": 7.811440677966102e-06,
496
- "loss": 0.7975,
497
- "step": 3100
498
- },
499
- {
500
- "epoch": 6.673728813559322,
501
- "grad_norm": 7.81419563293457,
502
- "learning_rate": 7.776129943502827e-06,
503
- "loss": 0.7385,
504
- "step": 3150
505
- },
506
- {
507
- "epoch": 6.779661016949152,
508
- "grad_norm": 3.415956974029541,
509
- "learning_rate": 7.740819209039548e-06,
510
- "loss": 0.7356,
511
- "step": 3200
512
- },
513
- {
514
- "epoch": 6.885593220338983,
515
- "grad_norm": 5.487062931060791,
516
- "learning_rate": 7.705508474576271e-06,
517
- "loss": 0.7121,
518
- "step": 3250
519
- },
520
- {
521
- "epoch": 6.991525423728813,
522
- "grad_norm": 5.682718276977539,
523
- "learning_rate": 7.670197740112995e-06,
524
- "loss": 0.7191,
525
- "step": 3300
526
- },
527
  {
528
  "epoch": 7.0,
529
- "eval_accuracy": 0.7545454545454545,
530
- "eval_loss": 0.8240677118301392,
531
- "eval_runtime": 14.7678,
532
- "eval_samples_per_second": 74.486,
533
- "eval_steps_per_second": 2.37,
534
- "step": 3304
535
- },
536
- {
537
- "epoch": 7.0974576271186445,
538
- "grad_norm": 8.319087028503418,
539
- "learning_rate": 7.634887005649718e-06,
540
- "loss": 0.7096,
541
- "step": 3350
542
- },
543
- {
544
- "epoch": 7.203389830508475,
545
- "grad_norm": 5.857816696166992,
546
- "learning_rate": 7.599576271186442e-06,
547
- "loss": 0.7006,
548
- "step": 3400
549
- },
550
- {
551
- "epoch": 7.309322033898305,
552
- "grad_norm": 4.400519371032715,
553
- "learning_rate": 7.564265536723165e-06,
554
- "loss": 0.706,
555
- "step": 3450
556
- },
557
- {
558
- "epoch": 7.415254237288136,
559
- "grad_norm": 4.573615550994873,
560
- "learning_rate": 7.528954802259888e-06,
561
- "loss": 0.6543,
562
- "step": 3500
563
- },
564
- {
565
- "epoch": 7.521186440677966,
566
- "grad_norm": 7.545746803283691,
567
- "learning_rate": 7.49364406779661e-06,
568
- "loss": 0.6498,
569
- "step": 3550
570
- },
571
- {
572
- "epoch": 7.627118644067797,
573
- "grad_norm": 6.38883638381958,
574
- "learning_rate": 7.4583333333333345e-06,
575
- "loss": 0.6849,
576
- "step": 3600
577
- },
578
- {
579
- "epoch": 7.733050847457627,
580
- "grad_norm": 4.496486186981201,
581
- "learning_rate": 7.423022598870057e-06,
582
- "loss": 0.6741,
583
- "step": 3650
584
- },
585
- {
586
- "epoch": 7.838983050847458,
587
- "grad_norm": 5.2381792068481445,
588
- "learning_rate": 7.38771186440678e-06,
589
- "loss": 0.6441,
590
- "step": 3700
591
- },
592
- {
593
- "epoch": 7.944915254237288,
594
- "grad_norm": 6.047347068786621,
595
- "learning_rate": 7.3524011299435035e-06,
596
- "loss": 0.7089,
597
- "step": 3750
598
- },
599
- {
600
- "epoch": 8.0,
601
- "eval_accuracy": 0.7718181818181818,
602
- "eval_loss": 0.845079779624939,
603
- "eval_runtime": 14.9091,
604
- "eval_samples_per_second": 73.78,
605
- "eval_steps_per_second": 2.348,
606
- "step": 3776
607
- },
608
- {
609
- "epoch": 8.05084745762712,
610
- "grad_norm": 5.383782386779785,
611
- "learning_rate": 7.317090395480226e-06,
612
- "loss": 0.6375,
613
- "step": 3800
614
- },
615
- {
616
- "epoch": 8.15677966101695,
617
- "grad_norm": 7.663337707519531,
618
- "learning_rate": 7.28177966101695e-06,
619
- "loss": 0.6101,
620
- "step": 3850
621
- },
622
- {
623
- "epoch": 8.26271186440678,
624
- "grad_norm": 4.593461036682129,
625
- "learning_rate": 7.2464689265536725e-06,
626
- "loss": 0.6356,
627
- "step": 3900
628
- },
629
- {
630
- "epoch": 8.36864406779661,
631
- "grad_norm": 7.878734111785889,
632
- "learning_rate": 7.211158192090396e-06,
633
- "loss": 0.6412,
634
- "step": 3950
635
- },
636
- {
637
- "epoch": 8.474576271186441,
638
- "grad_norm": 3.501059055328369,
639
- "learning_rate": 7.17584745762712e-06,
640
- "loss": 0.6296,
641
- "step": 4000
642
- },
643
- {
644
- "epoch": 8.580508474576272,
645
- "grad_norm": 3.8199708461761475,
646
- "learning_rate": 7.140536723163842e-06,
647
- "loss": 0.654,
648
- "step": 4050
649
- },
650
- {
651
- "epoch": 8.686440677966102,
652
- "grad_norm": 6.42057466506958,
653
- "learning_rate": 7.105225988700566e-06,
654
- "loss": 0.6378,
655
- "step": 4100
656
- },
657
- {
658
- "epoch": 8.792372881355933,
659
- "grad_norm": 6.310295104980469,
660
- "learning_rate": 7.069915254237288e-06,
661
- "loss": 0.6235,
662
- "step": 4150
663
- },
664
- {
665
- "epoch": 8.898305084745763,
666
- "grad_norm": 4.627810478210449,
667
- "learning_rate": 7.034604519774012e-06,
668
- "loss": 0.6631,
669
- "step": 4200
670
- },
671
- {
672
- "epoch": 9.0,
673
- "eval_accuracy": 0.77,
674
- "eval_loss": 0.8626542687416077,
675
- "eval_runtime": 15.548,
676
- "eval_samples_per_second": 70.749,
677
- "eval_steps_per_second": 2.251,
678
- "step": 4248
679
- },
680
- {
681
- "epoch": 9.004237288135593,
682
- "grad_norm": 5.442898273468018,
683
- "learning_rate": 6.999293785310735e-06,
684
- "loss": 0.6358,
685
- "step": 4250
686
- },
687
- {
688
- "epoch": 9.110169491525424,
689
- "grad_norm": 5.042696475982666,
690
- "learning_rate": 6.963983050847458e-06,
691
- "loss": 0.6183,
692
- "step": 4300
693
- },
694
- {
695
- "epoch": 9.216101694915254,
696
- "grad_norm": 5.006898403167725,
697
- "learning_rate": 6.928672316384182e-06,
698
- "loss": 0.6438,
699
- "step": 4350
700
- },
701
- {
702
- "epoch": 9.322033898305085,
703
- "grad_norm": 6.093140125274658,
704
- "learning_rate": 6.893361581920905e-06,
705
- "loss": 0.5826,
706
- "step": 4400
707
- },
708
- {
709
- "epoch": 9.427966101694915,
710
- "grad_norm": 4.637847900390625,
711
- "learning_rate": 6.858050847457628e-06,
712
- "loss": 0.5559,
713
- "step": 4450
714
- },
715
- {
716
- "epoch": 9.533898305084746,
717
- "grad_norm": 2.860111951828003,
718
- "learning_rate": 6.82274011299435e-06,
719
- "loss": 0.5577,
720
- "step": 4500
721
- },
722
- {
723
- "epoch": 9.639830508474576,
724
- "grad_norm": 10.876856803894043,
725
- "learning_rate": 6.7874293785310745e-06,
726
- "loss": 0.6233,
727
- "step": 4550
728
- },
729
- {
730
- "epoch": 9.745762711864407,
731
- "grad_norm": 5.635727882385254,
732
- "learning_rate": 6.752118644067798e-06,
733
- "loss": 0.5703,
734
- "step": 4600
735
- },
736
- {
737
- "epoch": 9.851694915254237,
738
- "grad_norm": 6.9388532638549805,
739
- "learning_rate": 6.71680790960452e-06,
740
- "loss": 0.6323,
741
- "step": 4650
742
- },
743
- {
744
- "epoch": 9.957627118644067,
745
- "grad_norm": 7.485644340515137,
746
- "learning_rate": 6.6814971751412435e-06,
747
- "loss": 0.6021,
748
- "step": 4700
749
- },
750
- {
751
- "epoch": 10.0,
752
- "eval_accuracy": 0.7772727272727272,
753
- "eval_loss": 0.8030957579612732,
754
- "eval_runtime": 14.6023,
755
- "eval_samples_per_second": 75.331,
756
- "eval_steps_per_second": 2.397,
757
- "step": 4720
758
- },
759
- {
760
- "epoch": 10.063559322033898,
761
- "grad_norm": 13.398885726928711,
762
- "learning_rate": 6.646186440677966e-06,
763
- "loss": 0.5698,
764
- "step": 4750
765
- },
766
- {
767
- "epoch": 10.169491525423728,
768
- "grad_norm": 7.821059226989746,
769
- "learning_rate": 6.61087570621469e-06,
770
- "loss": 0.5364,
771
- "step": 4800
772
- },
773
- {
774
- "epoch": 10.275423728813559,
775
- "grad_norm": 7.535600662231445,
776
- "learning_rate": 6.576271186440678e-06,
777
- "loss": 0.5959,
778
- "step": 4850
779
- },
780
- {
781
- "epoch": 10.38135593220339,
782
- "grad_norm": 8.605109214782715,
783
- "learning_rate": 6.540960451977402e-06,
784
- "loss": 0.5837,
785
- "step": 4900
786
- },
787
- {
788
- "epoch": 10.48728813559322,
789
- "grad_norm": 5.240401268005371,
790
- "learning_rate": 6.505649717514125e-06,
791
- "loss": 0.5796,
792
- "step": 4950
793
- },
794
- {
795
- "epoch": 10.59322033898305,
796
- "grad_norm": 6.025055408477783,
797
- "learning_rate": 6.4703389830508476e-06,
798
- "loss": 0.5749,
799
- "step": 5000
800
- },
801
- {
802
- "epoch": 10.69915254237288,
803
- "grad_norm": 6.605931282043457,
804
- "learning_rate": 6.435028248587572e-06,
805
- "loss": 0.5185,
806
- "step": 5050
807
- },
808
- {
809
- "epoch": 10.805084745762711,
810
- "grad_norm": 3.71102237701416,
811
- "learning_rate": 6.399717514124294e-06,
812
- "loss": 0.5289,
813
- "step": 5100
814
- },
815
- {
816
- "epoch": 10.911016949152543,
817
- "grad_norm": 4.973482608795166,
818
- "learning_rate": 6.3644067796610174e-06,
819
- "loss": 0.5691,
820
- "step": 5150
821
- },
822
- {
823
- "epoch": 11.0,
824
- "eval_accuracy": 0.7836363636363637,
825
- "eval_loss": 0.8530685305595398,
826
- "eval_runtime": 14.8369,
827
- "eval_samples_per_second": 74.14,
828
- "eval_steps_per_second": 2.359,
829
- "step": 5192
830
  }
831
  ],
832
- "logging_steps": 50,
833
- "max_steps": 14160,
834
  "num_input_tokens_seen": 0,
835
- "num_train_epochs": 30,
836
  "save_steps": 500,
837
  "stateful_callbacks": {
838
  "TrainerControl": {
@@ -846,7 +222,7 @@
846
  "attributes": {}
847
  }
848
  },
849
- "total_flos": 1.286478541133356e+19,
850
  "train_batch_size": 32,
851
  "trial_name": null,
852
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8563636363636363,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/autoTaggingProject/ViT/General/Features/TopLength/Results/model_2024-10-16_test/checkpoint-1925",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 1925,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.36363636363636365,
13
+ "grad_norm": 1.4969431161880493,
14
+ "learning_rate": 9.81818181818182e-06,
15
+ "loss": 1.6534,
 
 
 
 
 
 
 
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.7272727272727273,
20
+ "grad_norm": 2.2577221393585205,
21
+ "learning_rate": 9.636363636363638e-06,
22
+ "loss": 1.317,
 
 
 
 
 
 
 
23
  "step": 200
24
  },
25
  {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.7218181818181818,
28
+ "eval_loss": 1.0995656251907349,
29
+ "eval_runtime": 12.6938,
30
+ "eval_samples_per_second": 43.328,
31
+ "eval_steps_per_second": 1.418,
32
+ "step": 275
33
+ },
34
+ {
35
+ "epoch": 1.0909090909090908,
36
+ "grad_norm": 2.7553977966308594,
37
+ "learning_rate": 9.454545454545456e-06,
38
+ "loss": 1.0529,
39
  "step": 300
40
  },
41
  {
42
+ "epoch": 1.4545454545454546,
43
+ "grad_norm": 2.5488719940185547,
44
+ "learning_rate": 9.272727272727273e-06,
45
+ "loss": 0.9003,
 
 
 
 
 
 
 
46
  "step": 400
47
  },
48
  {
49
+ "epoch": 1.8181818181818183,
50
+ "grad_norm": 2.936871290206909,
51
+ "learning_rate": 9.090909090909091e-06,
52
+ "loss": 0.7955,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  "step": 500
54
  },
55
  {
56
+ "epoch": 2.0,
57
+ "eval_accuracy": 0.7890909090909091,
58
+ "eval_loss": 0.8251153826713562,
59
+ "eval_runtime": 11.8903,
60
+ "eval_samples_per_second": 46.256,
61
+ "eval_steps_per_second": 1.514,
62
  "step": 550
63
  },
64
  {
65
+ "epoch": 2.1818181818181817,
66
+ "grad_norm": 3.6827473640441895,
67
+ "learning_rate": 8.90909090909091e-06,
68
+ "loss": 0.7386,
69
  "step": 600
70
  },
71
  {
72
+ "epoch": 2.5454545454545454,
73
+ "grad_norm": 2.6154849529266357,
74
+ "learning_rate": 8.727272727272728e-06,
75
+ "loss": 0.6758,
 
 
 
 
 
 
 
76
  "step": 700
77
  },
78
  {
79
+ "epoch": 2.909090909090909,
80
+ "grad_norm": 6.043990135192871,
81
+ "learning_rate": 8.545454545454546e-06,
82
+ "loss": 0.6518,
 
 
 
 
 
 
 
83
  "step": 800
84
  },
85
  {
86
+ "epoch": 3.0,
87
+ "eval_accuracy": 0.82,
88
+ "eval_loss": 0.7125250101089478,
89
+ "eval_runtime": 11.5548,
90
+ "eval_samples_per_second": 47.599,
91
+ "eval_steps_per_second": 1.558,
92
+ "step": 825
93
+ },
94
+ {
95
+ "epoch": 3.2727272727272725,
96
+ "grad_norm": 1.676995873451233,
97
+ "learning_rate": 8.363636363636365e-06,
98
+ "loss": 0.6034,
99
  "step": 900
100
  },
101
  {
102
+ "epoch": 3.6363636363636362,
103
+ "grad_norm": 2.6665966510772705,
104
+ "learning_rate": 8.181818181818183e-06,
105
+ "loss": 0.5865,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  "step": 1000
107
  },
108
  {
109
+ "epoch": 4.0,
110
+ "grad_norm": 3.419482469558716,
111
+ "learning_rate": 8.000000000000001e-06,
112
+ "loss": 0.5531,
 
 
 
 
 
 
 
113
  "step": 1100
114
  },
115
  {
116
+ "epoch": 4.0,
117
+ "eval_accuracy": 0.8218181818181818,
118
+ "eval_loss": 0.5728441476821899,
119
+ "eval_runtime": 11.88,
120
+ "eval_samples_per_second": 46.296,
121
+ "eval_steps_per_second": 1.515,
122
+ "step": 1100
123
  },
124
  {
125
+ "epoch": 4.363636363636363,
126
+ "grad_norm": 2.151674747467041,
127
+ "learning_rate": 7.81818181818182e-06,
128
+ "loss": 0.5151,
129
  "step": 1200
130
  },
131
  {
132
+ "epoch": 4.7272727272727275,
133
+ "grad_norm": 4.103756427764893,
134
+ "learning_rate": 7.63818181818182e-06,
135
+ "loss": 0.5118,
 
 
 
 
 
 
 
136
  "step": 1300
137
  },
138
  {
139
+ "epoch": 5.0,
140
+ "eval_accuracy": 0.84,
141
+ "eval_loss": 0.631469190120697,
142
+ "eval_runtime": 11.4374,
143
+ "eval_samples_per_second": 48.088,
144
+ "eval_steps_per_second": 1.574,
145
+ "step": 1375
146
+ },
147
+ {
148
+ "epoch": 5.090909090909091,
149
+ "grad_norm": 2.0776937007904053,
150
+ "learning_rate": 7.456363636363637e-06,
151
+ "loss": 0.4878,
152
  "step": 1400
153
  },
154
  {
155
+ "epoch": 5.454545454545454,
156
+ "grad_norm": 4.111904621124268,
157
+ "learning_rate": 7.274545454545456e-06,
158
+ "loss": 0.4802,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  "step": 1500
160
  },
161
  {
162
+ "epoch": 5.818181818181818,
163
+ "grad_norm": 3.490161657333374,
164
+ "learning_rate": 7.092727272727273e-06,
165
+ "loss": 0.4713,
 
 
 
 
 
 
 
166
  "step": 1600
167
  },
168
  {
169
+ "epoch": 6.0,
170
+ "eval_accuracy": 0.84,
171
+ "eval_loss": 0.4893127381801605,
172
+ "eval_runtime": 11.1344,
173
+ "eval_samples_per_second": 49.396,
174
+ "eval_steps_per_second": 1.617,
175
  "step": 1650
176
  },
177
  {
178
+ "epoch": 6.181818181818182,
179
+ "grad_norm": 3.1339528560638428,
180
+ "learning_rate": 6.910909090909092e-06,
181
+ "loss": 0.4365,
182
  "step": 1700
183
  },
184
  {
185
+ "epoch": 6.545454545454545,
186
+ "grad_norm": 3.686084032058716,
187
+ "learning_rate": 6.72909090909091e-06,
188
+ "loss": 0.4155,
 
 
 
 
 
 
 
189
  "step": 1800
190
  },
191
  {
192
+ "epoch": 6.909090909090909,
193
+ "grad_norm": 3.6479032039642334,
194
+ "learning_rate": 6.5472727272727275e-06,
195
+ "loss": 0.4251,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  "step": 1900
197
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  {
199
  "epoch": 7.0,
200
+ "eval_accuracy": 0.8563636363636363,
201
+ "eval_loss": 0.48278242349624634,
202
+ "eval_runtime": 11.2502,
203
+ "eval_samples_per_second": 48.888,
204
+ "eval_steps_per_second": 1.6,
205
+ "step": 1925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  }
207
  ],
208
+ "logging_steps": 100,
209
+ "max_steps": 5500,
210
  "num_input_tokens_seen": 0,
211
+ "num_train_epochs": 20,
212
  "save_steps": 500,
213
  "stateful_callbacks": {
214
  "TrainerControl": {
 
222
  "attributes": {}
223
  }
224
  },
225
+ "total_flos": 4.773677693976576e+18,
226
  "train_batch_size": 32,
227
  "trial_name": null,
228
  "trial_params": null