| { | |
| "best_metric": 0.6445224246502909, | |
| "best_model_checkpoint": "swin-tiny-patch4-window7-224-category-classification/checkpoint-868", | |
| "epoch": 9.94818652849741, | |
| "eval_steps": 500, | |
| "global_step": 960, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.10362694300518134, | |
| "grad_norm": 4.037997722625732, | |
| "learning_rate": 5.208333333333334e-06, | |
| "loss": 3.0145, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.20725388601036268, | |
| "grad_norm": 3.5954205989837646, | |
| "learning_rate": 1.0416666666666668e-05, | |
| "loss": 2.9504, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.31088082901554404, | |
| "grad_norm": 3.6831040382385254, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 2.803, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.41450777202072536, | |
| "grad_norm": 4.399529933929443, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 2.5636, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5181347150259067, | |
| "grad_norm": 5.646263599395752, | |
| "learning_rate": 2.604166666666667e-05, | |
| "loss": 2.354, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6217616580310881, | |
| "grad_norm": 5.669042110443115, | |
| "learning_rate": 3.125e-05, | |
| "loss": 2.1137, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7253886010362695, | |
| "grad_norm": 6.388670444488525, | |
| "learning_rate": 3.6458333333333336e-05, | |
| "loss": 2.0586, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.8290155440414507, | |
| "grad_norm": 4.985081672668457, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 1.9298, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9326424870466321, | |
| "grad_norm": 5.885165691375732, | |
| "learning_rate": 4.6875e-05, | |
| "loss": 1.89, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.9948186528497409, | |
| "eval_accuracy": 0.5441042157381147, | |
| "eval_loss": 1.507540225982666, | |
| "eval_runtime": 4294.0719, | |
| "eval_samples_per_second": 4.845, | |
| "eval_steps_per_second": 0.076, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.0362694300518134, | |
| "grad_norm": 6.3568572998046875, | |
| "learning_rate": 4.976851851851852e-05, | |
| "loss": 1.7875, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1398963730569949, | |
| "grad_norm": 5.888931751251221, | |
| "learning_rate": 4.9189814814814815e-05, | |
| "loss": 1.8005, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.2435233160621761, | |
| "grad_norm": 5.636714458465576, | |
| "learning_rate": 4.8611111111111115e-05, | |
| "loss": 1.7211, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.3471502590673574, | |
| "grad_norm": 8.701831817626953, | |
| "learning_rate": 4.803240740740741e-05, | |
| "loss": 1.7164, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.450777202072539, | |
| "grad_norm": 6.59879207611084, | |
| "learning_rate": 4.745370370370371e-05, | |
| "loss": 1.6864, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.5544041450777202, | |
| "grad_norm": 5.239670753479004, | |
| "learning_rate": 4.6875e-05, | |
| "loss": 1.6295, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.6580310880829017, | |
| "grad_norm": 8.442557334899902, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 1.5609, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.761658031088083, | |
| "grad_norm": 4.829947471618652, | |
| "learning_rate": 4.5717592592592594e-05, | |
| "loss": 1.6158, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.8652849740932642, | |
| "grad_norm": 6.158498764038086, | |
| "learning_rate": 4.5138888888888894e-05, | |
| "loss": 1.6215, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.9689119170984455, | |
| "grad_norm": 6.387746810913086, | |
| "learning_rate": 4.456018518518519e-05, | |
| "loss": 1.6059, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5983271643512955, | |
| "eval_loss": 1.3102949857711792, | |
| "eval_runtime": 3836.7945, | |
| "eval_samples_per_second": 5.422, | |
| "eval_steps_per_second": 0.085, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 2.0725388601036268, | |
| "grad_norm": 6.294243812561035, | |
| "learning_rate": 4.3981481481481486e-05, | |
| "loss": 1.5034, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.1761658031088085, | |
| "grad_norm": 5.085219860076904, | |
| "learning_rate": 4.340277777777778e-05, | |
| "loss": 1.5468, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.2797927461139897, | |
| "grad_norm": 5.5421624183654785, | |
| "learning_rate": 4.282407407407408e-05, | |
| "loss": 1.5022, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.383419689119171, | |
| "grad_norm": 8.139309883117676, | |
| "learning_rate": 4.224537037037037e-05, | |
| "loss": 1.5609, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.4870466321243523, | |
| "grad_norm": 6.149181842803955, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 1.5462, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.5906735751295336, | |
| "grad_norm": 4.927513122558594, | |
| "learning_rate": 4.1087962962962965e-05, | |
| "loss": 1.4448, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.694300518134715, | |
| "grad_norm": 5.6315999031066895, | |
| "learning_rate": 4.0509259259259265e-05, | |
| "loss": 1.5033, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.7979274611398965, | |
| "grad_norm": 5.844498634338379, | |
| "learning_rate": 3.993055555555556e-05, | |
| "loss": 1.4713, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.901554404145078, | |
| "grad_norm": 5.9281182289123535, | |
| "learning_rate": 3.935185185185186e-05, | |
| "loss": 1.4844, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.994818652849741, | |
| "eval_accuracy": 0.6106811517569581, | |
| "eval_loss": 1.2595337629318237, | |
| "eval_runtime": 3906.6265, | |
| "eval_samples_per_second": 5.325, | |
| "eval_steps_per_second": 0.083, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 3.005181347150259, | |
| "grad_norm": 5.67548942565918, | |
| "learning_rate": 3.877314814814815e-05, | |
| "loss": 1.453, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.1088082901554404, | |
| "grad_norm": 5.950716495513916, | |
| "learning_rate": 3.8194444444444444e-05, | |
| "loss": 1.4132, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.2124352331606216, | |
| "grad_norm": 6.033442497253418, | |
| "learning_rate": 3.7615740740740744e-05, | |
| "loss": 1.411, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.3160621761658033, | |
| "grad_norm": 6.805812358856201, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 1.4046, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.4196891191709846, | |
| "grad_norm": 6.180249214172363, | |
| "learning_rate": 3.6458333333333336e-05, | |
| "loss": 1.4112, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.523316062176166, | |
| "grad_norm": 5.689445495605469, | |
| "learning_rate": 3.587962962962963e-05, | |
| "loss": 1.4126, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.626943005181347, | |
| "grad_norm": 7.714968204498291, | |
| "learning_rate": 3.530092592592593e-05, | |
| "loss": 1.4563, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.7305699481865284, | |
| "grad_norm": 7.22826623916626, | |
| "learning_rate": 3.472222222222222e-05, | |
| "loss": 1.3952, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.8341968911917097, | |
| "grad_norm": 6.826329231262207, | |
| "learning_rate": 3.414351851851852e-05, | |
| "loss": 1.4198, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.937823834196891, | |
| "grad_norm": 6.155144691467285, | |
| "learning_rate": 3.3564814814814815e-05, | |
| "loss": 1.4392, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6151516608181512, | |
| "eval_loss": 1.241403341293335, | |
| "eval_runtime": 3776.8643, | |
| "eval_samples_per_second": 5.508, | |
| "eval_steps_per_second": 0.086, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 4.041450777202073, | |
| "grad_norm": 5.685715675354004, | |
| "learning_rate": 3.2986111111111115e-05, | |
| "loss": 1.3702, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.1450777202072535, | |
| "grad_norm": 6.5382585525512695, | |
| "learning_rate": 3.240740740740741e-05, | |
| "loss": 1.3525, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.248704663212435, | |
| "grad_norm": 5.420712947845459, | |
| "learning_rate": 3.182870370370371e-05, | |
| "loss": 1.3657, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.352331606217617, | |
| "grad_norm": 5.577756881713867, | |
| "learning_rate": 3.125e-05, | |
| "loss": 1.3743, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.455958549222798, | |
| "grad_norm": 6.49379301071167, | |
| "learning_rate": 3.06712962962963e-05, | |
| "loss": 1.358, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.5595854922279795, | |
| "grad_norm": 5.322962284088135, | |
| "learning_rate": 3.0092592592592593e-05, | |
| "loss": 1.3334, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.66321243523316, | |
| "grad_norm": 5.416570663452148, | |
| "learning_rate": 2.951388888888889e-05, | |
| "loss": 1.3498, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.766839378238342, | |
| "grad_norm": 5.773409843444824, | |
| "learning_rate": 2.8935185185185186e-05, | |
| "loss": 1.3413, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.870466321243523, | |
| "grad_norm": 5.400073528289795, | |
| "learning_rate": 2.8356481481481483e-05, | |
| "loss": 1.2978, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.974093264248705, | |
| "grad_norm": 6.004559516906738, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 1.3431, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.994818652849741, | |
| "eval_accuracy": 0.6285151180118252, | |
| "eval_loss": 1.195352554321289, | |
| "eval_runtime": 3677.8276, | |
| "eval_samples_per_second": 5.656, | |
| "eval_steps_per_second": 0.089, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 5.077720207253886, | |
| "grad_norm": 6.405722141265869, | |
| "learning_rate": 2.7199074074074076e-05, | |
| "loss": 1.3108, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 5.181347150259067, | |
| "grad_norm": 5.789535045623779, | |
| "learning_rate": 2.6620370370370372e-05, | |
| "loss": 1.2901, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.284974093264249, | |
| "grad_norm": 6.413881301879883, | |
| "learning_rate": 2.604166666666667e-05, | |
| "loss": 1.2965, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 5.38860103626943, | |
| "grad_norm": 5.42963171005249, | |
| "learning_rate": 2.5462962962962965e-05, | |
| "loss": 1.2921, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.492227979274611, | |
| "grad_norm": 5.485264778137207, | |
| "learning_rate": 2.488425925925926e-05, | |
| "loss": 1.2903, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.595854922279793, | |
| "grad_norm": 5.791281223297119, | |
| "learning_rate": 2.4305555555555558e-05, | |
| "loss": 1.2753, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.699481865284974, | |
| "grad_norm": 5.666800498962402, | |
| "learning_rate": 2.3726851851851854e-05, | |
| "loss": 1.2826, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.803108808290156, | |
| "grad_norm": 5.542099475860596, | |
| "learning_rate": 2.314814814814815e-05, | |
| "loss": 1.3549, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 5.9067357512953365, | |
| "grad_norm": 6.228384017944336, | |
| "learning_rate": 2.2569444444444447e-05, | |
| "loss": 1.2897, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6383694659424122, | |
| "eval_loss": 1.1611359119415283, | |
| "eval_runtime": 3648.3627, | |
| "eval_samples_per_second": 5.702, | |
| "eval_steps_per_second": 0.089, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 6.010362694300518, | |
| "grad_norm": 6.60087776184082, | |
| "learning_rate": 2.1990740740740743e-05, | |
| "loss": 1.3194, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 6.1139896373057, | |
| "grad_norm": 5.230093479156494, | |
| "learning_rate": 2.141203703703704e-05, | |
| "loss": 1.2622, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 6.217616580310881, | |
| "grad_norm": 6.67712926864624, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 1.2835, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.321243523316062, | |
| "grad_norm": 6.0210065841674805, | |
| "learning_rate": 2.0254629629629632e-05, | |
| "loss": 1.2516, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.424870466321243, | |
| "grad_norm": 5.083096981048584, | |
| "learning_rate": 1.967592592592593e-05, | |
| "loss": 1.2069, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 6.528497409326425, | |
| "grad_norm": 6.067955493927002, | |
| "learning_rate": 1.9097222222222222e-05, | |
| "loss": 1.2808, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 6.632124352331607, | |
| "grad_norm": 8.958367347717285, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 1.2798, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 6.7357512953367875, | |
| "grad_norm": 5.711329460144043, | |
| "learning_rate": 1.7939814814814815e-05, | |
| "loss": 1.2249, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.839378238341969, | |
| "grad_norm": 5.1562957763671875, | |
| "learning_rate": 1.736111111111111e-05, | |
| "loss": 1.2413, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 6.94300518134715, | |
| "grad_norm": 6.775362968444824, | |
| "learning_rate": 1.6782407407407408e-05, | |
| "loss": 1.2222, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 6.994818652849741, | |
| "eval_accuracy": 0.6417343652357833, | |
| "eval_loss": 1.1575372219085693, | |
| "eval_runtime": 3709.0324, | |
| "eval_samples_per_second": 5.609, | |
| "eval_steps_per_second": 0.088, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 7.046632124352332, | |
| "grad_norm": 6.662919998168945, | |
| "learning_rate": 1.6203703703703704e-05, | |
| "loss": 1.2678, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 7.150259067357513, | |
| "grad_norm": 5.15730619430542, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 1.2303, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 7.253886010362694, | |
| "grad_norm": 5.7805867195129395, | |
| "learning_rate": 1.5046296296296297e-05, | |
| "loss": 1.2413, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 7.357512953367876, | |
| "grad_norm": 7.833797454833984, | |
| "learning_rate": 1.4467592592592593e-05, | |
| "loss": 1.2475, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 7.461139896373057, | |
| "grad_norm": 5.12682580947876, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 1.2079, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 7.564766839378239, | |
| "grad_norm": 6.603540897369385, | |
| "learning_rate": 1.3310185185185186e-05, | |
| "loss": 1.2547, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 7.668393782383419, | |
| "grad_norm": 6.611011981964111, | |
| "learning_rate": 1.2731481481481482e-05, | |
| "loss": 1.2409, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 7.772020725388601, | |
| "grad_norm": 5.739754676818848, | |
| "learning_rate": 1.2152777777777779e-05, | |
| "loss": 1.228, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.875647668393782, | |
| "grad_norm": 5.423896312713623, | |
| "learning_rate": 1.1574074074074075e-05, | |
| "loss": 1.1859, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 7.979274611398964, | |
| "grad_norm": 5.076350212097168, | |
| "learning_rate": 1.0995370370370372e-05, | |
| "loss": 1.212, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6421189251550257, | |
| "eval_loss": 1.1474467515945435, | |
| "eval_runtime": 3776.6533, | |
| "eval_samples_per_second": 5.508, | |
| "eval_steps_per_second": 0.086, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 8.082901554404145, | |
| "grad_norm": 5.369143486022949, | |
| "learning_rate": 1.0416666666666668e-05, | |
| "loss": 1.1907, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 8.186528497409327, | |
| "grad_norm": 6.046126365661621, | |
| "learning_rate": 9.837962962962964e-06, | |
| "loss": 1.1993, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 8.290155440414507, | |
| "grad_norm": 6.894242286682129, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 1.2238, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.393782383419689, | |
| "grad_norm": 6.160308361053467, | |
| "learning_rate": 8.680555555555556e-06, | |
| "loss": 1.2258, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 8.49740932642487, | |
| "grad_norm": 5.83083963394165, | |
| "learning_rate": 8.101851851851852e-06, | |
| "loss": 1.1913, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 8.601036269430052, | |
| "grad_norm": 6.659111499786377, | |
| "learning_rate": 7.523148148148148e-06, | |
| "loss": 1.1878, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 8.704663212435234, | |
| "grad_norm": 7.118185043334961, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 1.1841, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 8.808290155440414, | |
| "grad_norm": 5.696249008178711, | |
| "learning_rate": 6.365740740740741e-06, | |
| "loss": 1.1934, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.911917098445596, | |
| "grad_norm": 5.1954545974731445, | |
| "learning_rate": 5.787037037037038e-06, | |
| "loss": 1.2087, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 8.994818652849741, | |
| "eval_accuracy": 0.6445224246502909, | |
| "eval_loss": 1.1410062313079834, | |
| "eval_runtime": 3882.2074, | |
| "eval_samples_per_second": 5.359, | |
| "eval_steps_per_second": 0.084, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 9.015544041450777, | |
| "grad_norm": 6.596940994262695, | |
| "learning_rate": 5.208333333333334e-06, | |
| "loss": 1.2113, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 9.119170984455959, | |
| "grad_norm": 6.038557052612305, | |
| "learning_rate": 4.6296296296296296e-06, | |
| "loss": 1.1918, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 9.22279792746114, | |
| "grad_norm": 5.016276836395264, | |
| "learning_rate": 4.050925925925926e-06, | |
| "loss": 1.2371, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 9.32642487046632, | |
| "grad_norm": 5.792815208435059, | |
| "learning_rate": 3.4722222222222224e-06, | |
| "loss": 1.231, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 9.430051813471502, | |
| "grad_norm": 5.179474353790283, | |
| "learning_rate": 2.893518518518519e-06, | |
| "loss": 1.19, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 9.533678756476684, | |
| "grad_norm": 6.146459579467773, | |
| "learning_rate": 2.3148148148148148e-06, | |
| "loss": 1.1693, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 9.637305699481866, | |
| "grad_norm": 5.89201021194458, | |
| "learning_rate": 1.7361111111111112e-06, | |
| "loss": 1.1679, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 9.740932642487046, | |
| "grad_norm": 6.085833549499512, | |
| "learning_rate": 1.1574074074074074e-06, | |
| "loss": 1.1812, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 9.844559585492227, | |
| "grad_norm": 5.505359172821045, | |
| "learning_rate": 5.787037037037037e-07, | |
| "loss": 1.1552, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 9.94818652849741, | |
| "grad_norm": 6.020310878753662, | |
| "learning_rate": 0.0, | |
| "loss": 1.1897, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 9.94818652849741, | |
| "eval_accuracy": 0.6431764649329423, | |
| "eval_loss": 1.1433619260787964, | |
| "eval_runtime": 3906.8512, | |
| "eval_samples_per_second": 5.325, | |
| "eval_steps_per_second": 0.083, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 9.94818652849741, | |
| "step": 960, | |
| "total_flos": 6.10407050089078e+18, | |
| "train_loss": 1.447805991768837, | |
| "train_runtime": 79883.0648, | |
| "train_samples_per_second": 3.088, | |
| "train_steps_per_second": 0.012 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 960, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.10407050089078e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |