| { | |
| "best_metric": 0.7506270627062707, | |
| "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1064", | |
| "epoch": 1.9971844204598779, | |
| "eval_steps": 500, | |
| "global_step": 1064, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.018770530267480056, | |
| "grad_norm": 3.569255828857422, | |
| "learning_rate": 4.6728971962616825e-06, | |
| "loss": 4.6843, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03754106053496011, | |
| "grad_norm": 3.1433990001678467, | |
| "learning_rate": 9.345794392523365e-06, | |
| "loss": 4.6663, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05631159080244017, | |
| "grad_norm": 3.3763270378112793, | |
| "learning_rate": 1.4018691588785047e-05, | |
| "loss": 4.6424, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07508212106992022, | |
| "grad_norm": 3.4368538856506348, | |
| "learning_rate": 1.869158878504673e-05, | |
| "loss": 4.6077, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09385265133740028, | |
| "grad_norm": 3.3751752376556396, | |
| "learning_rate": 2.3364485981308414e-05, | |
| "loss": 4.5724, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11262318160488034, | |
| "grad_norm": 4.194823265075684, | |
| "learning_rate": 2.8037383177570094e-05, | |
| "loss": 4.5213, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1313937118723604, | |
| "grad_norm": 4.447911262512207, | |
| "learning_rate": 3.2710280373831774e-05, | |
| "loss": 4.4357, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15016424213984045, | |
| "grad_norm": 5.328530788421631, | |
| "learning_rate": 3.738317757009346e-05, | |
| "loss": 4.2996, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1689347724073205, | |
| "grad_norm": 7.00287389755249, | |
| "learning_rate": 4.205607476635514e-05, | |
| "loss": 4.0969, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18770530267480057, | |
| "grad_norm": 8.038763999938965, | |
| "learning_rate": 4.672897196261683e-05, | |
| "loss": 3.7622, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2064758329422806, | |
| "grad_norm": 10.13965129852295, | |
| "learning_rate": 4.9843260188087774e-05, | |
| "loss": 3.5079, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22524636320976069, | |
| "grad_norm": 9.638181686401367, | |
| "learning_rate": 4.932079414838036e-05, | |
| "loss": 3.2635, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.24401689347724073, | |
| "grad_norm": 9.798678398132324, | |
| "learning_rate": 4.879832810867294e-05, | |
| "loss": 3.0213, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2627874237447208, | |
| "grad_norm": 9.259151458740234, | |
| "learning_rate": 4.827586206896552e-05, | |
| "loss": 2.9042, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.28155795401220085, | |
| "grad_norm": 9.942826271057129, | |
| "learning_rate": 4.77533960292581e-05, | |
| "loss": 2.6591, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3003284842796809, | |
| "grad_norm": 9.459359169006348, | |
| "learning_rate": 4.723092998955068e-05, | |
| "loss": 2.6108, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.31909901454716094, | |
| "grad_norm": 10.439133644104004, | |
| "learning_rate": 4.670846394984326e-05, | |
| "loss": 2.4054, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.337869544814641, | |
| "grad_norm": 9.522982597351074, | |
| "learning_rate": 4.6185997910135844e-05, | |
| "loss": 2.3187, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3566400750821211, | |
| "grad_norm": 10.88189697265625, | |
| "learning_rate": 4.566353187042842e-05, | |
| "loss": 2.3281, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.37541060534960113, | |
| "grad_norm": 12.891951560974121, | |
| "learning_rate": 4.514106583072101e-05, | |
| "loss": 2.2129, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3941811356170812, | |
| "grad_norm": 9.860595703125, | |
| "learning_rate": 4.461859979101359e-05, | |
| "loss": 2.1525, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4129516658845612, | |
| "grad_norm": 9.62917709350586, | |
| "learning_rate": 4.4096133751306166e-05, | |
| "loss": 2.1014, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.43172219615204127, | |
| "grad_norm": 10.246869087219238, | |
| "learning_rate": 4.357366771159875e-05, | |
| "loss": 2.0448, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.45049272641952137, | |
| "grad_norm": 10.559744834899902, | |
| "learning_rate": 4.305120167189133e-05, | |
| "loss": 1.9428, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4692632566870014, | |
| "grad_norm": 10.051835060119629, | |
| "learning_rate": 4.252873563218391e-05, | |
| "loss": 1.9952, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.48803378695448146, | |
| "grad_norm": 10.530678749084473, | |
| "learning_rate": 4.2006269592476494e-05, | |
| "loss": 1.8988, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5068043172219615, | |
| "grad_norm": 14.675883293151855, | |
| "learning_rate": 4.148380355276907e-05, | |
| "loss": 1.8214, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5255748474894416, | |
| "grad_norm": 10.792617797851562, | |
| "learning_rate": 4.096133751306165e-05, | |
| "loss": 1.8403, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5443453777569216, | |
| "grad_norm": 11.018345832824707, | |
| "learning_rate": 4.0438871473354236e-05, | |
| "loss": 1.8173, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5631159080244017, | |
| "grad_norm": 10.860880851745605, | |
| "learning_rate": 3.9916405433646815e-05, | |
| "loss": 1.8106, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5818864382918817, | |
| "grad_norm": 12.167359352111816, | |
| "learning_rate": 3.939393939393939e-05, | |
| "loss": 1.785, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6006569685593618, | |
| "grad_norm": 10.483503341674805, | |
| "learning_rate": 3.887147335423197e-05, | |
| "loss": 1.7222, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6194274988268419, | |
| "grad_norm": 9.229653358459473, | |
| "learning_rate": 3.834900731452456e-05, | |
| "loss": 1.6953, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6381980290943219, | |
| "grad_norm": 9.605615615844727, | |
| "learning_rate": 3.782654127481714e-05, | |
| "loss": 1.7395, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.656968559361802, | |
| "grad_norm": 9.905487060546875, | |
| "learning_rate": 3.730407523510972e-05, | |
| "loss": 1.6551, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.675739089629282, | |
| "grad_norm": 13.063451766967773, | |
| "learning_rate": 3.67816091954023e-05, | |
| "loss": 1.6907, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6945096198967621, | |
| "grad_norm": 11.43543815612793, | |
| "learning_rate": 3.625914315569488e-05, | |
| "loss": 1.6405, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7132801501642422, | |
| "grad_norm": 8.96221923828125, | |
| "learning_rate": 3.573667711598746e-05, | |
| "loss": 1.6317, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7320506804317222, | |
| "grad_norm": 9.918800354003906, | |
| "learning_rate": 3.521421107628005e-05, | |
| "loss": 1.6596, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7508212106992023, | |
| "grad_norm": 9.336323738098145, | |
| "learning_rate": 3.469174503657263e-05, | |
| "loss": 1.6412, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7695917409666823, | |
| "grad_norm": 9.826384544372559, | |
| "learning_rate": 3.4169278996865206e-05, | |
| "loss": 1.5947, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7883622712341624, | |
| "grad_norm": 11.160934448242188, | |
| "learning_rate": 3.3646812957157785e-05, | |
| "loss": 1.5278, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8071328015016425, | |
| "grad_norm": 13.808747291564941, | |
| "learning_rate": 3.3124346917450364e-05, | |
| "loss": 1.5359, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8259033317691225, | |
| "grad_norm": 9.443258285522461, | |
| "learning_rate": 3.260188087774295e-05, | |
| "loss": 1.544, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8446738620366026, | |
| "grad_norm": 9.449996948242188, | |
| "learning_rate": 3.2079414838035534e-05, | |
| "loss": 1.5372, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8634443923040825, | |
| "grad_norm": 10.876261711120605, | |
| "learning_rate": 3.155694879832811e-05, | |
| "loss": 1.5632, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8822149225715626, | |
| "grad_norm": 9.270318031311035, | |
| "learning_rate": 3.103448275862069e-05, | |
| "loss": 1.4651, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9009854528390427, | |
| "grad_norm": 10.690321922302246, | |
| "learning_rate": 3.0512016718913274e-05, | |
| "loss": 1.5552, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9197559831065227, | |
| "grad_norm": 8.963105201721191, | |
| "learning_rate": 2.9989550679205852e-05, | |
| "loss": 1.5143, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9385265133740028, | |
| "grad_norm": 9.028443336486816, | |
| "learning_rate": 2.946708463949843e-05, | |
| "loss": 1.4811, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9572970436414828, | |
| "grad_norm": 9.890482902526855, | |
| "learning_rate": 2.8944618599791013e-05, | |
| "loss": 1.4827, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9760675739089629, | |
| "grad_norm": 11.68128490447998, | |
| "learning_rate": 2.8422152560083598e-05, | |
| "loss": 1.3682, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.994838104176443, | |
| "grad_norm": 10.292035102844238, | |
| "learning_rate": 2.7899686520376177e-05, | |
| "loss": 1.3766, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.998592210229939, | |
| "eval_accuracy": 0.7071947194719472, | |
| "eval_loss": 1.12041175365448, | |
| "eval_runtime": 73.0098, | |
| "eval_samples_per_second": 103.753, | |
| "eval_steps_per_second": 3.246, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.013608634443923, | |
| "grad_norm": 10.038961410522461, | |
| "learning_rate": 2.737722048066876e-05, | |
| "loss": 1.3601, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0323791647114031, | |
| "grad_norm": 15.009783744812012, | |
| "learning_rate": 2.6854754440961337e-05, | |
| "loss": 1.4014, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0511496949788832, | |
| "grad_norm": 9.596123695373535, | |
| "learning_rate": 2.633228840125392e-05, | |
| "loss": 1.3601, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.069920225246363, | |
| "grad_norm": 11.943960189819336, | |
| "learning_rate": 2.5809822361546498e-05, | |
| "loss": 1.3876, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0886907555138432, | |
| "grad_norm": 10.823417663574219, | |
| "learning_rate": 2.5287356321839083e-05, | |
| "loss": 1.3681, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.1074612857813233, | |
| "grad_norm": 8.811203002929688, | |
| "learning_rate": 2.4764890282131662e-05, | |
| "loss": 1.3331, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1262318160488034, | |
| "grad_norm": 10.159242630004883, | |
| "learning_rate": 2.4242424242424244e-05, | |
| "loss": 1.4147, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1450023463162835, | |
| "grad_norm": 10.464547157287598, | |
| "learning_rate": 2.3719958202716826e-05, | |
| "loss": 1.3975, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.1637728765837636, | |
| "grad_norm": 11.621941566467285, | |
| "learning_rate": 2.3197492163009404e-05, | |
| "loss": 1.3553, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1825434068512435, | |
| "grad_norm": 10.114766120910645, | |
| "learning_rate": 2.2675026123301986e-05, | |
| "loss": 1.3037, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2013139371187236, | |
| "grad_norm": 13.734535217285156, | |
| "learning_rate": 2.215256008359457e-05, | |
| "loss": 1.3696, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.2200844673862037, | |
| "grad_norm": 10.053481101989746, | |
| "learning_rate": 2.1630094043887147e-05, | |
| "loss": 1.3007, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.2388549976536838, | |
| "grad_norm": 10.231536865234375, | |
| "learning_rate": 2.110762800417973e-05, | |
| "loss": 1.3774, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2576255279211637, | |
| "grad_norm": 12.41734504699707, | |
| "learning_rate": 2.058516196447231e-05, | |
| "loss": 1.3729, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.2763960581886438, | |
| "grad_norm": 10.437618255615234, | |
| "learning_rate": 2.006269592476489e-05, | |
| "loss": 1.3175, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.2951665884561239, | |
| "grad_norm": 10.459583282470703, | |
| "learning_rate": 1.9540229885057475e-05, | |
| "loss": 1.3438, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.313937118723604, | |
| "grad_norm": 9.832696914672852, | |
| "learning_rate": 1.9017763845350054e-05, | |
| "loss": 1.2885, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.332707648991084, | |
| "grad_norm": 12.73790454864502, | |
| "learning_rate": 1.8495297805642632e-05, | |
| "loss": 1.3343, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3514781792585642, | |
| "grad_norm": 12.657564163208008, | |
| "learning_rate": 1.7972831765935218e-05, | |
| "loss": 1.2856, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.370248709526044, | |
| "grad_norm": 10.494688034057617, | |
| "learning_rate": 1.7450365726227796e-05, | |
| "loss": 1.2428, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.3890192397935242, | |
| "grad_norm": 9.145332336425781, | |
| "learning_rate": 1.6927899686520378e-05, | |
| "loss": 1.2834, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.4077897700610043, | |
| "grad_norm": 9.796390533447266, | |
| "learning_rate": 1.6405433646812957e-05, | |
| "loss": 1.3583, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.4265603003284844, | |
| "grad_norm": 9.282154083251953, | |
| "learning_rate": 1.588296760710554e-05, | |
| "loss": 1.2639, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.4453308305959642, | |
| "grad_norm": 11.123170852661133, | |
| "learning_rate": 1.536050156739812e-05, | |
| "loss": 1.3345, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4641013608634443, | |
| "grad_norm": 10.234476089477539, | |
| "learning_rate": 1.48380355276907e-05, | |
| "loss": 1.3293, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.4828718911309244, | |
| "grad_norm": 8.585359573364258, | |
| "learning_rate": 1.4315569487983283e-05, | |
| "loss": 1.2923, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.5016424213984045, | |
| "grad_norm": 9.973495483398438, | |
| "learning_rate": 1.3793103448275863e-05, | |
| "loss": 1.2716, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.5204129516658846, | |
| "grad_norm": 10.079455375671387, | |
| "learning_rate": 1.3270637408568442e-05, | |
| "loss": 1.2429, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.5391834819333647, | |
| "grad_norm": 11.303532600402832, | |
| "learning_rate": 1.2748171368861026e-05, | |
| "loss": 1.3177, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.5579540122008448, | |
| "grad_norm": 11.871292114257812, | |
| "learning_rate": 1.2225705329153606e-05, | |
| "loss": 1.3239, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.5767245424683247, | |
| "grad_norm": 8.814146041870117, | |
| "learning_rate": 1.1703239289446186e-05, | |
| "loss": 1.1561, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.5954950727358048, | |
| "grad_norm": 10.472289085388184, | |
| "learning_rate": 1.1180773249738766e-05, | |
| "loss": 1.3309, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.6142656030032847, | |
| "grad_norm": 10.017809867858887, | |
| "learning_rate": 1.0658307210031348e-05, | |
| "loss": 1.2212, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.6330361332707648, | |
| "grad_norm": 9.293713569641113, | |
| "learning_rate": 1.013584117032393e-05, | |
| "loss": 1.2172, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.651806663538245, | |
| "grad_norm": 10.923977851867676, | |
| "learning_rate": 9.61337513061651e-06, | |
| "loss": 1.2031, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.670577193805725, | |
| "grad_norm": 8.05986213684082, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 1.2337, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.689347724073205, | |
| "grad_norm": 9.424606323242188, | |
| "learning_rate": 8.568443051201671e-06, | |
| "loss": 1.2331, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.7081182543406852, | |
| "grad_norm": 9.5239896774292, | |
| "learning_rate": 8.045977011494253e-06, | |
| "loss": 1.1844, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.7268887846081653, | |
| "grad_norm": 9.687056541442871, | |
| "learning_rate": 7.5235109717868345e-06, | |
| "loss": 1.2396, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.7456593148756452, | |
| "grad_norm": 11.813615798950195, | |
| "learning_rate": 7.001044932079415e-06, | |
| "loss": 1.2617, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.7644298451431253, | |
| "grad_norm": 9.889193534851074, | |
| "learning_rate": 6.478578892371996e-06, | |
| "loss": 1.2525, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.7832003754106054, | |
| "grad_norm": 9.544778823852539, | |
| "learning_rate": 5.956112852664577e-06, | |
| "loss": 1.2417, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.8019709056780853, | |
| "grad_norm": 10.579482078552246, | |
| "learning_rate": 5.433646812957157e-06, | |
| "loss": 1.1964, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.8207414359455654, | |
| "grad_norm": 8.800308227539062, | |
| "learning_rate": 4.911180773249739e-06, | |
| "loss": 1.2285, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.8395119662130455, | |
| "grad_norm": 7.766645431518555, | |
| "learning_rate": 4.38871473354232e-06, | |
| "loss": 1.204, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.8582824964805256, | |
| "grad_norm": 9.686829566955566, | |
| "learning_rate": 3.866248693834901e-06, | |
| "loss": 1.2163, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.8770530267480057, | |
| "grad_norm": 9.186079025268555, | |
| "learning_rate": 3.3437826541274823e-06, | |
| "loss": 1.1354, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8958235570154858, | |
| "grad_norm": 9.512069702148438, | |
| "learning_rate": 2.821316614420063e-06, | |
| "loss": 1.218, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.9145940872829659, | |
| "grad_norm": 11.176709175109863, | |
| "learning_rate": 2.2988505747126437e-06, | |
| "loss": 1.2341, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.9333646175504458, | |
| "grad_norm": 9.307233810424805, | |
| "learning_rate": 1.7763845350052249e-06, | |
| "loss": 1.1894, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.9521351478179259, | |
| "grad_norm": 11.547785758972168, | |
| "learning_rate": 1.2539184952978056e-06, | |
| "loss": 1.2157, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.970905678085406, | |
| "grad_norm": 10.38868236541748, | |
| "learning_rate": 7.314524555903866e-07, | |
| "loss": 1.1476, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9896762083528858, | |
| "grad_norm": 15.987852096557617, | |
| "learning_rate": 2.0898641588296764e-07, | |
| "loss": 1.2542, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.9971844204598779, | |
| "eval_accuracy": 0.7506270627062707, | |
| "eval_loss": 0.9383056163787842, | |
| "eval_runtime": 72.7566, | |
| "eval_samples_per_second": 104.114, | |
| "eval_steps_per_second": 3.257, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.9971844204598779, | |
| "step": 1064, | |
| "total_flos": 3.394127568277555e+18, | |
| "train_loss": 1.8402288484394103, | |
| "train_runtime": 2568.7785, | |
| "train_samples_per_second": 53.08, | |
| "train_steps_per_second": 0.414 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1064, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.394127568277555e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |