{
  "best_metric": 0.8978675645342312,
  "best_model_checkpoint": "videomae-base-finetuned-dd\\checkpoint-1344",
  "epoch": 4.005917159763314,
  "eval_steps": 500,
  "global_step": 1352,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0073964497041420114,
      "grad_norm": 22.897619247436523,
      "learning_rate": 3.6764705882352942e-06,
      "loss": 0.7648,
      "step": 10
    },
    {
      "epoch": 0.014792899408284023,
      "grad_norm": 6.239708423614502,
      "learning_rate": 7.3529411764705884e-06,
      "loss": 0.6619,
      "step": 20
    },
    {
      "epoch": 0.022189349112426034,
      "grad_norm": 11.775946617126465,
      "learning_rate": 1.1029411764705883e-05,
      "loss": 0.6844,
      "step": 30
    },
    {
      "epoch": 0.029585798816568046,
      "grad_norm": 12.948445320129395,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 0.6158,
      "step": 40
    },
    {
      "epoch": 0.03698224852071006,
      "grad_norm": 18.087614059448242,
      "learning_rate": 1.8382352941176472e-05,
      "loss": 0.6569,
      "step": 50
    },
    {
      "epoch": 0.04437869822485207,
      "grad_norm": 10.075726509094238,
      "learning_rate": 2.2058823529411766e-05,
      "loss": 0.6572,
      "step": 60
    },
    {
      "epoch": 0.051775147928994084,
      "grad_norm": 4.17144250869751,
      "learning_rate": 2.5735294117647057e-05,
      "loss": 0.3788,
      "step": 70
    },
    {
      "epoch": 0.05917159763313609,
      "grad_norm": 24.00593376159668,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.5752,
      "step": 80
    },
    {
      "epoch": 0.06656804733727811,
      "grad_norm": 0.1779569834470749,
      "learning_rate": 3.308823529411765e-05,
      "loss": 0.488,
      "step": 90
    },
    {
      "epoch": 0.07396449704142012,
      "grad_norm": 2.1989564895629883,
      "learning_rate": 3.6764705882352945e-05,
      "loss": 1.0225,
      "step": 100
    },
    {
      "epoch": 0.08136094674556213,
      "grad_norm": 16.50670623779297,
      "learning_rate": 4.044117647058824e-05,
      "loss": 1.0776,
      "step": 110
    },
    {
      "epoch": 0.08875739644970414,
      "grad_norm": 36.278076171875,
      "learning_rate": 4.411764705882353e-05,
      "loss": 1.1623,
      "step": 120
    },
    {
      "epoch": 0.09615384615384616,
      "grad_norm": 8.829483032226562,
      "learning_rate": 4.7794117647058826e-05,
      "loss": 0.9484,
      "step": 130
    },
    {
      "epoch": 0.10355029585798817,
      "grad_norm": 0.47137251496315,
      "learning_rate": 4.983552631578948e-05,
      "loss": 0.2775,
      "step": 140
    },
    {
      "epoch": 0.11094674556213018,
      "grad_norm": 1.3937591314315796,
      "learning_rate": 4.942434210526316e-05,
      "loss": 0.959,
      "step": 150
    },
    {
      "epoch": 0.11834319526627218,
      "grad_norm": 14.168859481811523,
      "learning_rate": 4.901315789473684e-05,
      "loss": 1.2776,
      "step": 160
    },
    {
      "epoch": 0.1257396449704142,
      "grad_norm": 209.46127319335938,
      "learning_rate": 4.860197368421053e-05,
      "loss": 1.1028,
      "step": 170
    },
    {
      "epoch": 0.13313609467455623,
      "grad_norm": 0.22284063696861267,
      "learning_rate": 4.819078947368421e-05,
      "loss": 0.2779,
      "step": 180
    },
    {
      "epoch": 0.14053254437869822,
      "grad_norm": 0.16761070489883423,
      "learning_rate": 4.7779605263157896e-05,
      "loss": 1.05,
      "step": 190
    },
    {
      "epoch": 0.14792899408284024,
      "grad_norm": 33.303768157958984,
      "learning_rate": 4.736842105263158e-05,
      "loss": 0.5783,
      "step": 200
    },
    {
      "epoch": 0.15532544378698224,
      "grad_norm": 1.759582281112671,
      "learning_rate": 4.6957236842105265e-05,
      "loss": 0.5052,
      "step": 210
    },
    {
      "epoch": 0.16272189349112426,
      "grad_norm": 0.16995219886302948,
      "learning_rate": 4.654605263157895e-05,
      "loss": 0.8945,
      "step": 220
    },
    {
      "epoch": 0.17011834319526628,
      "grad_norm": 5.4278740882873535,
      "learning_rate": 4.6134868421052635e-05,
      "loss": 1.1972,
      "step": 230
    },
    {
      "epoch": 0.17751479289940827,
      "grad_norm": 5.3494038581848145,
      "learning_rate": 4.572368421052632e-05,
      "loss": 0.5657,
      "step": 240
    },
    {
      "epoch": 0.1849112426035503,
      "grad_norm": 38.29714584350586,
      "learning_rate": 4.5312500000000004e-05,
      "loss": 0.6919,
      "step": 250
    },
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 31.136438369750977,
      "learning_rate": 4.490131578947369e-05,
      "loss": 0.4502,
      "step": 260
    },
    {
      "epoch": 0.1997041420118343,
      "grad_norm": 16.622379302978516,
      "learning_rate": 4.449013157894737e-05,
      "loss": 0.4002,
      "step": 270
    },
    {
      "epoch": 0.20710059171597633,
      "grad_norm": 0.7369871735572815,
      "learning_rate": 4.407894736842105e-05,
      "loss": 0.7867,
      "step": 280
    },
    {
      "epoch": 0.21449704142011836,
      "grad_norm": 1.566986083984375,
      "learning_rate": 4.3667763157894735e-05,
      "loss": 0.9427,
      "step": 290
    },
    {
      "epoch": 0.22189349112426035,
      "grad_norm": 20.51339340209961,
      "learning_rate": 4.3256578947368426e-05,
      "loss": 1.3318,
      "step": 300
    },
    {
      "epoch": 0.22928994082840237,
      "grad_norm": 16.819499969482422,
      "learning_rate": 4.284539473684211e-05,
      "loss": 0.7456,
      "step": 310
    },
    {
      "epoch": 0.23668639053254437,
      "grad_norm": 0.06786404550075531,
      "learning_rate": 4.2434210526315796e-05,
      "loss": 0.0428,
      "step": 320
    },
    {
      "epoch": 0.2440828402366864,
      "grad_norm": 18.684722900390625,
      "learning_rate": 4.202302631578947e-05,
      "loss": 0.5777,
      "step": 330
    },
    {
      "epoch": 0.2485207100591716,
      "eval_accuracy": 0.8372615039281706,
      "eval_loss": 0.8213610649108887,
      "eval_runtime": 1015.4502,
      "eval_samples_per_second": 0.877,
      "eval_steps_per_second": 0.439,
      "step": 336
    },
    {
      "epoch": 1.0029585798816567,
      "grad_norm": 0.05207992345094681,
      "learning_rate": 4.161184210526316e-05,
      "loss": 0.3122,
      "step": 340
    },
    {
      "epoch": 1.0103550295857988,
      "grad_norm": 0.08761809021234512,
      "learning_rate": 4.120065789473684e-05,
      "loss": 0.8974,
      "step": 350
    },
    {
      "epoch": 1.017751479289941,
      "grad_norm": 2.162889003753662,
      "learning_rate": 4.078947368421053e-05,
      "loss": 0.5468,
      "step": 360
    },
    {
      "epoch": 1.0251479289940828,
      "grad_norm": 0.3578147888183594,
      "learning_rate": 4.037828947368421e-05,
      "loss": 0.4065,
      "step": 370
    },
    {
      "epoch": 1.032544378698225,
      "grad_norm": 0.1941654235124588,
      "learning_rate": 3.9967105263157896e-05,
      "loss": 0.5691,
      "step": 380
    },
    {
      "epoch": 1.0399408284023668,
      "grad_norm": 0.4009058475494385,
      "learning_rate": 3.955592105263158e-05,
      "loss": 1.0974,
      "step": 390
    },
    {
      "epoch": 1.047337278106509,
      "grad_norm": 1.8374804258346558,
      "learning_rate": 3.9144736842105265e-05,
      "loss": 0.3989,
      "step": 400
    },
    {
      "epoch": 1.054733727810651,
      "grad_norm": 0.1476195603609085,
      "learning_rate": 3.873355263157895e-05,
      "loss": 0.9985,
      "step": 410
    },
    {
      "epoch": 1.0621301775147929,
      "grad_norm": 0.23249651491641998,
      "learning_rate": 3.8322368421052634e-05,
      "loss": 0.0116,
      "step": 420
    },
    {
      "epoch": 1.069526627218935,
      "grad_norm": 120.95174407958984,
      "learning_rate": 3.791118421052632e-05,
      "loss": 0.2631,
      "step": 430
    },
    {
      "epoch": 1.0769230769230769,
      "grad_norm": 0.18491333723068237,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6313,
      "step": 440
    },
    {
      "epoch": 1.084319526627219,
      "grad_norm": 139.33804321289062,
      "learning_rate": 3.708881578947369e-05,
      "loss": 0.8515,
      "step": 450
    },
    {
      "epoch": 1.0917159763313609,
      "grad_norm": 0.22990567982196808,
      "learning_rate": 3.6677631578947366e-05,
      "loss": 0.7119,
      "step": 460
    },
    {
      "epoch": 1.099112426035503,
      "grad_norm": 1.84480881690979,
      "learning_rate": 3.626644736842105e-05,
      "loss": 0.2762,
      "step": 470
    },
    {
      "epoch": 1.106508875739645,
      "grad_norm": 12.12067699432373,
      "learning_rate": 3.5855263157894735e-05,
      "loss": 0.3234,
      "step": 480
    },
    {
      "epoch": 1.113905325443787,
      "grad_norm": 25.058439254760742,
      "learning_rate": 3.5444078947368426e-05,
      "loss": 0.869,
      "step": 490
    },
    {
      "epoch": 1.121301775147929,
      "grad_norm": 20.12900161743164,
      "learning_rate": 3.503289473684211e-05,
      "loss": 1.085,
      "step": 500
    },
    {
      "epoch": 1.128698224852071,
      "grad_norm": 12.086252212524414,
      "learning_rate": 3.4621710526315795e-05,
      "loss": 0.7208,
      "step": 510
    },
    {
      "epoch": 1.136094674556213,
      "grad_norm": 25.66025161743164,
      "learning_rate": 3.421052631578947e-05,
      "loss": 0.6842,
      "step": 520
    },
    {
      "epoch": 1.143491124260355,
      "grad_norm": 15.72556209564209,
      "learning_rate": 3.379934210526316e-05,
      "loss": 0.7922,
      "step": 530
    },
    {
      "epoch": 1.150887573964497,
      "grad_norm": 0.22508259117603302,
      "learning_rate": 3.338815789473684e-05,
      "loss": 0.4978,
      "step": 540
    },
    {
      "epoch": 1.1582840236686391,
      "grad_norm": 0.3129032254219055,
      "learning_rate": 3.297697368421053e-05,
      "loss": 0.9733,
      "step": 550
    },
    {
      "epoch": 1.165680473372781,
      "grad_norm": 2.5810675621032715,
      "learning_rate": 3.256578947368421e-05,
      "loss": 1.0189,
      "step": 560
    },
    {
      "epoch": 1.1730769230769231,
      "grad_norm": 0.23227569460868835,
      "learning_rate": 3.2154605263157896e-05,
      "loss": 0.1902,
      "step": 570
    },
    {
      "epoch": 1.180473372781065,
      "grad_norm": 41.80963134765625,
      "learning_rate": 3.174342105263158e-05,
      "loss": 0.676,
      "step": 580
    },
    {
      "epoch": 1.1878698224852071,
      "grad_norm": 0.23230111598968506,
      "learning_rate": 3.1332236842105265e-05,
      "loss": 0.7534,
      "step": 590
    },
    {
      "epoch": 1.195266272189349,
      "grad_norm": 0.22635290026664734,
      "learning_rate": 3.092105263157895e-05,
      "loss": 0.4239,
      "step": 600
    },
    {
      "epoch": 1.202662721893491,
      "grad_norm": 1.2736923694610596,
      "learning_rate": 3.0509868421052634e-05,
      "loss": 0.1946,
      "step": 610
    },
    {
      "epoch": 1.2100591715976332,
      "grad_norm": 138.90115356445312,
      "learning_rate": 3.009868421052632e-05,
      "loss": 0.3972,
      "step": 620
    },
    {
      "epoch": 1.217455621301775,
      "grad_norm": 0.8171183466911316,
      "learning_rate": 2.96875e-05,
      "loss": 0.3475,
      "step": 630
    },
    {
      "epoch": 1.2248520710059172,
      "grad_norm": 0.14913396537303925,
      "learning_rate": 2.9276315789473684e-05,
      "loss": 0.0097,
      "step": 640
    },
    {
      "epoch": 1.232248520710059,
      "grad_norm": 0.16601596772670746,
      "learning_rate": 2.886513157894737e-05,
      "loss": 0.7906,
      "step": 650
    },
    {
      "epoch": 1.2396449704142012,
      "grad_norm": 0.0358225516974926,
      "learning_rate": 2.8453947368421054e-05,
      "loss": 1.0754,
      "step": 660
    },
    {
      "epoch": 1.2470414201183433,
      "grad_norm": 18.65558433532715,
      "learning_rate": 2.8042763157894735e-05,
      "loss": 1.144,
      "step": 670
    },
    {
      "epoch": 1.2492603550295858,
      "eval_accuracy": 0.8754208754208754,
      "eval_loss": 0.4453337490558624,
      "eval_runtime": 1018.2198,
      "eval_samples_per_second": 0.875,
      "eval_steps_per_second": 0.438,
      "step": 673
    },
    {
      "epoch": 2.0051775147928996,
      "grad_norm": 0.26922106742858887,
      "learning_rate": 2.7631578947368426e-05,
      "loss": 0.0545,
      "step": 680
    },
    {
      "epoch": 2.0125739644970415,
      "grad_norm": 0.08718305081129074,
      "learning_rate": 2.7220394736842107e-05,
      "loss": 0.2609,
      "step": 690
    },
    {
      "epoch": 2.0199704142011834,
      "grad_norm": 0.062128640711307526,
      "learning_rate": 2.6809210526315792e-05,
      "loss": 0.205,
      "step": 700
    },
    {
      "epoch": 2.0273668639053253,
      "grad_norm": 0.03102479875087738,
      "learning_rate": 2.6398026315789476e-05,
      "loss": 0.2414,
      "step": 710
    },
    {
      "epoch": 2.0347633136094676,
      "grad_norm": 0.1195179671049118,
      "learning_rate": 2.598684210526316e-05,
      "loss": 0.8337,
      "step": 720
    },
    {
      "epoch": 2.0421597633136095,
      "grad_norm": 0.18848393857479095,
      "learning_rate": 2.5575657894736842e-05,
      "loss": 0.938,
      "step": 730
    },
    {
      "epoch": 2.0495562130177514,
      "grad_norm": 0.30771324038505554,
      "learning_rate": 2.5164473684210527e-05,
      "loss": 0.5181,
      "step": 740
    },
    {
      "epoch": 2.0569526627218937,
      "grad_norm": 45.85493469238281,
      "learning_rate": 2.4753289473684215e-05,
      "loss": 0.9593,
      "step": 750
    },
    {
      "epoch": 2.0643491124260356,
      "grad_norm": 0.8830978870391846,
      "learning_rate": 2.4342105263157896e-05,
      "loss": 0.0696,
      "step": 760
    },
    {
      "epoch": 2.0717455621301775,
      "grad_norm": 0.23656447231769562,
      "learning_rate": 2.393092105263158e-05,
      "loss": 0.3063,
      "step": 770
    },
    {
      "epoch": 2.0791420118343193,
      "grad_norm": 14.187068939208984,
      "learning_rate": 2.3519736842105265e-05,
      "loss": 1.1477,
      "step": 780
    },
    {
      "epoch": 2.0865384615384617,
      "grad_norm": 20.795085906982422,
      "learning_rate": 2.3108552631578946e-05,
      "loss": 0.6215,
      "step": 790
    },
    {
      "epoch": 2.0939349112426036,
      "grad_norm": 0.27157312631607056,
      "learning_rate": 2.2697368421052634e-05,
      "loss": 0.271,
      "step": 800
    },
    {
      "epoch": 2.1013313609467454,
      "grad_norm": 26.25171661376953,
      "learning_rate": 2.228618421052632e-05,
      "loss": 0.9012,
      "step": 810
    },
    {
      "epoch": 2.1087278106508878,
      "grad_norm": 0.18786455690860748,
      "learning_rate": 2.1875e-05,
      "loss": 0.1285,
      "step": 820
    },
    {
      "epoch": 2.1161242603550297,
      "grad_norm": 0.09338750690221786,
      "learning_rate": 2.1463815789473684e-05,
      "loss": 0.6236,
      "step": 830
    },
    {
      "epoch": 2.1235207100591715,
      "grad_norm": 0.15658584237098694,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.2534,
      "step": 840
    },
    {
      "epoch": 2.1309171597633134,
      "grad_norm": 0.03897108510136604,
      "learning_rate": 2.0641447368421053e-05,
      "loss": 0.0317,
      "step": 850
    },
    {
      "epoch": 2.1383136094674557,
      "grad_norm": 0.17228557169437408,
      "learning_rate": 2.0230263157894738e-05,
      "loss": 0.5894,
      "step": 860
    },
    {
      "epoch": 2.1457100591715976,
      "grad_norm": 37.38359069824219,
      "learning_rate": 1.9819078947368423e-05,
      "loss": 0.3589,
      "step": 870
    },
    {
      "epoch": 2.1531065088757395,
      "grad_norm": 14.766074180603027,
      "learning_rate": 1.9407894736842107e-05,
      "loss": 0.2452,
      "step": 880
    },
    {
      "epoch": 2.160502958579882,
      "grad_norm": 0.09724285453557968,
      "learning_rate": 1.8996710526315788e-05,
      "loss": 0.4429,
      "step": 890
    },
    {
      "epoch": 2.1678994082840237,
      "grad_norm": 0.20985926687717438,
      "learning_rate": 1.8585526315789476e-05,
      "loss": 0.3037,
      "step": 900
    },
    {
      "epoch": 2.1752958579881656,
      "grad_norm": 17.582971572875977,
      "learning_rate": 1.8174342105263157e-05,
      "loss": 0.8678,
      "step": 910
    },
    {
      "epoch": 2.1826923076923075,
      "grad_norm": 0.10595466196537018,
      "learning_rate": 1.7763157894736842e-05,
      "loss": 0.5378,
      "step": 920
    },
    {
      "epoch": 2.19008875739645,
      "grad_norm": 0.08267045021057129,
      "learning_rate": 1.7351973684210527e-05,
      "loss": 0.3613,
      "step": 930
    },
    {
      "epoch": 2.1974852071005917,
      "grad_norm": 0.33162182569503784,
      "learning_rate": 1.694078947368421e-05,
      "loss": 0.4509,
      "step": 940
    },
    {
      "epoch": 2.2048816568047336,
      "grad_norm": 0.070041224360466,
      "learning_rate": 1.6529605263157896e-05,
      "loss": 0.2242,
      "step": 950
    },
    {
      "epoch": 2.212278106508876,
      "grad_norm": 0.07664915174245834,
      "learning_rate": 1.611842105263158e-05,
      "loss": 0.0046,
      "step": 960
    },
    {
      "epoch": 2.219674556213018,
      "grad_norm": 84.51457977294922,
      "learning_rate": 1.5707236842105265e-05,
      "loss": 0.4331,
      "step": 970
    },
    {
      "epoch": 2.2270710059171597,
      "grad_norm": 0.13595078885555267,
      "learning_rate": 1.5296052631578946e-05,
      "loss": 0.2439,
      "step": 980
    },
    {
      "epoch": 2.234467455621302,
      "grad_norm": 107.51094818115234,
      "learning_rate": 1.4884868421052634e-05,
      "loss": 0.1195,
      "step": 990
    },
    {
      "epoch": 2.241863905325444,
      "grad_norm": 6.981244087219238,
      "learning_rate": 1.4473684210526317e-05,
      "loss": 0.2644,
      "step": 1000
    },
    {
      "epoch": 2.2492603550295858,
      "grad_norm": 0.01494936365634203,
      "learning_rate": 1.4062500000000001e-05,
      "loss": 0.2783,
      "step": 1010
    },
    {
      "epoch": 2.2492603550295858,
      "eval_accuracy": 0.8170594837261503,
      "eval_loss": 0.8930483460426331,
      "eval_runtime": 977.9049,
      "eval_samples_per_second": 0.911,
      "eval_steps_per_second": 0.456,
      "step": 1010
    },
    {
      "epoch": 3.007396449704142,
      "grad_norm": 2.669276714324951,
      "learning_rate": 1.3651315789473684e-05,
      "loss": 0.6067,
      "step": 1020
    },
    {
      "epoch": 3.014792899408284,
      "grad_norm": 0.0429365374147892,
      "learning_rate": 1.3240131578947369e-05,
      "loss": 0.2312,
      "step": 1030
    },
    {
      "epoch": 3.022189349112426,
      "grad_norm": 0.0598057359457016,
      "learning_rate": 1.2828947368421055e-05,
      "loss": 0.0344,
      "step": 1040
    },
    {
      "epoch": 3.029585798816568,
      "grad_norm": 0.0926346629858017,
      "learning_rate": 1.2417763157894738e-05,
      "loss": 0.9614,
      "step": 1050
    },
    {
      "epoch": 3.03698224852071,
      "grad_norm": 0.01585511490702629,
      "learning_rate": 1.200657894736842e-05,
      "loss": 0.7316,
      "step": 1060
    },
    {
      "epoch": 3.044378698224852,
      "grad_norm": 0.10066540539264679,
      "learning_rate": 1.1595394736842107e-05,
      "loss": 0.0041,
      "step": 1070
    },
    {
      "epoch": 3.051775147928994,
      "grad_norm": 0.06809567660093307,
      "learning_rate": 1.118421052631579e-05,
      "loss": 0.0092,
      "step": 1080
    },
    {
      "epoch": 3.059171597633136,
      "grad_norm": 0.14008688926696777,
      "learning_rate": 1.0773026315789474e-05,
      "loss": 1.0007,
      "step": 1090
    },
    {
      "epoch": 3.0665680473372783,
      "grad_norm": 0.030784226953983307,
      "learning_rate": 1.0361842105263159e-05,
      "loss": 0.4198,
      "step": 1100
    },
    {
      "epoch": 3.07396449704142,
      "grad_norm": 0.07336018979549408,
      "learning_rate": 9.950657894736842e-06,
      "loss": 0.2221,
      "step": 1110
    },
    {
      "epoch": 3.081360946745562,
      "grad_norm": 0.06217151880264282,
      "learning_rate": 9.539473684210528e-06,
      "loss": 0.2991,
      "step": 1120
    },
    {
      "epoch": 3.088757396449704,
      "grad_norm": 0.07419371604919434,
      "learning_rate": 9.128289473684211e-06,
      "loss": 0.6161,
      "step": 1130
    },
    {
      "epoch": 3.0961538461538463,
      "grad_norm": 0.09845346957445145,
      "learning_rate": 8.717105263157894e-06,
      "loss": 0.204,
      "step": 1140
    },
    {
      "epoch": 3.103550295857988,
      "grad_norm": 0.020927123725414276,
      "learning_rate": 8.30592105263158e-06,
      "loss": 0.5403,
      "step": 1150
    },
    {
      "epoch": 3.11094674556213,
      "grad_norm": 0.07396041601896286,
      "learning_rate": 7.894736842105263e-06,
      "loss": 0.4637,
      "step": 1160
    },
    {
      "epoch": 3.1183431952662723,
      "grad_norm": 0.11606968194246292,
      "learning_rate": 7.483552631578948e-06,
      "loss": 1.3375,
      "step": 1170
    },
    {
      "epoch": 3.1257396449704142,
      "grad_norm": 0.08246757835149765,
      "learning_rate": 7.072368421052632e-06,
      "loss": 0.4418,
      "step": 1180
    },
    {
      "epoch": 3.133136094674556,
      "grad_norm": 0.17207257449626923,
      "learning_rate": 6.661184210526317e-06,
      "loss": 0.2426,
      "step": 1190
    },
    {
      "epoch": 3.140532544378698,
      "grad_norm": 0.49036145210266113,
      "learning_rate": 6.25e-06,
      "loss": 0.2545,
      "step": 1200
    },
    {
      "epoch": 3.1479289940828403,
      "grad_norm": 68.61939239501953,
      "learning_rate": 5.838815789473685e-06,
      "loss": 0.5084,
      "step": 1210
    },
    {
      "epoch": 3.155325443786982,
      "grad_norm": 0.08831863105297089,
      "learning_rate": 5.4276315789473686e-06,
      "loss": 0.0049,
      "step": 1220
    },
    {
      "epoch": 3.162721893491124,
      "grad_norm": 0.10537251830101013,
      "learning_rate": 5.016447368421053e-06,
      "loss": 0.1406,
      "step": 1230
    },
    {
      "epoch": 3.1701183431952664,
      "grad_norm": 17.6816349029541,
      "learning_rate": 4.605263157894737e-06,
      "loss": 0.486,
      "step": 1240
    },
    {
      "epoch": 3.1775147928994083,
      "grad_norm": 0.461302250623703,
      "learning_rate": 4.194078947368421e-06,
      "loss": 0.0042,
      "step": 1250
    },
    {
      "epoch": 3.18491124260355,
      "grad_norm": 0.04792853444814682,
      "learning_rate": 3.7828947368421055e-06,
      "loss": 0.719,
      "step": 1260
    },
    {
      "epoch": 3.1923076923076925,
      "grad_norm": 0.058049630373716354,
      "learning_rate": 3.3717105263157897e-06,
      "loss": 0.2919,
      "step": 1270
    },
    {
      "epoch": 3.1997041420118344,
      "grad_norm": 13.063404083251953,
      "learning_rate": 2.960526315789474e-06,
      "loss": 0.2856,
      "step": 1280
    },
    {
      "epoch": 3.2071005917159763,
      "grad_norm": 0.4628206193447113,
      "learning_rate": 2.549342105263158e-06,
      "loss": 0.0601,
      "step": 1290
    },
    {
      "epoch": 3.214497041420118,
      "grad_norm": 0.045592982321977615,
      "learning_rate": 2.138157894736842e-06,
      "loss": 0.53,
      "step": 1300
    },
    {
      "epoch": 3.2218934911242605,
      "grad_norm": 0.04378387704491615,
      "learning_rate": 1.7269736842105266e-06,
      "loss": 0.0033,
      "step": 1310
    },
    {
      "epoch": 3.2292899408284024,
      "grad_norm": 0.052677396684885025,
      "learning_rate": 1.3157894736842106e-06,
      "loss": 0.003,
      "step": 1320
    },
    {
      "epoch": 3.2366863905325443,
      "grad_norm": 0.07824493199586868,
      "learning_rate": 9.046052631578948e-07,
      "loss": 0.3084,
      "step": 1330
    },
    {
      "epoch": 3.2440828402366866,
      "grad_norm": 0.05082060024142265,
      "learning_rate": 4.934210526315789e-07,
      "loss": 0.0037,
      "step": 1340
    },
    {
      "epoch": 3.247041420118343,
      "eval_accuracy": 0.8978675645342312,
      "eval_loss": 0.40774551033973694,
      "eval_runtime": 1030.7199,
      "eval_samples_per_second": 0.864,
      "eval_steps_per_second": 0.433,
      "step": 1344
    },
    {
      "epoch": 4.004437869822485,
      "grad_norm": 0.045703090727329254,
      "learning_rate": 8.223684210526316e-08,
      "loss": 0.2113,
      "step": 1350
    },
    {
      "epoch": 4.005917159763314,
      "eval_accuracy": 0.8978675645342312,
      "eval_loss": 0.40793702006340027,
      "eval_runtime": 1018.6267,
      "eval_samples_per_second": 0.875,
      "eval_steps_per_second": 0.438,
      "step": 1352
    },
    {
      "epoch": 4.005917159763314,
      "step": 1352,
      "total_flos": 3.3668665207526523e+18,
      "train_loss": 0.5293050300165978,
      "train_runtime": 11070.5286,
      "train_samples_per_second": 0.244,
      "train_steps_per_second": 0.122
    },
    {
      "epoch": 4.005917159763314,
      "eval_accuracy": 0.890927624872579,
      "eval_loss": 0.4576520323753357,
      "eval_runtime": 1130.3017,
      "eval_samples_per_second": 0.868,
      "eval_steps_per_second": 0.434,
      "step": 1352
    },
    {
      "epoch": 4.005917159763314,
      "eval_accuracy": 0.890927624872579,
      "eval_loss": 0.4576520621776581,
      "eval_runtime": 1124.6542,
      "eval_samples_per_second": 0.872,
      "eval_steps_per_second": 0.437,
      "step": 1352
    }
  ],
  "logging_steps": 10,
  "max_steps": 1352,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 3.3668665207526523e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}