| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.97088108209656, | |
| "eval_steps": 500, | |
| "global_step": 13300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.015029118917903438, | |
| "grad_norm": 0.2293534129858017, | |
| "learning_rate": 6.766917293233083e-07, | |
| "loss": 1.5634, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.030058237835806877, | |
| "grad_norm": 0.2535412907600403, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.5043, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04508735675371031, | |
| "grad_norm": 0.3165118992328644, | |
| "learning_rate": 2.1804511278195492e-06, | |
| "loss": 1.5571, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06011647567161375, | |
| "grad_norm": 0.27761849761009216, | |
| "learning_rate": 2.9323308270676694e-06, | |
| "loss": 1.5064, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0751455945895172, | |
| "grad_norm": 0.34336039423942566, | |
| "learning_rate": 3.6842105263157892e-06, | |
| "loss": 1.5299, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09017471350742062, | |
| "grad_norm": 0.4327663481235504, | |
| "learning_rate": 4.436090225563911e-06, | |
| "loss": 1.5351, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.10520383242532406, | |
| "grad_norm": 0.4244738221168518, | |
| "learning_rate": 5.187969924812031e-06, | |
| "loss": 1.4876, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1202329513432275, | |
| "grad_norm": 0.39235126972198486, | |
| "learning_rate": 5.939849624060151e-06, | |
| "loss": 1.4138, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.13526207026113093, | |
| "grad_norm": 0.36149346828460693, | |
| "learning_rate": 6.691729323308271e-06, | |
| "loss": 1.3901, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1502911891790344, | |
| "grad_norm": 0.2174796313047409, | |
| "learning_rate": 7.4436090225563915e-06, | |
| "loss": 1.284, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16532030809693782, | |
| "grad_norm": 0.19376038014888763, | |
| "learning_rate": 8.195488721804512e-06, | |
| "loss": 1.2713, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.18034942701484125, | |
| "grad_norm": 0.18585975468158722, | |
| "learning_rate": 8.947368421052632e-06, | |
| "loss": 1.2301, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1953785459327447, | |
| "grad_norm": 0.18462727963924408, | |
| "learning_rate": 9.699248120300752e-06, | |
| "loss": 1.231, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.21040766485064813, | |
| "grad_norm": 0.16348238289356232, | |
| "learning_rate": 1.0451127819548872e-05, | |
| "loss": 1.2167, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.22543678376855156, | |
| "grad_norm": 0.17574988305568695, | |
| "learning_rate": 1.1203007518796992e-05, | |
| "loss": 1.1999, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.240465902686455, | |
| "grad_norm": 0.14682741463184357, | |
| "learning_rate": 1.1954887218045113e-05, | |
| "loss": 1.2491, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.25549502160435844, | |
| "grad_norm": 0.1753804236650467, | |
| "learning_rate": 1.2706766917293233e-05, | |
| "loss": 1.2036, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.27052414052226187, | |
| "grad_norm": 0.17857442796230316, | |
| "learning_rate": 1.3458646616541353e-05, | |
| "loss": 1.1822, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2855532594401653, | |
| "grad_norm": 0.18367990851402283, | |
| "learning_rate": 1.4210526315789475e-05, | |
| "loss": 1.1679, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3005823783580688, | |
| "grad_norm": 0.20284640789031982, | |
| "learning_rate": 1.4962406015037595e-05, | |
| "loss": 1.1337, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3156114972759722, | |
| "grad_norm": 0.16659210622310638, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 1.181, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.33064061619387564, | |
| "grad_norm": 0.1798979490995407, | |
| "learning_rate": 1.6466165413533834e-05, | |
| "loss": 1.1785, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.34566973511177906, | |
| "grad_norm": 0.1689957082271576, | |
| "learning_rate": 1.7218045112781956e-05, | |
| "loss": 1.1489, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3606988540296825, | |
| "grad_norm": 0.199369415640831, | |
| "learning_rate": 1.7969924812030074e-05, | |
| "loss": 1.1677, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.375727972947586, | |
| "grad_norm": 0.23965977132320404, | |
| "learning_rate": 1.8721804511278196e-05, | |
| "loss": 1.1516, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3907570918654894, | |
| "grad_norm": 0.18958410620689392, | |
| "learning_rate": 1.9473684210526315e-05, | |
| "loss": 1.1278, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.40578621078339283, | |
| "grad_norm": 0.20659048855304718, | |
| "learning_rate": 2.0225563909774437e-05, | |
| "loss": 1.1613, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.42081532970129626, | |
| "grad_norm": 0.22374583780765533, | |
| "learning_rate": 2.097744360902256e-05, | |
| "loss": 1.1348, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4358444486191997, | |
| "grad_norm": 0.22938427329063416, | |
| "learning_rate": 2.1729323308270677e-05, | |
| "loss": 1.157, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4508735675371031, | |
| "grad_norm": 0.2688145935535431, | |
| "learning_rate": 2.24812030075188e-05, | |
| "loss": 1.1329, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4659026864550066, | |
| "grad_norm": 0.21894283592700958, | |
| "learning_rate": 2.3233082706766917e-05, | |
| "loss": 1.1197, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.48093180537291, | |
| "grad_norm": 0.2249055653810501, | |
| "learning_rate": 2.398496240601504e-05, | |
| "loss": 1.127, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.49596092429081345, | |
| "grad_norm": 0.2487722635269165, | |
| "learning_rate": 2.4736842105263158e-05, | |
| "loss": 1.1331, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5109900432087169, | |
| "grad_norm": 0.2143404483795166, | |
| "learning_rate": 2.548872180451128e-05, | |
| "loss": 1.1342, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5260191621266204, | |
| "grad_norm": 0.27003639936447144, | |
| "learning_rate": 2.6240601503759398e-05, | |
| "loss": 1.133, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5410482810445237, | |
| "grad_norm": 0.25785332918167114, | |
| "learning_rate": 2.699248120300752e-05, | |
| "loss": 1.1284, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5560773999624272, | |
| "grad_norm": 0.24334581196308136, | |
| "learning_rate": 2.774436090225564e-05, | |
| "loss": 1.1149, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5711065188803306, | |
| "grad_norm": 0.23162595927715302, | |
| "learning_rate": 2.849624060150376e-05, | |
| "loss": 1.144, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5861356377982341, | |
| "grad_norm": 0.2650283873081207, | |
| "learning_rate": 2.924812030075188e-05, | |
| "loss": 1.1431, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6011647567161376, | |
| "grad_norm": 0.2596570551395416, | |
| "learning_rate": 3e-05, | |
| "loss": 1.1211, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6161938756340409, | |
| "grad_norm": 0.25908759236335754, | |
| "learning_rate": 3.075187969924812e-05, | |
| "loss": 1.1287, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6312229945519444, | |
| "grad_norm": 0.24592378735542297, | |
| "learning_rate": 3.150375939849624e-05, | |
| "loss": 1.0913, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6462521134698478, | |
| "grad_norm": 0.2371867448091507, | |
| "learning_rate": 3.225563909774436e-05, | |
| "loss": 1.1435, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6612812323877513, | |
| "grad_norm": 0.25050684809684753, | |
| "learning_rate": 3.300751879699248e-05, | |
| "loss": 1.1009, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6763103513056548, | |
| "grad_norm": 0.26998868584632874, | |
| "learning_rate": 3.3759398496240603e-05, | |
| "loss": 1.1018, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6913394702235581, | |
| "grad_norm": 0.255825012922287, | |
| "learning_rate": 3.451127819548872e-05, | |
| "loss": 1.096, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7063685891414616, | |
| "grad_norm": 0.2328484058380127, | |
| "learning_rate": 3.526315789473684e-05, | |
| "loss": 1.1083, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.721397708059365, | |
| "grad_norm": 0.2772115170955658, | |
| "learning_rate": 3.6015037593984966e-05, | |
| "loss": 1.1243, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7364268269772685, | |
| "grad_norm": 0.2620559334754944, | |
| "learning_rate": 3.6766917293233084e-05, | |
| "loss": 1.1357, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.751455945895172, | |
| "grad_norm": 0.2600899934768677, | |
| "learning_rate": 3.75187969924812e-05, | |
| "loss": 1.1044, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7664850648130753, | |
| "grad_norm": 0.23489312827587128, | |
| "learning_rate": 3.827067669172932e-05, | |
| "loss": 1.1028, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7815141837309788, | |
| "grad_norm": 0.2843015491962433, | |
| "learning_rate": 3.9022556390977447e-05, | |
| "loss": 1.1234, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7965433026488822, | |
| "grad_norm": 0.27744609117507935, | |
| "learning_rate": 3.9774436090225565e-05, | |
| "loss": 1.0939, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8115724215667857, | |
| "grad_norm": 0.2344987690448761, | |
| "learning_rate": 4.0526315789473684e-05, | |
| "loss": 1.094, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.826601540484689, | |
| "grad_norm": 0.2847677171230316, | |
| "learning_rate": 4.12781954887218e-05, | |
| "loss": 1.1, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8416306594025925, | |
| "grad_norm": 0.3026759922504425, | |
| "learning_rate": 4.203007518796993e-05, | |
| "loss": 1.1191, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.856659778320496, | |
| "grad_norm": 0.38774576783180237, | |
| "learning_rate": 4.2781954887218046e-05, | |
| "loss": 1.1273, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8716888972383994, | |
| "grad_norm": 0.28009462356567383, | |
| "learning_rate": 4.3533834586466164e-05, | |
| "loss": 1.081, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8867180161563029, | |
| "grad_norm": 0.2575189471244812, | |
| "learning_rate": 4.428571428571428e-05, | |
| "loss": 1.1077, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9017471350742062, | |
| "grad_norm": 0.2847520112991333, | |
| "learning_rate": 4.503759398496241e-05, | |
| "loss": 1.1041, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9167762539921097, | |
| "grad_norm": 0.31493791937828064, | |
| "learning_rate": 4.5789473684210527e-05, | |
| "loss": 1.1406, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.9318053729100132, | |
| "grad_norm": 0.2649036645889282, | |
| "learning_rate": 4.6541353383458645e-05, | |
| "loss": 1.0949, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9468344918279166, | |
| "grad_norm": 0.29710251092910767, | |
| "learning_rate": 4.729323308270677e-05, | |
| "loss": 1.0853, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.96186361074582, | |
| "grad_norm": 0.26907584071159363, | |
| "learning_rate": 4.804511278195489e-05, | |
| "loss": 1.1092, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9768927296637234, | |
| "grad_norm": 0.3357178568840027, | |
| "learning_rate": 4.879699248120301e-05, | |
| "loss": 1.1179, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9919218485816269, | |
| "grad_norm": 0.2772427201271057, | |
| "learning_rate": 4.9548872180451126e-05, | |
| "loss": 1.0903, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0060116475671614, | |
| "grad_norm": 0.26991239190101624, | |
| "learning_rate": 5.030075187969925e-05, | |
| "loss": 1.1113, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.021040766485065, | |
| "grad_norm": 0.24760043621063232, | |
| "learning_rate": 5.1052631578947376e-05, | |
| "loss": 1.068, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.0360698854029682, | |
| "grad_norm": 0.28557974100112915, | |
| "learning_rate": 5.180451127819549e-05, | |
| "loss": 1.0954, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0510990043208717, | |
| "grad_norm": 0.3007003962993622, | |
| "learning_rate": 5.2556390977443613e-05, | |
| "loss": 1.0944, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0661281232387751, | |
| "grad_norm": 0.30276528000831604, | |
| "learning_rate": 5.330827067669173e-05, | |
| "loss": 1.0945, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0811572421566786, | |
| "grad_norm": 0.26913130283355713, | |
| "learning_rate": 5.406015037593986e-05, | |
| "loss": 1.112, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0961863610745821, | |
| "grad_norm": 0.289982408285141, | |
| "learning_rate": 5.481203007518797e-05, | |
| "loss": 1.0891, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.1112154799924854, | |
| "grad_norm": 0.28320783376693726, | |
| "learning_rate": 5.5563909774436094e-05, | |
| "loss": 1.094, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.1262445989103889, | |
| "grad_norm": 0.31406116485595703, | |
| "learning_rate": 5.631578947368421e-05, | |
| "loss": 1.0853, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1412737178282923, | |
| "grad_norm": 0.299730122089386, | |
| "learning_rate": 5.706766917293234e-05, | |
| "loss": 1.1048, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1563028367461958, | |
| "grad_norm": 0.30774202942848206, | |
| "learning_rate": 5.781954887218045e-05, | |
| "loss": 1.0549, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1713319556640993, | |
| "grad_norm": 0.325926810503006, | |
| "learning_rate": 5.8571428571428575e-05, | |
| "loss": 1.0823, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1863610745820026, | |
| "grad_norm": 0.31851741671562195, | |
| "learning_rate": 5.9323308270676694e-05, | |
| "loss": 1.0989, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.201390193499906, | |
| "grad_norm": 0.3333583474159241, | |
| "learning_rate": 6.007518796992482e-05, | |
| "loss": 1.0625, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.2164193124178095, | |
| "grad_norm": 0.3349563479423523, | |
| "learning_rate": 6.082706766917293e-05, | |
| "loss": 1.1002, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.231448431335713, | |
| "grad_norm": 0.3039754629135132, | |
| "learning_rate": 6.157894736842106e-05, | |
| "loss": 1.0927, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.2464775502536165, | |
| "grad_norm": 0.3020300269126892, | |
| "learning_rate": 6.233082706766917e-05, | |
| "loss": 1.0983, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2615066691715198, | |
| "grad_norm": 0.31834477186203003, | |
| "learning_rate": 6.308270676691729e-05, | |
| "loss": 1.0628, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2765357880894233, | |
| "grad_norm": 0.3013087809085846, | |
| "learning_rate": 6.383458646616541e-05, | |
| "loss": 1.0683, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.2915649070073267, | |
| "grad_norm": 0.3001497983932495, | |
| "learning_rate": 6.458646616541354e-05, | |
| "loss": 1.0858, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.30659402592523, | |
| "grad_norm": 0.32003313302993774, | |
| "learning_rate": 6.533834586466165e-05, | |
| "loss": 1.0747, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.3216231448431337, | |
| "grad_norm": 0.3063625693321228, | |
| "learning_rate": 6.609022556390978e-05, | |
| "loss": 1.1008, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.336652263761037, | |
| "grad_norm": 0.27760475873947144, | |
| "learning_rate": 6.68421052631579e-05, | |
| "loss": 1.0903, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3516813826789404, | |
| "grad_norm": 0.25132644176483154, | |
| "learning_rate": 6.759398496240602e-05, | |
| "loss": 1.0808, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.366710501596844, | |
| "grad_norm": 0.2900444567203522, | |
| "learning_rate": 6.834586466165414e-05, | |
| "loss": 1.0755, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3817396205147472, | |
| "grad_norm": 0.2900155484676361, | |
| "learning_rate": 6.909774436090227e-05, | |
| "loss": 1.0797, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.396768739432651, | |
| "grad_norm": 0.31477174162864685, | |
| "learning_rate": 6.984962406015037e-05, | |
| "loss": 1.076, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.4117978583505542, | |
| "grad_norm": 0.3233202397823334, | |
| "learning_rate": 7.06015037593985e-05, | |
| "loss": 1.0968, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.4268269772684576, | |
| "grad_norm": 0.30731186270713806, | |
| "learning_rate": 7.135338345864661e-05, | |
| "loss": 1.0976, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4418560961863611, | |
| "grad_norm": 0.24933114647865295, | |
| "learning_rate": 7.210526315789474e-05, | |
| "loss": 1.0713, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.4568852151042644, | |
| "grad_norm": 0.2990662753582001, | |
| "learning_rate": 7.285714285714286e-05, | |
| "loss": 1.0988, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4719143340221679, | |
| "grad_norm": 0.25678712129592896, | |
| "learning_rate": 7.360902255639098e-05, | |
| "loss": 1.0874, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.4869434529400714, | |
| "grad_norm": 0.3273868262767792, | |
| "learning_rate": 7.43609022556391e-05, | |
| "loss": 1.1036, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.5019725718579748, | |
| "grad_norm": 0.26454275846481323, | |
| "learning_rate": 7.511278195488723e-05, | |
| "loss": 1.0713, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5170016907758783, | |
| "grad_norm": 0.2492770105600357, | |
| "learning_rate": 7.586466165413533e-05, | |
| "loss": 1.063, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.5320308096937816, | |
| "grad_norm": 0.28998205065727234, | |
| "learning_rate": 7.661654135338347e-05, | |
| "loss": 1.0866, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.5470599286116853, | |
| "grad_norm": 0.26011377573013306, | |
| "learning_rate": 7.736842105263159e-05, | |
| "loss": 1.0615, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5620890475295885, | |
| "grad_norm": 0.25039157271385193, | |
| "learning_rate": 7.81203007518797e-05, | |
| "loss": 1.0613, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.577118166447492, | |
| "grad_norm": 0.26238375902175903, | |
| "learning_rate": 7.887218045112782e-05, | |
| "loss": 1.0927, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5921472853653955, | |
| "grad_norm": 0.23926205933094025, | |
| "learning_rate": 7.962406015037594e-05, | |
| "loss": 1.0568, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.6071764042832988, | |
| "grad_norm": 0.24725791811943054, | |
| "learning_rate": 8.037593984962406e-05, | |
| "loss": 1.0772, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.6222055232012025, | |
| "grad_norm": 0.25732311606407166, | |
| "learning_rate": 8.112781954887219e-05, | |
| "loss": 1.1058, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.6372346421191057, | |
| "grad_norm": 0.2595824897289276, | |
| "learning_rate": 8.18796992481203e-05, | |
| "loss": 1.1056, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.6522637610370092, | |
| "grad_norm": 0.25049930810928345, | |
| "learning_rate": 8.263157894736843e-05, | |
| "loss": 1.0818, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6672928799549127, | |
| "grad_norm": 0.2525707185268402, | |
| "learning_rate": 8.338345864661655e-05, | |
| "loss": 1.1147, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.682321998872816, | |
| "grad_norm": 0.25421109795570374, | |
| "learning_rate": 8.413533834586467e-05, | |
| "loss": 1.0959, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.6973511177907197, | |
| "grad_norm": 0.2396637499332428, | |
| "learning_rate": 8.488721804511278e-05, | |
| "loss": 1.1012, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.712380236708623, | |
| "grad_norm": 0.24933594465255737, | |
| "learning_rate": 8.56390977443609e-05, | |
| "loss": 1.0931, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.7274093556265264, | |
| "grad_norm": 0.2631904184818268, | |
| "learning_rate": 8.639097744360902e-05, | |
| "loss": 1.1116, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.74243847454443, | |
| "grad_norm": 0.25884145498275757, | |
| "learning_rate": 8.714285714285715e-05, | |
| "loss": 1.0957, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.7574675934623332, | |
| "grad_norm": 0.23709504306316376, | |
| "learning_rate": 8.789473684210526e-05, | |
| "loss": 1.0804, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.7724967123802369, | |
| "grad_norm": 0.25201550126075745, | |
| "learning_rate": 8.864661654135339e-05, | |
| "loss": 1.0887, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.7875258312981401, | |
| "grad_norm": 0.2535940110683441, | |
| "learning_rate": 8.939849624060151e-05, | |
| "loss": 1.0748, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.8025549502160436, | |
| "grad_norm": 0.2509770691394806, | |
| "learning_rate": 9.015037593984963e-05, | |
| "loss": 1.1021, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.817584069133947, | |
| "grad_norm": 0.23271974921226501, | |
| "learning_rate": 9.090225563909775e-05, | |
| "loss": 1.0516, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.8326131880518504, | |
| "grad_norm": 0.249566912651062, | |
| "learning_rate": 9.165413533834586e-05, | |
| "loss": 1.0766, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.8476423069697538, | |
| "grad_norm": 0.22922058403491974, | |
| "learning_rate": 9.240601503759398e-05, | |
| "loss": 1.1056, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.8626714258876573, | |
| "grad_norm": 0.24767987430095673, | |
| "learning_rate": 9.315789473684211e-05, | |
| "loss": 1.0934, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.8777005448055608, | |
| "grad_norm": 0.23084762692451477, | |
| "learning_rate": 9.390977443609022e-05, | |
| "loss": 1.0894, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.8927296637234643, | |
| "grad_norm": 0.24973560869693756, | |
| "learning_rate": 9.466165413533835e-05, | |
| "loss": 1.0788, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.9077587826413676, | |
| "grad_norm": 0.248574361205101, | |
| "learning_rate": 9.541353383458647e-05, | |
| "loss": 1.0829, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.922787901559271, | |
| "grad_norm": 0.24072329699993134, | |
| "learning_rate": 9.616541353383459e-05, | |
| "loss": 1.1161, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.9378170204771745, | |
| "grad_norm": 0.2310166209936142, | |
| "learning_rate": 9.69172932330827e-05, | |
| "loss": 1.0682, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.952846139395078, | |
| "grad_norm": 0.23928825557231903, | |
| "learning_rate": 9.766917293233084e-05, | |
| "loss": 1.1194, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9678752583129815, | |
| "grad_norm": 0.2643069624900818, | |
| "learning_rate": 9.842105263157894e-05, | |
| "loss": 1.0712, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.9829043772308848, | |
| "grad_norm": 0.2541036307811737, | |
| "learning_rate": 9.917293233082708e-05, | |
| "loss": 1.0847, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.9979334961487882, | |
| "grad_norm": 0.2341761291027069, | |
| "learning_rate": 9.99248120300752e-05, | |
| "loss": 1.0847, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.012023295134323, | |
| "grad_norm": 0.2271430492401123, | |
| "learning_rate": 9.999986051218537e-05, | |
| "loss": 1.0459, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.027052414052226, | |
| "grad_norm": 0.2847868800163269, | |
| "learning_rate": 9.999937833308459e-05, | |
| "loss": 1.0499, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.04208153297013, | |
| "grad_norm": 0.283787339925766, | |
| "learning_rate": 9.999855174394648e-05, | |
| "loss": 1.0434, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.057110651888033, | |
| "grad_norm": 0.3147590756416321, | |
| "learning_rate": 9.999738075046483e-05, | |
| "loss": 1.053, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.0721397708059364, | |
| "grad_norm": 0.26797565817832947, | |
| "learning_rate": 9.999586536070575e-05, | |
| "loss": 1.0599, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.08716888972384, | |
| "grad_norm": 0.3145821988582611, | |
| "learning_rate": 9.99940055851077e-05, | |
| "loss": 1.053, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.1021980086417433, | |
| "grad_norm": 0.2934500277042389, | |
| "learning_rate": 9.999180143648135e-05, | |
| "loss": 1.0613, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.117227127559647, | |
| "grad_norm": 0.26865336298942566, | |
| "learning_rate": 9.998925293000949e-05, | |
| "loss": 1.0548, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.1322562464775503, | |
| "grad_norm": 0.3006330132484436, | |
| "learning_rate": 9.998636008324698e-05, | |
| "loss": 1.0362, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.1472853653954536, | |
| "grad_norm": 0.3416139483451843, | |
| "learning_rate": 9.998312291612057e-05, | |
| "loss": 1.0588, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.1623144843133573, | |
| "grad_norm": 0.3035484552383423, | |
| "learning_rate": 9.997954145092878e-05, | |
| "loss": 1.0675, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.1773436032312605, | |
| "grad_norm": 0.2740626335144043, | |
| "learning_rate": 9.997561571234179e-05, | |
| "loss": 1.0435, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.1923727221491642, | |
| "grad_norm": 0.2556332051753998, | |
| "learning_rate": 9.997134572740121e-05, | |
| "loss": 1.0803, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.2074018410670675, | |
| "grad_norm": 0.30163928866386414, | |
| "learning_rate": 9.996673152551991e-05, | |
| "loss": 1.0734, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.2224309599849708, | |
| "grad_norm": 0.3375592529773712, | |
| "learning_rate": 9.996177313848184e-05, | |
| "loss": 1.0906, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.2374600789028745, | |
| "grad_norm": 0.2721370756626129, | |
| "learning_rate": 9.995647060044177e-05, | |
| "loss": 1.0335, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.2524891978207777, | |
| "grad_norm": 0.26590871810913086, | |
| "learning_rate": 9.995082394792514e-05, | |
| "loss": 1.0448, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.2675183167386814, | |
| "grad_norm": 0.31041955947875977, | |
| "learning_rate": 9.994483321982768e-05, | |
| "loss": 1.0715, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.2825474356565847, | |
| "grad_norm": 0.2897711396217346, | |
| "learning_rate": 9.993849845741524e-05, | |
| "loss": 1.0564, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.297576554574488, | |
| "grad_norm": 0.3064815402030945, | |
| "learning_rate": 9.993181970432349e-05, | |
| "loss": 1.0634, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.3126056734923917, | |
| "grad_norm": 0.28484266996383667, | |
| "learning_rate": 9.99247970065576e-05, | |
| "loss": 1.0742, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.327634792410295, | |
| "grad_norm": 0.2922673523426056, | |
| "learning_rate": 9.99174304124919e-05, | |
| "loss": 1.0851, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.3426639113281986, | |
| "grad_norm": 0.3106658160686493, | |
| "learning_rate": 9.990971997286961e-05, | |
| "loss": 1.1097, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.357693030246102, | |
| "grad_norm": 0.30149292945861816, | |
| "learning_rate": 9.990166574080246e-05, | |
| "loss": 1.048, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.372722149164005, | |
| "grad_norm": 0.2597978115081787, | |
| "learning_rate": 9.989326777177028e-05, | |
| "loss": 1.029, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.387751268081909, | |
| "grad_norm": 0.24886192381381989, | |
| "learning_rate": 9.988452612362071e-05, | |
| "loss": 1.054, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.402780386999812, | |
| "grad_norm": 0.3196369707584381, | |
| "learning_rate": 9.987544085656873e-05, | |
| "loss": 1.0715, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.417809505917716, | |
| "grad_norm": 0.28219732642173767, | |
| "learning_rate": 9.986601203319623e-05, | |
| "loss": 1.0631, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.432838624835619, | |
| "grad_norm": 0.2625892162322998, | |
| "learning_rate": 9.985623971845169e-05, | |
| "loss": 1.0699, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.4478677437535223, | |
| "grad_norm": 0.26191845536231995, | |
| "learning_rate": 9.984612397964956e-05, | |
| "loss": 1.0536, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.462896862671426, | |
| "grad_norm": 0.27230942249298096, | |
| "learning_rate": 9.983566488646999e-05, | |
| "loss": 1.0924, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.4779259815893293, | |
| "grad_norm": 0.2692161500453949, | |
| "learning_rate": 9.982486251095817e-05, | |
| "loss": 1.0414, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.492955100507233, | |
| "grad_norm": 0.2909376323223114, | |
| "learning_rate": 9.981371692752401e-05, | |
| "loss": 1.0797, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.5079842194251363, | |
| "grad_norm": 0.3020433783531189, | |
| "learning_rate": 9.980222821294143e-05, | |
| "loss": 1.0637, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.5230133383430395, | |
| "grad_norm": 0.2783840596675873, | |
| "learning_rate": 9.979039644634802e-05, | |
| "loss": 1.0617, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.5380424572609432, | |
| "grad_norm": 0.27026644349098206, | |
| "learning_rate": 9.977822170924434e-05, | |
| "loss": 1.0515, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.5530715761788465, | |
| "grad_norm": 0.2597585618495941, | |
| "learning_rate": 9.97657040854935e-05, | |
| "loss": 1.0541, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.56810069509675, | |
| "grad_norm": 0.2972753345966339, | |
| "learning_rate": 9.975284366132047e-05, | |
| "loss": 1.0541, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.5831298140146535, | |
| "grad_norm": 0.25682052969932556, | |
| "learning_rate": 9.973964052531154e-05, | |
| "loss": 1.0533, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.5981589329325567, | |
| "grad_norm": 0.2819693684577942, | |
| "learning_rate": 9.972609476841367e-05, | |
| "loss": 1.0458, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.61318805185046, | |
| "grad_norm": 0.28979477286338806, | |
| "learning_rate": 9.971220648393394e-05, | |
| "loss": 1.0747, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.6282171707683637, | |
| "grad_norm": 0.2849046289920807, | |
| "learning_rate": 9.96979757675388e-05, | |
| "loss": 1.05, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.6432462896862674, | |
| "grad_norm": 0.28079524636268616, | |
| "learning_rate": 9.968340271725352e-05, | |
| "loss": 1.0755, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.6582754086041707, | |
| "grad_norm": 0.27980852127075195, | |
| "learning_rate": 9.966848743346144e-05, | |
| "loss": 1.0874, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.673304527522074, | |
| "grad_norm": 0.25519728660583496, | |
| "learning_rate": 9.965323001890331e-05, | |
| "loss": 1.0319, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.688333646439977, | |
| "grad_norm": 0.25402480363845825, | |
| "learning_rate": 9.963763057867656e-05, | |
| "loss": 1.0268, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.703362765357881, | |
| "grad_norm": 0.25798556208610535, | |
| "learning_rate": 9.962168922023462e-05, | |
| "loss": 1.0365, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.7183918842757846, | |
| "grad_norm": 0.2535860538482666, | |
| "learning_rate": 9.960540605338613e-05, | |
| "loss": 1.0543, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.733421003193688, | |
| "grad_norm": 0.26214438676834106, | |
| "learning_rate": 9.958878119029418e-05, | |
| "loss": 1.0336, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.748450122111591, | |
| "grad_norm": 0.27087315917015076, | |
| "learning_rate": 9.957181474547563e-05, | |
| "loss": 1.0457, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.7634792410294944, | |
| "grad_norm": 0.27433788776397705, | |
| "learning_rate": 9.955450683580018e-05, | |
| "loss": 1.07, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.778508359947398, | |
| "grad_norm": 0.2705138027667999, | |
| "learning_rate": 9.953685758048967e-05, | |
| "loss": 1.0403, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.793537478865302, | |
| "grad_norm": 0.2626933157444, | |
| "learning_rate": 9.951886710111723e-05, | |
| "loss": 1.0464, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.808566597783205, | |
| "grad_norm": 0.27033478021621704, | |
| "learning_rate": 9.950053552160644e-05, | |
| "loss": 1.0653, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.8235957167011083, | |
| "grad_norm": 0.2985825836658478, | |
| "learning_rate": 9.948186296823048e-05, | |
| "loss": 1.0417, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.8386248356190116, | |
| "grad_norm": 0.2883852422237396, | |
| "learning_rate": 9.94628495696112e-05, | |
| "loss": 1.0503, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.8536539545369153, | |
| "grad_norm": 0.25887343287467957, | |
| "learning_rate": 9.94434954567184e-05, | |
| "loss": 1.0526, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.868683073454819, | |
| "grad_norm": 0.26801565289497375, | |
| "learning_rate": 9.94238007628687e-05, | |
| "loss": 1.0917, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.8837121923727222, | |
| "grad_norm": 0.2502713203430176, | |
| "learning_rate": 9.940376562372482e-05, | |
| "loss": 1.0638, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.8987413112906255, | |
| "grad_norm": 0.2549043297767639, | |
| "learning_rate": 9.93833901772945e-05, | |
| "loss": 1.0438, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.9137704302085288, | |
| "grad_norm": 0.26013997197151184, | |
| "learning_rate": 9.936267456392971e-05, | |
| "loss": 1.0759, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.9287995491264325, | |
| "grad_norm": 0.29080161452293396, | |
| "learning_rate": 9.934161892632547e-05, | |
| "loss": 1.0387, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.9438286680443357, | |
| "grad_norm": 0.27860552072525024, | |
| "learning_rate": 9.932022340951909e-05, | |
| "loss": 1.0339, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.9588577869622394, | |
| "grad_norm": 0.25391969084739685, | |
| "learning_rate": 9.929848816088897e-05, | |
| "loss": 1.0503, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.9738869058801427, | |
| "grad_norm": 0.2683584690093994, | |
| "learning_rate": 9.927641333015377e-05, | |
| "loss": 1.0617, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.988916024798046, | |
| "grad_norm": 0.29328426718711853, | |
| "learning_rate": 9.925399906937123e-05, | |
| "loss": 1.068, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 3.003005823783581, | |
| "grad_norm": 0.26925235986709595, | |
| "learning_rate": 9.923124553293718e-05, | |
| "loss": 1.0641, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.018034942701484, | |
| "grad_norm": 0.2933187186717987, | |
| "learning_rate": 9.920815287758451e-05, | |
| "loss": 1.0264, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 3.0330640616193874, | |
| "grad_norm": 0.30965468287467957, | |
| "learning_rate": 9.918472126238206e-05, | |
| "loss": 1.0154, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 3.048093180537291, | |
| "grad_norm": 0.3275061547756195, | |
| "learning_rate": 9.916095084873347e-05, | |
| "loss": 0.9905, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 3.0631222994551943, | |
| "grad_norm": 0.40177953243255615, | |
| "learning_rate": 9.913684180037619e-05, | |
| "loss": 1.0066, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 3.078151418373098, | |
| "grad_norm": 0.389649361371994, | |
| "learning_rate": 9.911239428338023e-05, | |
| "loss": 1.0424, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 3.0931805372910013, | |
| "grad_norm": 0.3205302953720093, | |
| "learning_rate": 9.908760846614709e-05, | |
| "loss": 1.0234, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 3.1082096562089045, | |
| "grad_norm": 0.3212546408176422, | |
| "learning_rate": 9.906248451940861e-05, | |
| "loss": 1.0075, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 3.1232387751268083, | |
| "grad_norm": 0.33269983530044556, | |
| "learning_rate": 9.903702261622567e-05, | |
| "loss": 1.0039, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 3.1382678940447115, | |
| "grad_norm": 0.34872928261756897, | |
| "learning_rate": 9.901122293198719e-05, | |
| "loss": 0.9952, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 3.153297012962615, | |
| "grad_norm": 0.348037987947464, | |
| "learning_rate": 9.898508564440879e-05, | |
| "loss": 1.0133, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.1683261318805185, | |
| "grad_norm": 0.3966461420059204, | |
| "learning_rate": 9.895861093353158e-05, | |
| "loss": 1.0049, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 3.1833552507984217, | |
| "grad_norm": 0.3553076684474945, | |
| "learning_rate": 9.893179898172095e-05, | |
| "loss": 0.9789, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 3.1983843697163254, | |
| "grad_norm": 0.38464319705963135, | |
| "learning_rate": 9.890464997366529e-05, | |
| "loss": 1.0062, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 3.2134134886342287, | |
| "grad_norm": 0.3749645948410034, | |
| "learning_rate": 9.887716409637478e-05, | |
| "loss": 1.0364, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 3.2284426075521324, | |
| "grad_norm": 0.3553982675075531, | |
| "learning_rate": 9.884934153917997e-05, | |
| "loss": 0.9896, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.2434717264700357, | |
| "grad_norm": 0.34840455651283264, | |
| "learning_rate": 9.882118249373063e-05, | |
| "loss": 0.9954, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 3.258500845387939, | |
| "grad_norm": 0.34040772914886475, | |
| "learning_rate": 9.879268715399432e-05, | |
| "loss": 1.0224, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 3.2735299643058426, | |
| "grad_norm": 0.37151041626930237, | |
| "learning_rate": 9.87638557162551e-05, | |
| "loss": 0.9864, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 3.288559083223746, | |
| "grad_norm": 0.34764307737350464, | |
| "learning_rate": 9.87346883791122e-05, | |
| "loss": 1.0121, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 3.3035882021416496, | |
| "grad_norm": 0.3537833094596863, | |
| "learning_rate": 9.870518534347853e-05, | |
| "loss": 0.9952, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.318617321059553, | |
| "grad_norm": 0.3364524245262146, | |
| "learning_rate": 9.867534681257951e-05, | |
| "loss": 1.0383, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 3.333646439977456, | |
| "grad_norm": 0.33494752645492554, | |
| "learning_rate": 9.864517299195144e-05, | |
| "loss": 1.0318, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 3.34867555889536, | |
| "grad_norm": 0.31135261058807373, | |
| "learning_rate": 9.861466408944027e-05, | |
| "loss": 0.9749, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 3.363704677813263, | |
| "grad_norm": 0.36317843198776245, | |
| "learning_rate": 9.858382031520005e-05, | |
| "loss": 1.0232, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 3.378733796731167, | |
| "grad_norm": 0.346181720495224, | |
| "learning_rate": 9.855264188169152e-05, | |
| "loss": 1.0099, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.39376291564907, | |
| "grad_norm": 0.35162779688835144, | |
| "learning_rate": 9.852112900368066e-05, | |
| "loss": 1.0128, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 3.4087920345669733, | |
| "grad_norm": 0.3490872383117676, | |
| "learning_rate": 9.848928189823723e-05, | |
| "loss": 1.0, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 3.423821153484877, | |
| "grad_norm": 0.3363298177719116, | |
| "learning_rate": 9.845710078473316e-05, | |
| "loss": 1.0171, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 3.4388502724027803, | |
| "grad_norm": 0.323453813791275, | |
| "learning_rate": 9.842458588484123e-05, | |
| "loss": 0.9908, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 3.453879391320684, | |
| "grad_norm": 0.3421192765235901, | |
| "learning_rate": 9.839173742253334e-05, | |
| "loss": 1.0134, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.4689085102385873, | |
| "grad_norm": 0.33773696422576904, | |
| "learning_rate": 9.835855562407912e-05, | |
| "loss": 0.9938, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 3.4839376291564905, | |
| "grad_norm": 0.34854745864868164, | |
| "learning_rate": 9.83250407180443e-05, | |
| "loss": 0.9922, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 3.4989667480743942, | |
| "grad_norm": 0.35300213098526, | |
| "learning_rate": 9.829119293528916e-05, | |
| "loss": 1.0067, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 3.5139958669922975, | |
| "grad_norm": 0.34796491265296936, | |
| "learning_rate": 9.82570125089669e-05, | |
| "loss": 1.0133, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 3.529024985910201, | |
| "grad_norm": 0.35767292976379395, | |
| "learning_rate": 9.822249967452213e-05, | |
| "loss": 1.0187, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 3.5440541048281045, | |
| "grad_norm": 0.3610760569572449, | |
| "learning_rate": 9.818765466968909e-05, | |
| "loss": 1.0044, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 3.5590832237460077, | |
| "grad_norm": 0.3299923241138458, | |
| "learning_rate": 9.815247773449018e-05, | |
| "loss": 0.9999, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 3.5741123426639114, | |
| "grad_norm": 0.27984675765037537, | |
| "learning_rate": 9.81169691112342e-05, | |
| "loss": 0.9758, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 3.5891414615818147, | |
| "grad_norm": 0.30341655015945435, | |
| "learning_rate": 9.80811290445147e-05, | |
| "loss": 1.0024, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 3.6041705804997184, | |
| "grad_norm": 0.33460941910743713, | |
| "learning_rate": 9.804495778120833e-05, | |
| "loss": 1.0167, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.6191996994176217, | |
| "grad_norm": 0.33041292428970337, | |
| "learning_rate": 9.800845557047314e-05, | |
| "loss": 1.0108, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 3.634228818335525, | |
| "grad_norm": 0.304404079914093, | |
| "learning_rate": 9.797162266374676e-05, | |
| "loss": 1.0052, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 3.6492579372534286, | |
| "grad_norm": 0.3226507008075714, | |
| "learning_rate": 9.793445931474485e-05, | |
| "loss": 1.0087, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 3.664287056171332, | |
| "grad_norm": 0.3016469180583954, | |
| "learning_rate": 9.789696577945917e-05, | |
| "loss": 1.0068, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 3.6793161750892356, | |
| "grad_norm": 0.317958265542984, | |
| "learning_rate": 9.785914231615594e-05, | |
| "loss": 1.0256, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 3.694345294007139, | |
| "grad_norm": 0.3319275677204132, | |
| "learning_rate": 9.782098918537399e-05, | |
| "loss": 0.9882, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 3.709374412925042, | |
| "grad_norm": 0.34686529636383057, | |
| "learning_rate": 9.778250664992304e-05, | |
| "loss": 1.0071, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 3.724403531842946, | |
| "grad_norm": 0.36334285140037537, | |
| "learning_rate": 9.77436949748818e-05, | |
| "loss": 1.0086, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 3.739432650760849, | |
| "grad_norm": 0.36445969343185425, | |
| "learning_rate": 9.770455442759621e-05, | |
| "loss": 1.0285, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 3.754461769678753, | |
| "grad_norm": 0.32181107997894287, | |
| "learning_rate": 9.766508527767757e-05, | |
| "loss": 1.0374, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.769490888596656, | |
| "grad_norm": 0.371354341506958, | |
| "learning_rate": 9.762528779700067e-05, | |
| "loss": 1.0192, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 3.7845200075145593, | |
| "grad_norm": 0.3308964669704437, | |
| "learning_rate": 9.758516225970198e-05, | |
| "loss": 1.0117, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.799549126432463, | |
| "grad_norm": 0.35072851181030273, | |
| "learning_rate": 9.754470894217767e-05, | |
| "loss": 1.02, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 3.8145782453503663, | |
| "grad_norm": 0.3249657452106476, | |
| "learning_rate": 9.750392812308178e-05, | |
| "loss": 1.0205, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 3.82960736426827, | |
| "grad_norm": 0.3178282380104065, | |
| "learning_rate": 9.74628200833243e-05, | |
| "loss": 1.0244, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.8446364831861732, | |
| "grad_norm": 0.3914138674736023, | |
| "learning_rate": 9.742138510606915e-05, | |
| "loss": 1.0201, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 3.8596656021040765, | |
| "grad_norm": 0.3437259793281555, | |
| "learning_rate": 9.737962347673231e-05, | |
| "loss": 1.0067, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 3.87469472102198, | |
| "grad_norm": 0.3310168385505676, | |
| "learning_rate": 9.733753548297988e-05, | |
| "loss": 1.0215, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 3.8897238399398835, | |
| "grad_norm": 0.35641738772392273, | |
| "learning_rate": 9.729512141472599e-05, | |
| "loss": 1.0181, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 3.904752958857787, | |
| "grad_norm": 0.36426904797554016, | |
| "learning_rate": 9.725238156413089e-05, | |
| "loss": 1.0174, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.9197820777756904, | |
| "grad_norm": 0.3366813659667969, | |
| "learning_rate": 9.720931622559893e-05, | |
| "loss": 1.0126, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 3.9348111966935937, | |
| "grad_norm": 0.3486657440662384, | |
| "learning_rate": 9.716592569577646e-05, | |
| "loss": 1.0161, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 3.9498403156114974, | |
| "grad_norm": 0.3317498564720154, | |
| "learning_rate": 9.712221027354991e-05, | |
| "loss": 1.0171, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 3.9648694345294007, | |
| "grad_norm": 0.3477359712123871, | |
| "learning_rate": 9.707817026004362e-05, | |
| "loss": 1.0195, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 3.9798985534473044, | |
| "grad_norm": 0.30774736404418945, | |
| "learning_rate": 9.70338059586178e-05, | |
| "loss": 1.0261, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.9949276723652076, | |
| "grad_norm": 0.38554686307907104, | |
| "learning_rate": 9.698911767486649e-05, | |
| "loss": 1.0376, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 4.0090174713507425, | |
| "grad_norm": 0.40208327770233154, | |
| "learning_rate": 9.694410571661537e-05, | |
| "loss": 0.9654, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 4.024046590268646, | |
| "grad_norm": 0.4230579733848572, | |
| "learning_rate": 9.689877039391968e-05, | |
| "loss": 0.9452, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 4.039075709186549, | |
| "grad_norm": 0.4582759439945221, | |
| "learning_rate": 9.685311201906215e-05, | |
| "loss": 0.9308, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 4.054104828104452, | |
| "grad_norm": 0.4000380337238312, | |
| "learning_rate": 9.680713090655072e-05, | |
| "loss": 0.9203, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.0691339470223555, | |
| "grad_norm": 0.3987461030483246, | |
| "learning_rate": 9.676082737311645e-05, | |
| "loss": 0.9427, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 4.08416306594026, | |
| "grad_norm": 0.4363115429878235, | |
| "learning_rate": 9.671420173771136e-05, | |
| "loss": 0.9249, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 4.099192184858163, | |
| "grad_norm": 0.39811596274375916, | |
| "learning_rate": 9.666725432150616e-05, | |
| "loss": 0.9205, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 4.114221303776066, | |
| "grad_norm": 0.4178659915924072, | |
| "learning_rate": 9.661998544788813e-05, | |
| "loss": 0.927, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 4.1292504226939695, | |
| "grad_norm": 0.43525931239128113, | |
| "learning_rate": 9.657239544245876e-05, | |
| "loss": 0.9172, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 4.144279541611873, | |
| "grad_norm": 0.38502469658851624, | |
| "learning_rate": 9.652448463303168e-05, | |
| "loss": 0.9331, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 4.159308660529776, | |
| "grad_norm": 0.50247722864151, | |
| "learning_rate": 9.647625334963024e-05, | |
| "loss": 0.9558, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 4.17433777944768, | |
| "grad_norm": 0.4176265597343445, | |
| "learning_rate": 9.642770192448536e-05, | |
| "loss": 0.9374, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 4.189366898365583, | |
| "grad_norm": 0.4144188463687897, | |
| "learning_rate": 9.637883069203314e-05, | |
| "loss": 0.9119, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 4.204396017283487, | |
| "grad_norm": 0.4362613558769226, | |
| "learning_rate": 9.632963998891262e-05, | |
| "loss": 0.928, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.21942513620139, | |
| "grad_norm": 0.45967820286750793, | |
| "learning_rate": 9.628013015396346e-05, | |
| "loss": 0.9398, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 4.234454255119294, | |
| "grad_norm": 0.4533185660839081, | |
| "learning_rate": 9.62303015282236e-05, | |
| "loss": 0.9586, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 4.249483374037197, | |
| "grad_norm": 0.438513845205307, | |
| "learning_rate": 9.618015445492688e-05, | |
| "loss": 0.9469, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 4.264512492955101, | |
| "grad_norm": 0.45950812101364136, | |
| "learning_rate": 9.612968927950065e-05, | |
| "loss": 0.9438, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 4.279541611873004, | |
| "grad_norm": 0.42663341760635376, | |
| "learning_rate": 9.607890634956355e-05, | |
| "loss": 0.9461, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 4.294570730790907, | |
| "grad_norm": 0.4346635043621063, | |
| "learning_rate": 9.602780601492294e-05, | |
| "loss": 0.9323, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 4.30959984970881, | |
| "grad_norm": 0.4921177327632904, | |
| "learning_rate": 9.597638862757255e-05, | |
| "loss": 0.9337, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 4.3246289686267145, | |
| "grad_norm": 0.39174574613571167, | |
| "learning_rate": 9.592465454169004e-05, | |
| "loss": 0.938, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 4.339658087544618, | |
| "grad_norm": 0.40984979271888733, | |
| "learning_rate": 9.587260411363465e-05, | |
| "loss": 0.9461, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 4.354687206462521, | |
| "grad_norm": 0.37494781613349915, | |
| "learning_rate": 9.582023770194461e-05, | |
| "loss": 0.9407, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.369716325380424, | |
| "grad_norm": 0.35851216316223145, | |
| "learning_rate": 9.57675556673348e-05, | |
| "loss": 0.9285, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 4.3847454442983285, | |
| "grad_norm": 0.37766364216804504, | |
| "learning_rate": 9.571455837269411e-05, | |
| "loss": 0.9268, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 4.399774563216232, | |
| "grad_norm": 0.45168834924697876, | |
| "learning_rate": 9.566124618308312e-05, | |
| "loss": 0.9593, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 4.414803682134135, | |
| "grad_norm": 0.43097320199012756, | |
| "learning_rate": 9.560761946573143e-05, | |
| "loss": 0.9537, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 4.429832801052038, | |
| "grad_norm": 0.415606826543808, | |
| "learning_rate": 9.555367859003525e-05, | |
| "loss": 0.929, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 4.4448619199699415, | |
| "grad_norm": 0.3891099989414215, | |
| "learning_rate": 9.54994239275548e-05, | |
| "loss": 0.9103, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 4.459891038887845, | |
| "grad_norm": 0.3769884705543518, | |
| "learning_rate": 9.544485585201169e-05, | |
| "loss": 0.9234, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 4.474920157805749, | |
| "grad_norm": 0.46022331714630127, | |
| "learning_rate": 9.538997473928647e-05, | |
| "loss": 0.9734, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 4.489949276723652, | |
| "grad_norm": 0.36743420362472534, | |
| "learning_rate": 9.533478096741597e-05, | |
| "loss": 0.9025, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 4.5049783956415554, | |
| "grad_norm": 0.4562210738658905, | |
| "learning_rate": 9.527927491659068e-05, | |
| "loss": 0.9444, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.520007514559459, | |
| "grad_norm": 0.4317024052143097, | |
| "learning_rate": 9.522345696915218e-05, | |
| "loss": 0.9301, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 4.535036633477363, | |
| "grad_norm": 0.43993476033210754, | |
| "learning_rate": 9.51673275095905e-05, | |
| "loss": 0.9425, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 4.550065752395266, | |
| "grad_norm": 0.34426409006118774, | |
| "learning_rate": 9.51108869245414e-05, | |
| "loss": 0.9348, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 4.565094871313169, | |
| "grad_norm": 0.44477733969688416, | |
| "learning_rate": 9.505413560278382e-05, | |
| "loss": 0.9295, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 4.580123990231073, | |
| "grad_norm": 0.4211689829826355, | |
| "learning_rate": 9.49970739352371e-05, | |
| "loss": 0.933, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 4.595153109148976, | |
| "grad_norm": 0.45019835233688354, | |
| "learning_rate": 9.493970231495835e-05, | |
| "loss": 0.9471, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 4.610182228066879, | |
| "grad_norm": 0.42713072896003723, | |
| "learning_rate": 9.488202113713973e-05, | |
| "loss": 0.953, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 4.625211346984783, | |
| "grad_norm": 0.41138195991516113, | |
| "learning_rate": 9.482403079910571e-05, | |
| "loss": 0.9398, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 4.640240465902687, | |
| "grad_norm": 0.42336663603782654, | |
| "learning_rate": 9.476573170031035e-05, | |
| "loss": 0.9342, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 4.65526958482059, | |
| "grad_norm": 0.4236120581626892, | |
| "learning_rate": 9.470712424233452e-05, | |
| "loss": 0.9306, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.670298703738493, | |
| "grad_norm": 0.47870710492134094, | |
| "learning_rate": 9.464820882888319e-05, | |
| "loss": 0.9763, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 4.685327822656397, | |
| "grad_norm": 0.44699183106422424, | |
| "learning_rate": 9.45889858657826e-05, | |
| "loss": 0.9479, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 4.7003569415743005, | |
| "grad_norm": 0.41658318042755127, | |
| "learning_rate": 9.452945576097748e-05, | |
| "loss": 0.9381, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 4.715386060492204, | |
| "grad_norm": 0.42650163173675537, | |
| "learning_rate": 9.446961892452824e-05, | |
| "loss": 0.9333, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 4.730415179410107, | |
| "grad_norm": 0.4480834901332855, | |
| "learning_rate": 9.440947576860814e-05, | |
| "loss": 0.9349, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 4.74544429832801, | |
| "grad_norm": 0.41825857758522034, | |
| "learning_rate": 9.434902670750047e-05, | |
| "loss": 0.9768, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 4.7604734172459136, | |
| "grad_norm": 0.38604798913002014, | |
| "learning_rate": 9.428827215759568e-05, | |
| "loss": 0.9374, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 4.775502536163818, | |
| "grad_norm": 0.43158042430877686, | |
| "learning_rate": 9.42272125373885e-05, | |
| "loss": 0.942, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 4.790531655081721, | |
| "grad_norm": 0.4181406497955322, | |
| "learning_rate": 9.416584826747509e-05, | |
| "loss": 0.9427, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 4.805560773999624, | |
| "grad_norm": 0.42289501428604126, | |
| "learning_rate": 9.410417977055011e-05, | |
| "loss": 0.9731, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.8205898929175275, | |
| "grad_norm": 0.42214304208755493, | |
| "learning_rate": 9.404220747140382e-05, | |
| "loss": 0.9236, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 4.835619011835432, | |
| "grad_norm": 0.4040350019931793, | |
| "learning_rate": 9.397993179691917e-05, | |
| "loss": 0.9478, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 4.850648130753335, | |
| "grad_norm": 0.40848028659820557, | |
| "learning_rate": 9.391735317606885e-05, | |
| "loss": 0.955, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 4.865677249671238, | |
| "grad_norm": 0.46537673473358154, | |
| "learning_rate": 9.385447203991231e-05, | |
| "loss": 0.9618, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 4.880706368589141, | |
| "grad_norm": 0.419888973236084, | |
| "learning_rate": 9.379128882159283e-05, | |
| "loss": 0.9686, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 4.895735487507045, | |
| "grad_norm": 0.3668920397758484, | |
| "learning_rate": 9.372780395633451e-05, | |
| "loss": 0.9389, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 4.910764606424948, | |
| "grad_norm": 0.3719962239265442, | |
| "learning_rate": 9.36640178814393e-05, | |
| "loss": 0.9546, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 4.925793725342852, | |
| "grad_norm": 0.3528194725513458, | |
| "learning_rate": 9.359993103628393e-05, | |
| "loss": 0.9492, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 4.940822844260755, | |
| "grad_norm": 0.4485328495502472, | |
| "learning_rate": 9.353554386231695e-05, | |
| "loss": 0.9555, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 4.955851963178659, | |
| "grad_norm": 0.4136585593223572, | |
| "learning_rate": 9.347085680305565e-05, | |
| "loss": 0.9383, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 4.970881082096562, | |
| "grad_norm": 0.4350145757198334, | |
| "learning_rate": 9.340587030408304e-05, | |
| "loss": 0.9432, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 4.985910201014466, | |
| "grad_norm": 0.5096591114997864, | |
| "learning_rate": 9.334058481304471e-05, | |
| "loss": 0.9451, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.6608612537384033, | |
| "learning_rate": 9.327500077964584e-05, | |
| "loss": 0.935, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 5.015029118917903, | |
| "grad_norm": 0.4970506429672241, | |
| "learning_rate": 9.320911865564802e-05, | |
| "loss": 0.8215, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 5.0300582378358065, | |
| "grad_norm": 0.4373551607131958, | |
| "learning_rate": 9.314293889486619e-05, | |
| "loss": 0.8335, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 5.045087356753711, | |
| "grad_norm": 0.47342097759246826, | |
| "learning_rate": 9.30764619531655e-05, | |
| "loss": 0.8232, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 5.060116475671614, | |
| "grad_norm": 0.4043892025947571, | |
| "learning_rate": 9.300968828845817e-05, | |
| "loss": 0.8394, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 5.075145594589517, | |
| "grad_norm": 0.5077358484268188, | |
| "learning_rate": 9.294261836070032e-05, | |
| "loss": 0.8202, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 5.0901747135074205, | |
| "grad_norm": 0.5389407277107239, | |
| "learning_rate": 9.28752526318888e-05, | |
| "loss": 0.812, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 5.105203832425324, | |
| "grad_norm": 0.5698477625846863, | |
| "learning_rate": 9.28075915660581e-05, | |
| "loss": 0.8424, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 5.120232951343228, | |
| "grad_norm": 0.47804853320121765, | |
| "learning_rate": 9.273963562927695e-05, | |
| "loss": 0.8513, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 5.135262070261131, | |
| "grad_norm": 0.5664450526237488, | |
| "learning_rate": 9.267138528964536e-05, | |
| "loss": 0.8276, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 5.150291189179034, | |
| "grad_norm": 0.5398600697517395, | |
| "learning_rate": 9.260284101729116e-05, | |
| "loss": 0.8398, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 5.165320308096938, | |
| "grad_norm": 0.5055420398712158, | |
| "learning_rate": 9.253400328436699e-05, | |
| "loss": 0.8297, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 5.180349427014841, | |
| "grad_norm": 0.4511585831642151, | |
| "learning_rate": 9.246487256504682e-05, | |
| "loss": 0.8141, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 5.195378545932745, | |
| "grad_norm": 0.5470993518829346, | |
| "learning_rate": 9.239544933552286e-05, | |
| "loss": 0.8434, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 5.210407664850648, | |
| "grad_norm": 0.4637773036956787, | |
| "learning_rate": 9.232573407400221e-05, | |
| "loss": 0.8497, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 5.225436783768552, | |
| "grad_norm": 0.4901561141014099, | |
| "learning_rate": 9.225572726070354e-05, | |
| "loss": 0.8361, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 5.240465902686455, | |
| "grad_norm": 0.531245231628418, | |
| "learning_rate": 9.218542937785384e-05, | |
| "loss": 0.8506, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 5.255495021604358, | |
| "grad_norm": 0.5206908583641052, | |
| "learning_rate": 9.211484090968506e-05, | |
| "loss": 0.8347, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.270524140522262, | |
| "grad_norm": 0.5049258470535278, | |
| "learning_rate": 9.204396234243076e-05, | |
| "loss": 0.8383, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 5.2855532594401655, | |
| "grad_norm": 0.5462550520896912, | |
| "learning_rate": 9.197279416432284e-05, | |
| "loss": 0.8301, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 5.300582378358069, | |
| "grad_norm": 0.5243920683860779, | |
| "learning_rate": 9.190133686558808e-05, | |
| "loss": 0.8392, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 5.315611497275972, | |
| "grad_norm": 0.5010761618614197, | |
| "learning_rate": 9.182959093844483e-05, | |
| "loss": 0.8215, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 5.330640616193875, | |
| "grad_norm": 0.5377451181411743, | |
| "learning_rate": 9.175755687709956e-05, | |
| "loss": 0.8311, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 5.3456697351117795, | |
| "grad_norm": 0.5271348357200623, | |
| "learning_rate": 9.168523517774356e-05, | |
| "loss": 0.8266, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 5.360698854029683, | |
| "grad_norm": 0.48982876539230347, | |
| "learning_rate": 9.161262633854935e-05, | |
| "loss": 0.8571, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 5.375727972947586, | |
| "grad_norm": 0.5555334687232971, | |
| "learning_rate": 9.153973085966746e-05, | |
| "loss": 0.8414, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 5.390757091865489, | |
| "grad_norm": 0.5088291764259338, | |
| "learning_rate": 9.146654924322277e-05, | |
| "loss": 0.8541, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 5.4057862107833925, | |
| "grad_norm": 0.6044062376022339, | |
| "learning_rate": 9.139308199331125e-05, | |
| "loss": 0.8553, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.420815329701297, | |
| "grad_norm": 0.549253523349762, | |
| "learning_rate": 9.131932961599636e-05, | |
| "loss": 0.8303, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 5.4358444486192, | |
| "grad_norm": 0.5907899737358093, | |
| "learning_rate": 9.124529261930559e-05, | |
| "loss": 0.8264, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 5.450873567537103, | |
| "grad_norm": 0.5540890097618103, | |
| "learning_rate": 9.117097151322697e-05, | |
| "loss": 0.8292, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 5.465902686455006, | |
| "grad_norm": 0.5545858144760132, | |
| "learning_rate": 9.109636680970557e-05, | |
| "loss": 0.8382, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 5.48093180537291, | |
| "grad_norm": 0.5407220721244812, | |
| "learning_rate": 9.102147902263995e-05, | |
| "loss": 0.863, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 5.495960924290814, | |
| "grad_norm": 0.5022987723350525, | |
| "learning_rate": 9.094630866787863e-05, | |
| "loss": 0.8624, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 5.510990043208717, | |
| "grad_norm": 0.5069270730018616, | |
| "learning_rate": 9.087085626321657e-05, | |
| "loss": 0.8494, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 5.52601916212662, | |
| "grad_norm": 0.586992621421814, | |
| "learning_rate": 9.07951223283915e-05, | |
| "loss": 0.8708, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 5.541048281044524, | |
| "grad_norm": 0.48386263847351074, | |
| "learning_rate": 9.071910738508048e-05, | |
| "loss": 0.8327, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 5.556077399962427, | |
| "grad_norm": 0.5556206703186035, | |
| "learning_rate": 9.064281195689621e-05, | |
| "loss": 0.8506, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.571106518880331, | |
| "grad_norm": 0.4873793423175812, | |
| "learning_rate": 9.056623656938344e-05, | |
| "loss": 0.8314, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 5.586135637798234, | |
| "grad_norm": 0.5752863883972168, | |
| "learning_rate": 9.048938175001535e-05, | |
| "loss": 0.8559, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 5.601164756716138, | |
| "grad_norm": 0.5001512765884399, | |
| "learning_rate": 9.041224802818999e-05, | |
| "loss": 0.8517, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 5.616193875634041, | |
| "grad_norm": 0.5640326142311096, | |
| "learning_rate": 9.033483593522651e-05, | |
| "loss": 0.8471, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 5.631222994551944, | |
| "grad_norm": 0.544611930847168, | |
| "learning_rate": 9.025714600436157e-05, | |
| "loss": 0.8314, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 5.646252113469847, | |
| "grad_norm": 0.5598495602607727, | |
| "learning_rate": 9.017917877074565e-05, | |
| "loss": 0.8454, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 5.6612812323877515, | |
| "grad_norm": 0.6049039959907532, | |
| "learning_rate": 9.010093477143942e-05, | |
| "loss": 0.8376, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 5.676310351305655, | |
| "grad_norm": 0.5953666567802429, | |
| "learning_rate": 9.002241454540992e-05, | |
| "loss": 0.8655, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 5.691339470223558, | |
| "grad_norm": 0.5012089610099792, | |
| "learning_rate": 8.994361863352696e-05, | |
| "loss": 0.8556, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 5.706368589141461, | |
| "grad_norm": 0.5770487189292908, | |
| "learning_rate": 8.986454757855938e-05, | |
| "loss": 0.8613, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.721397708059365, | |
| "grad_norm": 0.5475596189498901, | |
| "learning_rate": 8.978520192517121e-05, | |
| "loss": 0.8689, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 5.736426826977269, | |
| "grad_norm": 0.4748040437698364, | |
| "learning_rate": 8.970558221991807e-05, | |
| "loss": 0.8444, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 5.751455945895172, | |
| "grad_norm": 0.5324169993400574, | |
| "learning_rate": 8.962568901124327e-05, | |
| "loss": 0.8642, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 5.766485064813075, | |
| "grad_norm": 0.5375658869743347, | |
| "learning_rate": 8.954552284947411e-05, | |
| "loss": 0.8528, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 5.7815141837309785, | |
| "grad_norm": 0.5448617339134216, | |
| "learning_rate": 8.946508428681807e-05, | |
| "loss": 0.8394, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 5.796543302648882, | |
| "grad_norm": 0.5199793577194214, | |
| "learning_rate": 8.938437387735903e-05, | |
| "loss": 0.8615, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 5.811572421566786, | |
| "grad_norm": 0.5268539190292358, | |
| "learning_rate": 8.930339217705337e-05, | |
| "loss": 0.8661, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 5.826601540484689, | |
| "grad_norm": 0.5181281566619873, | |
| "learning_rate": 8.922213974372628e-05, | |
| "loss": 0.8643, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 5.841630659402592, | |
| "grad_norm": 0.5384554862976074, | |
| "learning_rate": 8.914061713706776e-05, | |
| "loss": 0.8355, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 5.856659778320496, | |
| "grad_norm": 0.5838069319725037, | |
| "learning_rate": 8.905882491862888e-05, | |
| "loss": 0.8723, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 5.8716888972384, | |
| "grad_norm": 0.5165135860443115, | |
| "learning_rate": 8.897676365181784e-05, | |
| "loss": 0.8298, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 5.886718016156303, | |
| "grad_norm": 0.5289579033851624, | |
| "learning_rate": 8.889443390189618e-05, | |
| "loss": 0.8664, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 5.901747135074206, | |
| "grad_norm": 0.4891420304775238, | |
| "learning_rate": 8.88118362359748e-05, | |
| "loss": 0.8503, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 5.91677625399211, | |
| "grad_norm": 0.49529027938842773, | |
| "learning_rate": 8.872897122301004e-05, | |
| "loss": 0.8497, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 5.931805372910013, | |
| "grad_norm": 0.6124776601791382, | |
| "learning_rate": 8.864583943379987e-05, | |
| "loss": 0.8829, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 5.946834491827916, | |
| "grad_norm": 0.5730892419815063, | |
| "learning_rate": 8.856244144097988e-05, | |
| "loss": 0.8372, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 5.96186361074582, | |
| "grad_norm": 0.5806572437286377, | |
| "learning_rate": 8.847877781901928e-05, | |
| "loss": 0.8661, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 5.9768927296637235, | |
| "grad_norm": 0.5184414386749268, | |
| "learning_rate": 8.83948491442171e-05, | |
| "loss": 0.8747, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 5.991921848581627, | |
| "grad_norm": 0.5810568332672119, | |
| "learning_rate": 8.831065599469806e-05, | |
| "loss": 0.8747, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 6.006011647567162, | |
| "grad_norm": 0.5326306819915771, | |
| "learning_rate": 8.822619895040868e-05, | |
| "loss": 0.7988, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 6.021040766485065, | |
| "grad_norm": 0.5372363924980164, | |
| "learning_rate": 8.814147859311332e-05, | |
| "loss": 0.712, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 6.036069885402968, | |
| "grad_norm": 0.6200835108757019, | |
| "learning_rate": 8.805649550639004e-05, | |
| "loss": 0.7213, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 6.051099004320871, | |
| "grad_norm": 0.5874983072280884, | |
| "learning_rate": 8.797125027562665e-05, | |
| "loss": 0.7096, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 6.066128123238775, | |
| "grad_norm": 0.6422827243804932, | |
| "learning_rate": 8.788574348801675e-05, | |
| "loss": 0.7223, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 6.081157242156679, | |
| "grad_norm": 0.641160786151886, | |
| "learning_rate": 8.779997573255553e-05, | |
| "loss": 0.7231, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 6.096186361074582, | |
| "grad_norm": 0.7293818593025208, | |
| "learning_rate": 8.771394760003593e-05, | |
| "loss": 0.7092, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 6.111215479992485, | |
| "grad_norm": 0.60944664478302, | |
| "learning_rate": 8.762765968304431e-05, | |
| "loss": 0.7203, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 6.126244598910389, | |
| "grad_norm": 0.6189725399017334, | |
| "learning_rate": 8.754111257595657e-05, | |
| "loss": 0.7136, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 6.141273717828292, | |
| "grad_norm": 0.6322532296180725, | |
| "learning_rate": 8.745430687493396e-05, | |
| "loss": 0.7382, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 6.156302836746196, | |
| "grad_norm": 0.6236686706542969, | |
| "learning_rate": 8.736724317791902e-05, | |
| "loss": 0.7221, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 6.171331955664099, | |
| "grad_norm": 0.5708134174346924, | |
| "learning_rate": 8.727992208463143e-05, | |
| "loss": 0.7205, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 6.186361074582003, | |
| "grad_norm": 0.6412458419799805, | |
| "learning_rate": 8.719234419656387e-05, | |
| "loss": 0.7306, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 6.201390193499906, | |
| "grad_norm": 0.6535741686820984, | |
| "learning_rate": 8.710451011697793e-05, | |
| "loss": 0.7169, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 6.216419312417809, | |
| "grad_norm": 0.6490382552146912, | |
| "learning_rate": 8.701642045089992e-05, | |
| "loss": 0.7145, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 6.231448431335713, | |
| "grad_norm": 0.7014051079750061, | |
| "learning_rate": 8.692807580511667e-05, | |
| "loss": 0.7569, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 6.2464775502536165, | |
| "grad_norm": 0.7195674180984497, | |
| "learning_rate": 8.683947678817139e-05, | |
| "loss": 0.7244, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 6.26150666917152, | |
| "grad_norm": 0.6836762428283691, | |
| "learning_rate": 8.675062401035952e-05, | |
| "loss": 0.7303, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 6.276535788089423, | |
| "grad_norm": 0.6135929822921753, | |
| "learning_rate": 8.666151808372439e-05, | |
| "loss": 0.7179, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 6.291564907007326, | |
| "grad_norm": 0.6589913368225098, | |
| "learning_rate": 8.657215962205319e-05, | |
| "loss": 0.7455, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 6.30659402592523, | |
| "grad_norm": 0.6406304836273193, | |
| "learning_rate": 8.648254924087254e-05, | |
| "loss": 0.7496, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 6.321623144843134, | |
| "grad_norm": 0.6410109400749207, | |
| "learning_rate": 8.639268755744447e-05, | |
| "loss": 0.7355, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 6.336652263761037, | |
| "grad_norm": 0.6654278039932251, | |
| "learning_rate": 8.630257519076196e-05, | |
| "loss": 0.7367, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 6.35168138267894, | |
| "grad_norm": 0.588206946849823, | |
| "learning_rate": 8.621221276154481e-05, | |
| "loss": 0.7255, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 6.3667105015968435, | |
| "grad_norm": 0.633627712726593, | |
| "learning_rate": 8.612160089223529e-05, | |
| "loss": 0.7248, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 6.381739620514748, | |
| "grad_norm": 0.6771560311317444, | |
| "learning_rate": 8.603074020699393e-05, | |
| "loss": 0.7393, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 6.396768739432651, | |
| "grad_norm": 0.682534396648407, | |
| "learning_rate": 8.593963133169514e-05, | |
| "loss": 0.7406, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 6.411797858350554, | |
| "grad_norm": 0.6308305859565735, | |
| "learning_rate": 8.584827489392293e-05, | |
| "loss": 0.751, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 6.426826977268457, | |
| "grad_norm": 0.7026039958000183, | |
| "learning_rate": 8.575667152296665e-05, | |
| "loss": 0.7335, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 6.441856096186361, | |
| "grad_norm": 0.6078832149505615, | |
| "learning_rate": 8.566482184981651e-05, | |
| "loss": 0.752, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 6.456885215104265, | |
| "grad_norm": 0.6271105408668518, | |
| "learning_rate": 8.557272650715939e-05, | |
| "loss": 0.7436, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.471914334022168, | |
| "grad_norm": 0.7435263991355896, | |
| "learning_rate": 8.54803861293744e-05, | |
| "loss": 0.7516, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 6.486943452940071, | |
| "grad_norm": 0.6983492970466614, | |
| "learning_rate": 8.538780135252844e-05, | |
| "loss": 0.7369, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 6.501972571857975, | |
| "grad_norm": 0.6141520738601685, | |
| "learning_rate": 8.529497281437204e-05, | |
| "loss": 0.7415, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 6.517001690775878, | |
| "grad_norm": 0.580833375453949, | |
| "learning_rate": 8.520190115433473e-05, | |
| "loss": 0.7542, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 6.532030809693782, | |
| "grad_norm": 0.6651113033294678, | |
| "learning_rate": 8.510858701352076e-05, | |
| "loss": 0.7251, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 6.547059928611685, | |
| "grad_norm": 0.676468551158905, | |
| "learning_rate": 8.501503103470466e-05, | |
| "loss": 0.7377, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 6.5620890475295885, | |
| "grad_norm": 0.6262651085853577, | |
| "learning_rate": 8.492123386232677e-05, | |
| "loss": 0.7158, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 6.577118166447492, | |
| "grad_norm": 0.7301998138427734, | |
| "learning_rate": 8.482719614248894e-05, | |
| "loss": 0.7483, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 6.592147285365395, | |
| "grad_norm": 0.602796733379364, | |
| "learning_rate": 8.473291852294987e-05, | |
| "loss": 0.7332, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 6.607176404283299, | |
| "grad_norm": 0.6329184770584106, | |
| "learning_rate": 8.463840165312082e-05, | |
| "loss": 0.7518, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.6222055232012025, | |
| "grad_norm": 0.7019734382629395, | |
| "learning_rate": 8.454364618406106e-05, | |
| "loss": 0.7702, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 6.637234642119106, | |
| "grad_norm": 0.6546521782875061, | |
| "learning_rate": 8.444865276847338e-05, | |
| "loss": 0.751, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 6.652263761037009, | |
| "grad_norm": 0.7014687657356262, | |
| "learning_rate": 8.435342206069965e-05, | |
| "loss": 0.7662, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 6.667292879954912, | |
| "grad_norm": 0.6677362322807312, | |
| "learning_rate": 8.425795471671625e-05, | |
| "loss": 0.74, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 6.682321998872816, | |
| "grad_norm": 0.6421080231666565, | |
| "learning_rate": 8.416225139412959e-05, | |
| "loss": 0.7491, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 6.69735111779072, | |
| "grad_norm": 0.6495652794837952, | |
| "learning_rate": 8.406631275217156e-05, | |
| "loss": 0.7612, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 6.712380236708623, | |
| "grad_norm": 0.7310630679130554, | |
| "learning_rate": 8.397013945169501e-05, | |
| "loss": 0.7475, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 6.727409355626526, | |
| "grad_norm": 0.6594589948654175, | |
| "learning_rate": 8.387373215516918e-05, | |
| "loss": 0.7295, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 6.7424384745444295, | |
| "grad_norm": 0.6998351216316223, | |
| "learning_rate": 8.377709152667512e-05, | |
| "loss": 0.756, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 6.757467593462334, | |
| "grad_norm": 0.6579599380493164, | |
| "learning_rate": 8.368021823190116e-05, | |
| "loss": 0.7256, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.772496712380237, | |
| "grad_norm": 0.6116402745246887, | |
| "learning_rate": 8.358311293813832e-05, | |
| "loss": 0.7358, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 6.78752583129814, | |
| "grad_norm": 0.6876879930496216, | |
| "learning_rate": 8.348577631427566e-05, | |
| "loss": 0.7568, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 6.802554950216043, | |
| "grad_norm": 0.6426005363464355, | |
| "learning_rate": 8.33882090307957e-05, | |
| "loss": 0.7563, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 6.817584069133947, | |
| "grad_norm": 0.6187247633934021, | |
| "learning_rate": 8.329041175976987e-05, | |
| "loss": 0.7367, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 6.832613188051851, | |
| "grad_norm": 0.6543039679527283, | |
| "learning_rate": 8.319238517485375e-05, | |
| "loss": 0.7577, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 6.847642306969754, | |
| "grad_norm": 0.6411317586898804, | |
| "learning_rate": 8.309412995128256e-05, | |
| "loss": 0.7614, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 6.862671425887657, | |
| "grad_norm": 0.7125687599182129, | |
| "learning_rate": 8.299564676586638e-05, | |
| "loss": 0.7572, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 6.877700544805561, | |
| "grad_norm": 0.7412214875221252, | |
| "learning_rate": 8.289693629698564e-05, | |
| "loss": 0.7724, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 6.892729663723464, | |
| "grad_norm": 0.6838482022285461, | |
| "learning_rate": 8.279799922458629e-05, | |
| "loss": 0.7428, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 6.907758782641368, | |
| "grad_norm": 0.6079447269439697, | |
| "learning_rate": 8.269883623017522e-05, | |
| "loss": 0.7515, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.922787901559271, | |
| "grad_norm": 0.7181859612464905, | |
| "learning_rate": 8.259944799681555e-05, | |
| "loss": 0.7472, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 6.9378170204771745, | |
| "grad_norm": 0.7185594439506531, | |
| "learning_rate": 8.249983520912187e-05, | |
| "loss": 0.7582, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 6.952846139395078, | |
| "grad_norm": 0.7397907972335815, | |
| "learning_rate": 8.239999855325563e-05, | |
| "loss": 0.7578, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 6.967875258312981, | |
| "grad_norm": 0.6544892191886902, | |
| "learning_rate": 8.229993871692028e-05, | |
| "loss": 0.7511, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 6.982904377230885, | |
| "grad_norm": 0.7269999384880066, | |
| "learning_rate": 8.219965638935662e-05, | |
| "loss": 0.7557, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 6.9979334961487885, | |
| "grad_norm": 0.7143056392669678, | |
| "learning_rate": 8.209915226133807e-05, | |
| "loss": 0.7603, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 7.012023295134322, | |
| "grad_norm": 0.740738034248352, | |
| "learning_rate": 8.199842702516583e-05, | |
| "loss": 0.6384, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 7.027052414052227, | |
| "grad_norm": 0.7142441868782043, | |
| "learning_rate": 8.189748137466417e-05, | |
| "loss": 0.6018, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 7.04208153297013, | |
| "grad_norm": 0.8026095628738403, | |
| "learning_rate": 8.179631600517565e-05, | |
| "loss": 0.6187, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 7.057110651888033, | |
| "grad_norm": 0.8209463953971863, | |
| "learning_rate": 8.169493161355633e-05, | |
| "loss": 0.6178, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 7.072139770805936, | |
| "grad_norm": 0.7156078219413757, | |
| "learning_rate": 8.159332889817088e-05, | |
| "loss": 0.6223, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 7.08716888972384, | |
| "grad_norm": 0.7837380170822144, | |
| "learning_rate": 8.149150855888794e-05, | |
| "loss": 0.603, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 7.102198008641744, | |
| "grad_norm": 0.7317357063293457, | |
| "learning_rate": 8.138947129707517e-05, | |
| "loss": 0.6183, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 7.117227127559647, | |
| "grad_norm": 0.6778579950332642, | |
| "learning_rate": 8.128721781559443e-05, | |
| "loss": 0.6123, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 7.13225624647755, | |
| "grad_norm": 0.6829363703727722, | |
| "learning_rate": 8.118474881879701e-05, | |
| "loss": 0.6111, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 7.147285365395454, | |
| "grad_norm": 0.7064921855926514, | |
| "learning_rate": 8.108206501251866e-05, | |
| "loss": 0.6142, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 7.162314484313357, | |
| "grad_norm": 0.7147718071937561, | |
| "learning_rate": 8.097916710407492e-05, | |
| "loss": 0.6128, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 7.177343603231261, | |
| "grad_norm": 0.7428337335586548, | |
| "learning_rate": 8.0876055802256e-05, | |
| "loss": 0.6087, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 7.192372722149164, | |
| "grad_norm": 0.7002803087234497, | |
| "learning_rate": 8.077273181732207e-05, | |
| "loss": 0.6421, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 7.2074018410670675, | |
| "grad_norm": 0.7221034169197083, | |
| "learning_rate": 8.066919586099834e-05, | |
| "loss": 0.6159, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 7.222430959984971, | |
| "grad_norm": 0.7155001759529114, | |
| "learning_rate": 8.056544864647015e-05, | |
| "loss": 0.6227, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 7.237460078902874, | |
| "grad_norm": 0.828462541103363, | |
| "learning_rate": 8.046149088837802e-05, | |
| "loss": 0.6249, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 7.252489197820778, | |
| "grad_norm": 0.7177339792251587, | |
| "learning_rate": 8.035732330281273e-05, | |
| "loss": 0.6205, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 7.267518316738681, | |
| "grad_norm": 0.7466073632240295, | |
| "learning_rate": 8.025294660731048e-05, | |
| "loss": 0.6225, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 7.282547435656585, | |
| "grad_norm": 0.7658254504203796, | |
| "learning_rate": 8.014836152084784e-05, | |
| "loss": 0.6259, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 7.297576554574488, | |
| "grad_norm": 0.7269898653030396, | |
| "learning_rate": 8.00435687638368e-05, | |
| "loss": 0.6228, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 7.312605673492391, | |
| "grad_norm": 0.8240427374839783, | |
| "learning_rate": 7.993856905811991e-05, | |
| "loss": 0.6242, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 7.327634792410295, | |
| "grad_norm": 0.7971922755241394, | |
| "learning_rate": 7.983336312696522e-05, | |
| "loss": 0.6272, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 7.342663911328199, | |
| "grad_norm": 0.7452378869056702, | |
| "learning_rate": 7.972795169506129e-05, | |
| "loss": 0.6214, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 7.357693030246102, | |
| "grad_norm": 0.7922284603118896, | |
| "learning_rate": 7.962233548851227e-05, | |
| "loss": 0.6257, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 7.372722149164005, | |
| "grad_norm": 0.8231662511825562, | |
| "learning_rate": 7.951651523483283e-05, | |
| "loss": 0.6288, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 7.387751268081908, | |
| "grad_norm": 0.7604002952575684, | |
| "learning_rate": 7.941049166294319e-05, | |
| "loss": 0.6416, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 7.402780386999812, | |
| "grad_norm": 0.7322626709938049, | |
| "learning_rate": 7.930426550316406e-05, | |
| "loss": 0.628, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 7.417809505917716, | |
| "grad_norm": 0.7688371539115906, | |
| "learning_rate": 7.919783748721168e-05, | |
| "loss": 0.6245, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 7.432838624835619, | |
| "grad_norm": 0.8524195551872253, | |
| "learning_rate": 7.909120834819268e-05, | |
| "loss": 0.6431, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 7.447867743753522, | |
| "grad_norm": 0.8562901020050049, | |
| "learning_rate": 7.898437882059913e-05, | |
| "loss": 0.6291, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 7.462896862671426, | |
| "grad_norm": 0.7663971185684204, | |
| "learning_rate": 7.887734964030337e-05, | |
| "loss": 0.6361, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 7.47792598158933, | |
| "grad_norm": 0.7779290676116943, | |
| "learning_rate": 7.87701215445531e-05, | |
| "loss": 0.6321, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 7.492955100507233, | |
| "grad_norm": 0.8450044393539429, | |
| "learning_rate": 7.86626952719661e-05, | |
| "loss": 0.6554, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 7.507984219425136, | |
| "grad_norm": 0.7660729885101318, | |
| "learning_rate": 7.855507156252535e-05, | |
| "loss": 0.6546, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.5230133383430395, | |
| "grad_norm": 0.9639895558357239, | |
| "learning_rate": 7.844725115757375e-05, | |
| "loss": 0.6388, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 7.538042457260943, | |
| "grad_norm": 0.8670216798782349, | |
| "learning_rate": 7.833923479980914e-05, | |
| "loss": 0.6489, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 7.553071576178846, | |
| "grad_norm": 0.7850314974784851, | |
| "learning_rate": 7.823102323327911e-05, | |
| "loss": 0.6397, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 7.56810069509675, | |
| "grad_norm": 0.7203473448753357, | |
| "learning_rate": 7.812261720337594e-05, | |
| "loss": 0.6466, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 7.5831298140146535, | |
| "grad_norm": 0.7159662246704102, | |
| "learning_rate": 7.801401745683143e-05, | |
| "loss": 0.6336, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 7.598158932932557, | |
| "grad_norm": 0.8092458844184875, | |
| "learning_rate": 7.79052247417117e-05, | |
| "loss": 0.6415, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 7.61318805185046, | |
| "grad_norm": 0.7300180196762085, | |
| "learning_rate": 7.779623980741214e-05, | |
| "loss": 0.6469, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 7.628217170768364, | |
| "grad_norm": 0.8448249697685242, | |
| "learning_rate": 7.768706340465219e-05, | |
| "loss": 0.6281, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 7.643246289686267, | |
| "grad_norm": 0.7753276824951172, | |
| "learning_rate": 7.757769628547018e-05, | |
| "loss": 0.644, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 7.658275408604171, | |
| "grad_norm": 0.7004479765892029, | |
| "learning_rate": 7.746813920321816e-05, | |
| "loss": 0.6349, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 7.673304527522074, | |
| "grad_norm": 0.7119005918502808, | |
| "learning_rate": 7.735839291255667e-05, | |
| "loss": 0.6477, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 7.688333646439977, | |
| "grad_norm": 0.8026734590530396, | |
| "learning_rate": 7.724845816944961e-05, | |
| "loss": 0.6302, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 7.7033627653578804, | |
| "grad_norm": 0.7971638441085815, | |
| "learning_rate": 7.713833573115894e-05, | |
| "loss": 0.642, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 7.718391884275785, | |
| "grad_norm": 0.7363801598548889, | |
| "learning_rate": 7.70280263562396e-05, | |
| "loss": 0.6509, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 7.733421003193688, | |
| "grad_norm": 0.7832568883895874, | |
| "learning_rate": 7.691753080453412e-05, | |
| "loss": 0.6517, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 7.748450122111591, | |
| "grad_norm": 0.7115653157234192, | |
| "learning_rate": 7.680684983716753e-05, | |
| "loss": 0.6484, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 7.763479241029494, | |
| "grad_norm": 0.7662774324417114, | |
| "learning_rate": 7.6695984216542e-05, | |
| "loss": 0.6496, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 7.7785083599473985, | |
| "grad_norm": 0.7544398307800293, | |
| "learning_rate": 7.658493470633173e-05, | |
| "loss": 0.6394, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 7.793537478865302, | |
| "grad_norm": 0.7812057733535767, | |
| "learning_rate": 7.647370207147748e-05, | |
| "loss": 0.6494, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 7.808566597783205, | |
| "grad_norm": 0.7722028493881226, | |
| "learning_rate": 7.636228707818154e-05, | |
| "loss": 0.6395, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 7.823595716701108, | |
| "grad_norm": 0.776189923286438, | |
| "learning_rate": 7.625069049390227e-05, | |
| "loss": 0.6474, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 7.838624835619012, | |
| "grad_norm": 0.6927589178085327, | |
| "learning_rate": 7.613891308734894e-05, | |
| "loss": 0.6419, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 7.853653954536915, | |
| "grad_norm": 0.8120152354240417, | |
| "learning_rate": 7.60269556284763e-05, | |
| "loss": 0.6638, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 7.868683073454819, | |
| "grad_norm": 0.8518467545509338, | |
| "learning_rate": 7.59148188884794e-05, | |
| "loss": 0.6546, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 7.883712192372722, | |
| "grad_norm": 0.8371894359588623, | |
| "learning_rate": 7.580250363978824e-05, | |
| "loss": 0.6567, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 7.8987413112906255, | |
| "grad_norm": 0.8003565669059753, | |
| "learning_rate": 7.569001065606238e-05, | |
| "loss": 0.6443, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 7.913770430208529, | |
| "grad_norm": 0.8672810196876526, | |
| "learning_rate": 7.557734071218576e-05, | |
| "loss": 0.6559, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 7.928799549126433, | |
| "grad_norm": 0.7518348097801208, | |
| "learning_rate": 7.546449458426117e-05, | |
| "loss": 0.6579, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 7.943828668044336, | |
| "grad_norm": 0.8424391150474548, | |
| "learning_rate": 7.535147304960508e-05, | |
| "loss": 0.6588, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 7.9588577869622394, | |
| "grad_norm": 0.7776015996932983, | |
| "learning_rate": 7.52382768867422e-05, | |
| "loss": 0.6516, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 7.973886905880143, | |
| "grad_norm": 0.8192471861839294, | |
| "learning_rate": 7.512490687540009e-05, | |
| "loss": 0.6686, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 7.988916024798046, | |
| "grad_norm": 0.7316805720329285, | |
| "learning_rate": 7.501136379650388e-05, | |
| "loss": 0.6505, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 8.00300582378358, | |
| "grad_norm": 0.8020321726799011, | |
| "learning_rate": 7.489764843217082e-05, | |
| "loss": 0.6468, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 8.018034942701485, | |
| "grad_norm": 0.7429752349853516, | |
| "learning_rate": 7.478376156570489e-05, | |
| "loss": 0.5209, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 8.033064061619388, | |
| "grad_norm": 0.7338524460792542, | |
| "learning_rate": 7.466970398159145e-05, | |
| "loss": 0.5215, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 8.048093180537292, | |
| "grad_norm": 0.7771674990653992, | |
| "learning_rate": 7.45554764654918e-05, | |
| "loss": 0.5066, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 8.063122299455195, | |
| "grad_norm": 0.7496100068092346, | |
| "learning_rate": 7.444107980423778e-05, | |
| "loss": 0.5101, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 8.078151418373098, | |
| "grad_norm": 0.8719698786735535, | |
| "learning_rate": 7.432651478582636e-05, | |
| "loss": 0.513, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 8.093180537291001, | |
| "grad_norm": 0.706078052520752, | |
| "learning_rate": 7.42117821994142e-05, | |
| "loss": 0.5185, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 8.108209656208905, | |
| "grad_norm": 0.7622345685958862, | |
| "learning_rate": 7.409688283531222e-05, | |
| "loss": 0.5162, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 8.123238775126808, | |
| "grad_norm": 0.7656405568122864, | |
| "learning_rate": 7.398181748498015e-05, | |
| "loss": 0.5137, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 8.138267894044711, | |
| "grad_norm": 0.8089895248413086, | |
| "learning_rate": 7.386658694102103e-05, | |
| "loss": 0.5006, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 8.153297012962614, | |
| "grad_norm": 0.7622844576835632, | |
| "learning_rate": 7.375119199717591e-05, | |
| "loss": 0.5224, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 8.16832613188052, | |
| "grad_norm": 0.8785136342048645, | |
| "learning_rate": 7.363563344831818e-05, | |
| "loss": 0.5277, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 8.183355250798423, | |
| "grad_norm": 0.8507887721061707, | |
| "learning_rate": 7.351991209044821e-05, | |
| "loss": 0.5203, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 8.198384369716326, | |
| "grad_norm": 0.9602698683738708, | |
| "learning_rate": 7.340402872068789e-05, | |
| "loss": 0.5186, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 8.21341348863423, | |
| "grad_norm": 0.8880749344825745, | |
| "learning_rate": 7.328798413727503e-05, | |
| "loss": 0.5175, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 8.228442607552132, | |
| "grad_norm": 0.8679527640342712, | |
| "learning_rate": 7.317177913955795e-05, | |
| "loss": 0.513, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 8.243471726470036, | |
| "grad_norm": 0.7859882116317749, | |
| "learning_rate": 7.305541452798997e-05, | |
| "loss": 0.5252, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 8.258500845387939, | |
| "grad_norm": 0.8226519227027893, | |
| "learning_rate": 7.293889110412387e-05, | |
| "loss": 0.5211, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 8.273529964305842, | |
| "grad_norm": 0.8628718256950378, | |
| "learning_rate": 7.282220967060633e-05, | |
| "loss": 0.5294, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 8.288559083223745, | |
| "grad_norm": 0.9453558325767517, | |
| "learning_rate": 7.270537103117252e-05, | |
| "loss": 0.5238, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 8.303588202141649, | |
| "grad_norm": 0.9046574831008911, | |
| "learning_rate": 7.258837599064043e-05, | |
| "loss": 0.5186, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 8.318617321059552, | |
| "grad_norm": 0.9415176510810852, | |
| "learning_rate": 7.24712253549054e-05, | |
| "loss": 0.5282, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 8.333646439977457, | |
| "grad_norm": 0.8018948435783386, | |
| "learning_rate": 7.235391993093456e-05, | |
| "loss": 0.5264, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 8.34867555889536, | |
| "grad_norm": 0.818480908870697, | |
| "learning_rate": 7.22364605267613e-05, | |
| "loss": 0.5272, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 8.363704677813264, | |
| "grad_norm": 0.8961235284805298, | |
| "learning_rate": 7.211884795147958e-05, | |
| "loss": 0.5373, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 8.378733796731167, | |
| "grad_norm": 0.8245147466659546, | |
| "learning_rate": 7.200108301523854e-05, | |
| "loss": 0.5423, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 8.39376291564907, | |
| "grad_norm": 0.8225317001342773, | |
| "learning_rate": 7.188316652923677e-05, | |
| "loss": 0.5374, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 8.408792034566973, | |
| "grad_norm": 0.9353516697883606, | |
| "learning_rate": 7.176509930571682e-05, | |
| "loss": 0.5418, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 8.423821153484877, | |
| "grad_norm": 0.9062713384628296, | |
| "learning_rate": 7.16468821579595e-05, | |
| "loss": 0.5508, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 8.43885027240278, | |
| "grad_norm": 0.8618881106376648, | |
| "learning_rate": 7.152851590027843e-05, | |
| "loss": 0.5424, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 8.453879391320683, | |
| "grad_norm": 0.8350569009780884, | |
| "learning_rate": 7.141000134801425e-05, | |
| "loss": 0.5433, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 8.468908510238588, | |
| "grad_norm": 0.8575078845024109, | |
| "learning_rate": 7.129133931752914e-05, | |
| "loss": 0.5459, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 8.483937629156491, | |
| "grad_norm": 0.869219183921814, | |
| "learning_rate": 7.117253062620118e-05, | |
| "loss": 0.5397, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 8.498966748074395, | |
| "grad_norm": 0.900360643863678, | |
| "learning_rate": 7.105357609241863e-05, | |
| "loss": 0.5435, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 8.513995866992298, | |
| "grad_norm": 0.9262248277664185, | |
| "learning_rate": 7.093447653557441e-05, | |
| "loss": 0.5462, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 8.529024985910201, | |
| "grad_norm": 0.9586583971977234, | |
| "learning_rate": 7.081523277606035e-05, | |
| "loss": 0.5386, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 8.544054104828104, | |
| "grad_norm": 0.8671521544456482, | |
| "learning_rate": 7.069584563526166e-05, | |
| "loss": 0.539, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 8.559083223746008, | |
| "grad_norm": 0.8206884860992432, | |
| "learning_rate": 7.057631593555111e-05, | |
| "loss": 0.5389, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 8.574112342663911, | |
| "grad_norm": 0.8640275597572327, | |
| "learning_rate": 7.045664450028352e-05, | |
| "loss": 0.5443, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 8.589141461581814, | |
| "grad_norm": 0.8697555661201477, | |
| "learning_rate": 7.033683215379002e-05, | |
| "loss": 0.5488, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 8.604170580499718, | |
| "grad_norm": 0.9721740484237671, | |
| "learning_rate": 7.021687972137235e-05, | |
| "loss": 0.5474, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 8.61919969941762, | |
| "grad_norm": 0.895819902420044, | |
| "learning_rate": 7.009678802929724e-05, | |
| "loss": 0.5504, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 8.634228818335526, | |
| "grad_norm": 1.060189962387085, | |
| "learning_rate": 6.997655790479061e-05, | |
| "loss": 0.5469, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 8.649257937253429, | |
| "grad_norm": 0.955331563949585, | |
| "learning_rate": 6.985619017603207e-05, | |
| "loss": 0.5491, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 8.664287056171332, | |
| "grad_norm": 0.9543823599815369, | |
| "learning_rate": 6.973568567214894e-05, | |
| "loss": 0.5549, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 8.679316175089236, | |
| "grad_norm": 0.8880019187927246, | |
| "learning_rate": 6.961504522321076e-05, | |
| "loss": 0.5466, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 8.694345294007139, | |
| "grad_norm": 0.8980219960212708, | |
| "learning_rate": 6.949426966022354e-05, | |
| "loss": 0.5321, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 8.709374412925042, | |
| "grad_norm": 0.9821533560752869, | |
| "learning_rate": 6.937335981512389e-05, | |
| "loss": 0.5466, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 8.724403531842945, | |
| "grad_norm": 0.9177353978157043, | |
| "learning_rate": 6.925231652077348e-05, | |
| "loss": 0.5568, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 8.739432650760849, | |
| "grad_norm": 0.9436571002006531, | |
| "learning_rate": 6.913114061095319e-05, | |
| "loss": 0.5537, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 8.754461769678752, | |
| "grad_norm": 0.8605087995529175, | |
| "learning_rate": 6.900983292035739e-05, | |
| "loss": 0.5456, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 8.769490888596657, | |
| "grad_norm": 0.9178728461265564, | |
| "learning_rate": 6.888839428458818e-05, | |
| "loss": 0.5522, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 8.78452000751456, | |
| "grad_norm": 0.8443792462348938, | |
| "learning_rate": 6.876682554014967e-05, | |
| "loss": 0.5465, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 8.799549126432463, | |
| "grad_norm": 0.8694719076156616, | |
| "learning_rate": 6.86451275244422e-05, | |
| "loss": 0.5516, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 8.814578245350367, | |
| "grad_norm": 0.8430178165435791, | |
| "learning_rate": 6.852330107575652e-05, | |
| "loss": 0.549, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 8.82960736426827, | |
| "grad_norm": 0.8651490211486816, | |
| "learning_rate": 6.840134703326815e-05, | |
| "loss": 0.5525, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 8.844636483186173, | |
| "grad_norm": 0.7867377400398254, | |
| "learning_rate": 6.827926623703142e-05, | |
| "loss": 0.5594, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 8.859665602104076, | |
| "grad_norm": 0.9743750691413879, | |
| "learning_rate": 6.815705952797382e-05, | |
| "loss": 0.5617, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 8.87469472102198, | |
| "grad_norm": 0.8857339024543762, | |
| "learning_rate": 6.80347277478902e-05, | |
| "loss": 0.5559, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 8.889723839939883, | |
| "grad_norm": 0.9169685244560242, | |
| "learning_rate": 6.791227173943684e-05, | |
| "loss": 0.5473, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 8.904752958857786, | |
| "grad_norm": 1.0672627687454224, | |
| "learning_rate": 6.778969234612584e-05, | |
| "loss": 0.5532, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 8.91978207777569, | |
| "grad_norm": 0.9694510698318481, | |
| "learning_rate": 6.766699041231913e-05, | |
| "loss": 0.5541, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 8.934811196693595, | |
| "grad_norm": 0.940804123878479, | |
| "learning_rate": 6.754416678322281e-05, | |
| "loss": 0.5569, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 8.949840315611498, | |
| "grad_norm": 0.9347053170204163, | |
| "learning_rate": 6.74212223048812e-05, | |
| "loss": 0.5614, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 8.964869434529401, | |
| "grad_norm": 0.8529021739959717, | |
| "learning_rate": 6.729815782417105e-05, | |
| "loss": 0.5438, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 8.979898553447304, | |
| "grad_norm": 0.9158792495727539, | |
| "learning_rate": 6.717497418879579e-05, | |
| "loss": 0.5687, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 8.994927672365208, | |
| "grad_norm": 0.8642351627349854, | |
| "learning_rate": 6.705167224727955e-05, | |
| "loss": 0.5508, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 9.009017471350742, | |
| "grad_norm": 1.036657452583313, | |
| "learning_rate": 6.692825284896142e-05, | |
| "loss": 0.496, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 9.024046590268645, | |
| "grad_norm": 1.0688594579696655, | |
| "learning_rate": 6.680471684398957e-05, | |
| "loss": 0.4279, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 9.039075709186548, | |
| "grad_norm": 0.9282298684120178, | |
| "learning_rate": 6.668106508331539e-05, | |
| "loss": 0.4258, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 9.054104828104453, | |
| "grad_norm": 0.8562738299369812, | |
| "learning_rate": 6.655729841868758e-05, | |
| "loss": 0.4266, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 9.069133947022356, | |
| "grad_norm": 0.9267016649246216, | |
| "learning_rate": 6.643341770264642e-05, | |
| "loss": 0.4253, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 9.08416306594026, | |
| "grad_norm": 0.838796079158783, | |
| "learning_rate": 6.630942378851774e-05, | |
| "loss": 0.4209, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 9.099192184858163, | |
| "grad_norm": 1.0836501121520996, | |
| "learning_rate": 6.618531753040712e-05, | |
| "loss": 0.4319, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 9.114221303776066, | |
| "grad_norm": 0.912151038646698, | |
| "learning_rate": 6.606109978319404e-05, | |
| "loss": 0.4242, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 9.12925042269397, | |
| "grad_norm": 0.9484944939613342, | |
| "learning_rate": 6.593677140252588e-05, | |
| "loss": 0.4275, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 9.144279541611873, | |
| "grad_norm": 0.8877925276756287, | |
| "learning_rate": 6.581233324481216e-05, | |
| "loss": 0.4372, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 9.159308660529776, | |
| "grad_norm": 0.9061231017112732, | |
| "learning_rate": 6.568778616721853e-05, | |
| "loss": 0.4309, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 9.17433777944768, | |
| "grad_norm": 0.9550976753234863, | |
| "learning_rate": 6.556313102766094e-05, | |
| "loss": 0.4344, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 9.189366898365583, | |
| "grad_norm": 0.9908791780471802, | |
| "learning_rate": 6.543836868479968e-05, | |
| "loss": 0.4366, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 9.204396017283488, | |
| "grad_norm": 1.0337473154067993, | |
| "learning_rate": 6.531349999803353e-05, | |
| "loss": 0.4357, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 9.21942513620139, | |
| "grad_norm": 0.9019971489906311, | |
| "learning_rate": 6.518852582749373e-05, | |
| "loss": 0.439, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 9.234454255119294, | |
| "grad_norm": 0.9498554468154907, | |
| "learning_rate": 6.506344703403819e-05, | |
| "loss": 0.4348, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 9.249483374037197, | |
| "grad_norm": 0.9589983820915222, | |
| "learning_rate": 6.493826447924541e-05, | |
| "loss": 0.4512, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 9.2645124929551, | |
| "grad_norm": 0.9420648217201233, | |
| "learning_rate": 6.481297902540875e-05, | |
| "loss": 0.4415, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 9.279541611873004, | |
| "grad_norm": 0.8353439569473267, | |
| "learning_rate": 6.468759153553022e-05, | |
| "loss": 0.4482, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 9.294570730790907, | |
| "grad_norm": 0.9372383952140808, | |
| "learning_rate": 6.456210287331483e-05, | |
| "loss": 0.4401, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 9.30959984970881, | |
| "grad_norm": 1.0183303356170654, | |
| "learning_rate": 6.443651390316437e-05, | |
| "loss": 0.4387, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 9.324628968626714, | |
| "grad_norm": 0.9157505035400391, | |
| "learning_rate": 6.431082549017166e-05, | |
| "loss": 0.4364, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 9.339658087544617, | |
| "grad_norm": 0.9424082040786743, | |
| "learning_rate": 6.41850385001145e-05, | |
| "loss": 0.4456, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 9.354687206462522, | |
| "grad_norm": 0.987912654876709, | |
| "learning_rate": 6.405915379944966e-05, | |
| "loss": 0.4427, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 9.369716325380425, | |
| "grad_norm": 0.9018827676773071, | |
| "learning_rate": 6.393317225530706e-05, | |
| "loss": 0.4545, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 9.384745444298328, | |
| "grad_norm": 0.8961259722709656, | |
| "learning_rate": 6.380709473548361e-05, | |
| "loss": 0.4524, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 9.399774563216232, | |
| "grad_norm": 0.939476728439331, | |
| "learning_rate": 6.368092210843739e-05, | |
| "loss": 0.4465, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 9.414803682134135, | |
| "grad_norm": 0.9325003623962402, | |
| "learning_rate": 6.35546552432816e-05, | |
| "loss": 0.4562, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 9.429832801052038, | |
| "grad_norm": 1.0927010774612427, | |
| "learning_rate": 6.342829500977856e-05, | |
| "loss": 0.4499, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 9.444861919969942, | |
| "grad_norm": 0.9243865013122559, | |
| "learning_rate": 6.330184227833376e-05, | |
| "loss": 0.4469, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 9.459891038887845, | |
| "grad_norm": 0.9676965475082397, | |
| "learning_rate": 6.31752979199898e-05, | |
| "loss": 0.4475, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 9.474920157805748, | |
| "grad_norm": 1.0749905109405518, | |
| "learning_rate": 6.30486628064205e-05, | |
| "loss": 0.4644, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 9.489949276723651, | |
| "grad_norm": 1.0174274444580078, | |
| "learning_rate": 6.292193780992474e-05, | |
| "loss": 0.4657, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 9.504978395641556, | |
| "grad_norm": 0.9137683510780334, | |
| "learning_rate": 6.279512380342065e-05, | |
| "loss": 0.4574, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 9.52000751455946, | |
| "grad_norm": 0.8929033279418945, | |
| "learning_rate": 6.266822166043937e-05, | |
| "loss": 0.4571, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 9.535036633477363, | |
| "grad_norm": 1.0599805116653442, | |
| "learning_rate": 6.254123225511923e-05, | |
| "loss": 0.4606, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 9.550065752395266, | |
| "grad_norm": 1.183914065361023, | |
| "learning_rate": 6.241415646219963e-05, | |
| "loss": 0.459, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 9.56509487131317, | |
| "grad_norm": 1.0352977514266968, | |
| "learning_rate": 6.228699515701501e-05, | |
| "loss": 0.4593, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 9.580123990231073, | |
| "grad_norm": 0.8676705956459045, | |
| "learning_rate": 6.215974921548887e-05, | |
| "loss": 0.4546, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 9.595153109148976, | |
| "grad_norm": 1.03312087059021, | |
| "learning_rate": 6.203241951412767e-05, | |
| "loss": 0.4495, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 9.61018222806688, | |
| "grad_norm": 0.9865357279777527, | |
| "learning_rate": 6.19050069300149e-05, | |
| "loss": 0.4533, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 9.625211346984782, | |
| "grad_norm": 1.0788352489471436, | |
| "learning_rate": 6.177751234080491e-05, | |
| "loss": 0.4515, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 9.640240465902686, | |
| "grad_norm": 1.049320936203003, | |
| "learning_rate": 6.164993662471692e-05, | |
| "loss": 0.4568, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 9.65526958482059, | |
| "grad_norm": 0.9056411981582642, | |
| "learning_rate": 6.152228066052904e-05, | |
| "loss": 0.4648, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 9.670298703738494, | |
| "grad_norm": 0.9347831010818481, | |
| "learning_rate": 6.139454532757208e-05, | |
| "loss": 0.4622, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 9.685327822656397, | |
| "grad_norm": 0.9340201020240784, | |
| "learning_rate": 6.126673150572362e-05, | |
| "loss": 0.4537, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 9.7003569415743, | |
| "grad_norm": 0.9909615516662598, | |
| "learning_rate": 6.113884007540184e-05, | |
| "loss": 0.4704, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 9.715386060492204, | |
| "grad_norm": 1.0939775705337524, | |
| "learning_rate": 6.1010871917559576e-05, | |
| "loss": 0.4596, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 9.730415179410107, | |
| "grad_norm": 0.9341562986373901, | |
| "learning_rate": 6.088282791367812e-05, | |
| "loss": 0.46, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 9.74544429832801, | |
| "grad_norm": 0.9412760734558105, | |
| "learning_rate": 6.075470894576124e-05, | |
| "loss": 0.4701, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 9.760473417245914, | |
| "grad_norm": 1.0007338523864746, | |
| "learning_rate": 6.062651589632911e-05, | |
| "loss": 0.4652, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 9.775502536163817, | |
| "grad_norm": 1.0357065200805664, | |
| "learning_rate": 6.0498249648412134e-05, | |
| "loss": 0.4684, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 9.79053165508172, | |
| "grad_norm": 0.8514649868011475, | |
| "learning_rate": 6.036991108554497e-05, | |
| "loss": 0.454, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 9.805560773999623, | |
| "grad_norm": 0.9953536987304688, | |
| "learning_rate": 6.02415010917604e-05, | |
| "loss": 0.4579, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 9.820589892917528, | |
| "grad_norm": 0.9308024644851685, | |
| "learning_rate": 6.011302055158324e-05, | |
| "loss": 0.4631, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 9.835619011835432, | |
| "grad_norm": 0.9298855662345886, | |
| "learning_rate": 5.9984470350024256e-05, | |
| "loss": 0.4544, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 9.850648130753335, | |
| "grad_norm": 0.9751214385032654, | |
| "learning_rate": 5.985585137257401e-05, | |
| "loss": 0.4571, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 9.865677249671238, | |
| "grad_norm": 0.9474308490753174, | |
| "learning_rate": 5.9727164505196905e-05, | |
| "loss": 0.4658, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 9.880706368589141, | |
| "grad_norm": 1.0583529472351074, | |
| "learning_rate": 5.95984106343249e-05, | |
| "loss": 0.4561, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 9.895735487507045, | |
| "grad_norm": 1.0418837070465088, | |
| "learning_rate": 5.946959064685156e-05, | |
| "loss": 0.4637, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 9.910764606424948, | |
| "grad_norm": 1.0113483667373657, | |
| "learning_rate": 5.934070543012582e-05, | |
| "loss": 0.4705, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 9.925793725342851, | |
| "grad_norm": 1.046410083770752, | |
| "learning_rate": 5.921175587194601e-05, | |
| "loss": 0.4884, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 9.940822844260754, | |
| "grad_norm": 0.9872678518295288, | |
| "learning_rate": 5.9082742860553576e-05, | |
| "loss": 0.4744, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 9.95585196317866, | |
| "grad_norm": 1.0428500175476074, | |
| "learning_rate": 5.895366728462709e-05, | |
| "loss": 0.4704, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 9.970881082096563, | |
| "grad_norm": 0.922476053237915, | |
| "learning_rate": 5.882453003327612e-05, | |
| "loss": 0.465, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 9.985910201014466, | |
| "grad_norm": 1.03745698928833, | |
| "learning_rate": 5.8695331996034986e-05, | |
| "loss": 0.4674, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 1.6415784358978271, | |
| "learning_rate": 5.8566074062856815e-05, | |
| "loss": 0.4717, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 10.015029118917903, | |
| "grad_norm": 0.9536633491516113, | |
| "learning_rate": 5.8436757124107245e-05, | |
| "loss": 0.361, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 10.030058237835807, | |
| "grad_norm": 0.8403608202934265, | |
| "learning_rate": 5.83073820705584e-05, | |
| "loss": 0.3593, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 10.04508735675371, | |
| "grad_norm": 1.0014981031417847, | |
| "learning_rate": 5.8177949793382705e-05, | |
| "loss": 0.3669, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 10.060116475671613, | |
| "grad_norm": 0.9928374290466309, | |
| "learning_rate": 5.804846118414671e-05, | |
| "loss": 0.3584, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 10.075145594589518, | |
| "grad_norm": 0.9604836106300354, | |
| "learning_rate": 5.7918917134805096e-05, | |
| "loss": 0.3467, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 10.090174713507421, | |
| "grad_norm": 1.0535321235656738, | |
| "learning_rate": 5.7789318537694335e-05, | |
| "loss": 0.3623, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 10.105203832425325, | |
| "grad_norm": 1.0338060855865479, | |
| "learning_rate": 5.76596662855267e-05, | |
| "loss": 0.3504, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 10.120232951343228, | |
| "grad_norm": 0.9590771794319153, | |
| "learning_rate": 5.752996127138404e-05, | |
| "loss": 0.3571, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 10.135262070261131, | |
| "grad_norm": 0.939929187297821, | |
| "learning_rate": 5.740020438871162e-05, | |
| "loss": 0.3709, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 10.150291189179034, | |
| "grad_norm": 1.0055979490280151, | |
| "learning_rate": 5.727039653131202e-05, | |
| "loss": 0.3646, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 10.165320308096938, | |
| "grad_norm": 1.0767991542816162, | |
| "learning_rate": 5.714053859333893e-05, | |
| "loss": 0.3626, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 10.180349427014841, | |
| "grad_norm": 0.9774537682533264, | |
| "learning_rate": 5.701063146929103e-05, | |
| "loss": 0.3691, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 10.195378545932744, | |
| "grad_norm": 1.1948145627975464, | |
| "learning_rate": 5.688067605400579e-05, | |
| "loss": 0.3707, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 10.210407664850647, | |
| "grad_norm": 1.1181336641311646, | |
| "learning_rate": 5.675067324265332e-05, | |
| "loss": 0.3637, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 10.22543678376855, | |
| "grad_norm": 0.9550219774246216, | |
| "learning_rate": 5.662062393073022e-05, | |
| "loss": 0.3625, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 10.240465902686456, | |
| "grad_norm": 0.9461958408355713, | |
| "learning_rate": 5.6490529014053405e-05, | |
| "loss": 0.3719, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 10.255495021604359, | |
| "grad_norm": 0.9581360816955566, | |
| "learning_rate": 5.636038938875391e-05, | |
| "loss": 0.3711, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 10.270524140522262, | |
| "grad_norm": 0.9395859837532043, | |
| "learning_rate": 5.623020595127073e-05, | |
| "loss": 0.3624, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 10.285553259440166, | |
| "grad_norm": 1.146485447883606, | |
| "learning_rate": 5.609997959834471e-05, | |
| "loss": 0.3684, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 10.300582378358069, | |
| "grad_norm": 0.9923917055130005, | |
| "learning_rate": 5.596971122701221e-05, | |
| "loss": 0.3695, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 10.315611497275972, | |
| "grad_norm": 0.9672958850860596, | |
| "learning_rate": 5.583940173459913e-05, | |
| "loss": 0.3735, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 10.330640616193875, | |
| "grad_norm": 0.9627594947814941, | |
| "learning_rate": 5.5709052018714536e-05, | |
| "loss": 0.3585, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 10.345669735111779, | |
| "grad_norm": 1.0451908111572266, | |
| "learning_rate": 5.5578662977244625e-05, | |
| "loss": 0.3726, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 10.360698854029682, | |
| "grad_norm": 1.0388795137405396, | |
| "learning_rate": 5.5448235508346435e-05, | |
| "loss": 0.3778, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 10.375727972947587, | |
| "grad_norm": 0.9968121647834778, | |
| "learning_rate": 5.5317770510441745e-05, | |
| "loss": 0.3837, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 10.39075709186549, | |
| "grad_norm": 1.104638934135437, | |
| "learning_rate": 5.518726888221082e-05, | |
| "loss": 0.3719, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 10.405786210783393, | |
| "grad_norm": 1.006320595741272, | |
| "learning_rate": 5.5056731522586236e-05, | |
| "loss": 0.3664, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 10.420815329701297, | |
| "grad_norm": 1.1039286851882935, | |
| "learning_rate": 5.492615933074673e-05, | |
| "loss": 0.3768, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 10.4358444486192, | |
| "grad_norm": 0.9026983380317688, | |
| "learning_rate": 5.479555320611094e-05, | |
| "loss": 0.3661, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 10.450873567537103, | |
| "grad_norm": 1.0680197477340698, | |
| "learning_rate": 5.466491404833127e-05, | |
| "loss": 0.375, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 10.465902686455006, | |
| "grad_norm": 1.079924464225769, | |
| "learning_rate": 5.4534242757287643e-05, | |
| "loss": 0.3865, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 10.48093180537291, | |
| "grad_norm": 1.037091851234436, | |
| "learning_rate": 5.440354023308134e-05, | |
| "loss": 0.3861, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 10.495960924290813, | |
| "grad_norm": 1.0389127731323242, | |
| "learning_rate": 5.4272807376028777e-05, | |
| "loss": 0.3701, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 10.510990043208716, | |
| "grad_norm": 1.079481840133667, | |
| "learning_rate": 5.41420450866553e-05, | |
| "loss": 0.3775, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 10.52601916212662, | |
| "grad_norm": 1.3485366106033325, | |
| "learning_rate": 5.401125426568904e-05, | |
| "loss": 0.3722, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 10.541048281044525, | |
| "grad_norm": 1.0112107992172241, | |
| "learning_rate": 5.388043581405461e-05, | |
| "loss": 0.3712, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 10.556077399962428, | |
| "grad_norm": 0.9727371335029602, | |
| "learning_rate": 5.374959063286695e-05, | |
| "loss": 0.3732, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 10.571106518880331, | |
| "grad_norm": 0.9836901426315308, | |
| "learning_rate": 5.361871962342518e-05, | |
| "loss": 0.3787, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 10.586135637798234, | |
| "grad_norm": 1.0882790088653564, | |
| "learning_rate": 5.348782368720626e-05, | |
| "loss": 0.3816, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 10.601164756716138, | |
| "grad_norm": 0.9604332447052002, | |
| "learning_rate": 5.335690372585892e-05, | |
| "loss": 0.3765, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 10.61619387563404, | |
| "grad_norm": 0.9835896492004395, | |
| "learning_rate": 5.322596064119731e-05, | |
| "loss": 0.3808, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 10.631222994551944, | |
| "grad_norm": 0.9179807901382446, | |
| "learning_rate": 5.309499533519493e-05, | |
| "loss": 0.378, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 10.646252113469847, | |
| "grad_norm": 1.0876275300979614, | |
| "learning_rate": 5.2964008709978305e-05, | |
| "loss": 0.3752, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 10.66128123238775, | |
| "grad_norm": 0.9817517995834351, | |
| "learning_rate": 5.2833001667820816e-05, | |
| "loss": 0.3856, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 10.676310351305656, | |
| "grad_norm": 1.0658329725265503, | |
| "learning_rate": 5.270197511113649e-05, | |
| "loss": 0.3747, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 10.691339470223559, | |
| "grad_norm": 1.0060932636260986, | |
| "learning_rate": 5.257092994247377e-05, | |
| "loss": 0.3867, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 10.706368589141462, | |
| "grad_norm": 1.1070188283920288, | |
| "learning_rate": 5.243986706450933e-05, | |
| "loss": 0.3765, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 10.721397708059365, | |
| "grad_norm": 0.9768523573875427, | |
| "learning_rate": 5.2308787380041777e-05, | |
| "loss": 0.3852, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 10.736426826977269, | |
| "grad_norm": 0.9963809847831726, | |
| "learning_rate": 5.217769179198555e-05, | |
| "loss": 0.3924, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 10.751455945895172, | |
| "grad_norm": 0.9897161722183228, | |
| "learning_rate": 5.2046581203364586e-05, | |
| "loss": 0.3871, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 10.766485064813075, | |
| "grad_norm": 1.0196555852890015, | |
| "learning_rate": 5.191545651730616e-05, | |
| "loss": 0.3766, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 10.781514183730978, | |
| "grad_norm": 0.8715333342552185, | |
| "learning_rate": 5.1784318637034676e-05, | |
| "loss": 0.3878, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 10.796543302648882, | |
| "grad_norm": 1.0659235715866089, | |
| "learning_rate": 5.165316846586541e-05, | |
| "loss": 0.387, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 10.811572421566785, | |
| "grad_norm": 1.0283163785934448, | |
| "learning_rate": 5.15220069071983e-05, | |
| "loss": 0.3899, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 10.826601540484688, | |
| "grad_norm": 0.972322404384613, | |
| "learning_rate": 5.139083486451172e-05, | |
| "loss": 0.3916, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 10.841630659402593, | |
| "grad_norm": 1.1113601922988892, | |
| "learning_rate": 5.1259653241356276e-05, | |
| "loss": 0.3832, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 10.856659778320497, | |
| "grad_norm": 1.1082892417907715, | |
| "learning_rate": 5.1128462941348554e-05, | |
| "loss": 0.3863, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 10.8716888972384, | |
| "grad_norm": 1.0528475046157837, | |
| "learning_rate": 5.0997264868164903e-05, | |
| "loss": 0.393, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 10.886718016156303, | |
| "grad_norm": 0.9899016618728638, | |
| "learning_rate": 5.0866059925535234e-05, | |
| "loss": 0.39, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 10.901747135074206, | |
| "grad_norm": 1.1150156259536743, | |
| "learning_rate": 5.073484901723676e-05, | |
| "loss": 0.3806, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 10.91677625399211, | |
| "grad_norm": 1.0797758102416992, | |
| "learning_rate": 5.0603633047087817e-05, | |
| "loss": 0.3953, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 10.931805372910013, | |
| "grad_norm": 1.122441291809082, | |
| "learning_rate": 5.047241291894156e-05, | |
| "loss": 0.386, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 10.946834491827916, | |
| "grad_norm": 0.8962685465812683, | |
| "learning_rate": 5.034118953667982e-05, | |
| "loss": 0.3914, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 10.96186361074582, | |
| "grad_norm": 1.1607177257537842, | |
| "learning_rate": 5.020996380420685e-05, | |
| "loss": 0.3995, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 10.976892729663723, | |
| "grad_norm": 1.0731902122497559, | |
| "learning_rate": 5.0078736625443054e-05, | |
| "loss": 0.3836, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 10.991921848581628, | |
| "grad_norm": 1.0019197463989258, | |
| "learning_rate": 4.994750890431884e-05, | |
| "loss": 0.3845, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 11.006011647567162, | |
| "grad_norm": 0.9175123572349548, | |
| "learning_rate": 4.9816281544768326e-05, | |
| "loss": 0.3611, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 11.021040766485065, | |
| "grad_norm": 0.8413906097412109, | |
| "learning_rate": 4.968505545072313e-05, | |
| "loss": 0.3021, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 11.036069885402968, | |
| "grad_norm": 1.0692964792251587, | |
| "learning_rate": 4.955383152610621e-05, | |
| "loss": 0.2892, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 11.051099004320871, | |
| "grad_norm": 1.0013508796691895, | |
| "learning_rate": 4.9422610674825495e-05, | |
| "loss": 0.2979, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 11.066128123238775, | |
| "grad_norm": 1.0104172229766846, | |
| "learning_rate": 4.929139380076783e-05, | |
| "loss": 0.2995, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 11.081157242156678, | |
| "grad_norm": 1.0872989892959595, | |
| "learning_rate": 4.9160181807792586e-05, | |
| "loss": 0.2909, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 11.096186361074581, | |
| "grad_norm": 1.1095547676086426, | |
| "learning_rate": 4.90289755997256e-05, | |
| "loss": 0.29, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 11.111215479992486, | |
| "grad_norm": 1.0950359106063843, | |
| "learning_rate": 4.889777608035273e-05, | |
| "loss": 0.3107, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 11.12624459891039, | |
| "grad_norm": 1.060843586921692, | |
| "learning_rate": 4.876658415341393e-05, | |
| "loss": 0.3128, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 11.141273717828293, | |
| "grad_norm": 1.0450581312179565, | |
| "learning_rate": 4.863540072259668e-05, | |
| "loss": 0.3099, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 11.156302836746196, | |
| "grad_norm": 0.9836236238479614, | |
| "learning_rate": 4.850422669153009e-05, | |
| "loss": 0.3038, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 11.1713319556641, | |
| "grad_norm": 0.9338634610176086, | |
| "learning_rate": 4.837306296377841e-05, | |
| "loss": 0.2983, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 11.186361074582003, | |
| "grad_norm": 0.9969077706336975, | |
| "learning_rate": 4.824191044283498e-05, | |
| "loss": 0.3041, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 11.201390193499906, | |
| "grad_norm": 1.1370275020599365, | |
| "learning_rate": 4.811077003211592e-05, | |
| "loss": 0.3124, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 11.216419312417809, | |
| "grad_norm": 1.122521162033081, | |
| "learning_rate": 4.797964263495394e-05, | |
| "loss": 0.3077, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 11.231448431335712, | |
| "grad_norm": 1.1988801956176758, | |
| "learning_rate": 4.78485291545921e-05, | |
| "loss": 0.3154, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 11.246477550253616, | |
| "grad_norm": 1.1286782026290894, | |
| "learning_rate": 4.771743049417761e-05, | |
| "loss": 0.2994, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 11.26150666917152, | |
| "grad_norm": 1.0577936172485352, | |
| "learning_rate": 4.7586347556755573e-05, | |
| "loss": 0.3036, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 11.276535788089424, | |
| "grad_norm": 1.0209895372390747, | |
| "learning_rate": 4.745528124526282e-05, | |
| "loss": 0.3043, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 11.291564907007327, | |
| "grad_norm": 0.9786052107810974, | |
| "learning_rate": 4.7324232462521634e-05, | |
| "loss": 0.3089, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 11.30659402592523, | |
| "grad_norm": 1.1310527324676514, | |
| "learning_rate": 4.719320211123358e-05, | |
| "loss": 0.3016, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 11.321623144843134, | |
| "grad_norm": 0.9561529755592346, | |
| "learning_rate": 4.706219109397319e-05, | |
| "loss": 0.3154, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 11.336652263761037, | |
| "grad_norm": 0.9974495768547058, | |
| "learning_rate": 4.6931200313181944e-05, | |
| "loss": 0.3208, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 11.35168138267894, | |
| "grad_norm": 0.9916987419128418, | |
| "learning_rate": 4.6800230671161784e-05, | |
| "loss": 0.3069, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 11.366710501596843, | |
| "grad_norm": 1.231939435005188, | |
| "learning_rate": 4.666928307006918e-05, | |
| "loss": 0.3063, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 11.381739620514747, | |
| "grad_norm": 1.0125497579574585, | |
| "learning_rate": 4.6538358411908646e-05, | |
| "loss": 0.318, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 11.39676873943265, | |
| "grad_norm": 1.0557286739349365, | |
| "learning_rate": 4.640745759852677e-05, | |
| "loss": 0.3112, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 11.411797858350555, | |
| "grad_norm": 1.0968514680862427, | |
| "learning_rate": 4.6276581531605824e-05, | |
| "loss": 0.3163, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 11.426826977268458, | |
| "grad_norm": 1.0451496839523315, | |
| "learning_rate": 4.6145731112657644e-05, | |
| "loss": 0.3096, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 11.441856096186362, | |
| "grad_norm": 1.1789813041687012, | |
| "learning_rate": 4.601490724301738e-05, | |
| "loss": 0.3024, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 11.456885215104265, | |
| "grad_norm": 1.1728602647781372, | |
| "learning_rate": 4.5884110823837334e-05, | |
| "loss": 0.3052, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 11.471914334022168, | |
| "grad_norm": 1.032285451889038, | |
| "learning_rate": 4.5753342756080666e-05, | |
| "loss": 0.3108, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 11.486943452940071, | |
| "grad_norm": 1.1014740467071533, | |
| "learning_rate": 4.5622603940515326e-05, | |
| "loss": 0.3049, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 11.501972571857975, | |
| "grad_norm": 1.2548887729644775, | |
| "learning_rate": 4.549189527770767e-05, | |
| "loss": 0.3204, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 11.517001690775878, | |
| "grad_norm": 1.0855730772018433, | |
| "learning_rate": 4.5361217668016446e-05, | |
| "loss": 0.3136, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 11.532030809693781, | |
| "grad_norm": 0.9988487362861633, | |
| "learning_rate": 4.52305720115864e-05, | |
| "loss": 0.3173, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 11.547059928611684, | |
| "grad_norm": 1.1315146684646606, | |
| "learning_rate": 4.509995920834229e-05, | |
| "loss": 0.3138, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 11.56208904752959, | |
| "grad_norm": 0.9927186965942383, | |
| "learning_rate": 4.496938015798246e-05, | |
| "loss": 0.3079, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 11.577118166447493, | |
| "grad_norm": 1.1122972965240479, | |
| "learning_rate": 4.483883575997284e-05, | |
| "loss": 0.3179, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 11.592147285365396, | |
| "grad_norm": 1.007947564125061, | |
| "learning_rate": 4.47083269135406e-05, | |
| "loss": 0.3276, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 11.6071764042833, | |
| "grad_norm": 1.00367271900177, | |
| "learning_rate": 4.4577854517668075e-05, | |
| "loss": 0.3202, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 11.622205523201202, | |
| "grad_norm": 1.1806467771530151, | |
| "learning_rate": 4.4447419471086484e-05, | |
| "loss": 0.3203, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 11.637234642119106, | |
| "grad_norm": 1.2128424644470215, | |
| "learning_rate": 4.431702267226979e-05, | |
| "loss": 0.3188, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 11.652263761037009, | |
| "grad_norm": 1.2076245546340942, | |
| "learning_rate": 4.418666501942848e-05, | |
| "loss": 0.3093, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 11.667292879954912, | |
| "grad_norm": 1.1673307418823242, | |
| "learning_rate": 4.4056347410503414e-05, | |
| "loss": 0.3204, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 11.682321998872816, | |
| "grad_norm": 0.9249235987663269, | |
| "learning_rate": 4.392607074315957e-05, | |
| "loss": 0.3167, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 11.697351117790719, | |
| "grad_norm": 1.0417946577072144, | |
| "learning_rate": 4.379583591477999e-05, | |
| "loss": 0.3157, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 11.712380236708622, | |
| "grad_norm": 1.1642825603485107, | |
| "learning_rate": 4.366564382245943e-05, | |
| "loss": 0.3145, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 11.727409355626527, | |
| "grad_norm": 1.1535450220108032, | |
| "learning_rate": 4.353549536299835e-05, | |
| "loss": 0.3144, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 11.74243847454443, | |
| "grad_norm": 0.992770254611969, | |
| "learning_rate": 4.3405391432896555e-05, | |
| "loss": 0.3084, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 11.757467593462334, | |
| "grad_norm": 1.064002275466919, | |
| "learning_rate": 4.327533292834723e-05, | |
| "loss": 0.3186, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 11.772496712380237, | |
| "grad_norm": 1.1059247255325317, | |
| "learning_rate": 4.314532074523057e-05, | |
| "loss": 0.3233, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 11.78752583129814, | |
| "grad_norm": 1.1188381910324097, | |
| "learning_rate": 4.3015355779107734e-05, | |
| "loss": 0.3361, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 11.802554950216043, | |
| "grad_norm": 1.0294090509414673, | |
| "learning_rate": 4.288543892521463e-05, | |
| "loss": 0.3144, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 11.817584069133947, | |
| "grad_norm": 1.265080451965332, | |
| "learning_rate": 4.275557107845576e-05, | |
| "loss": 0.3171, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 11.83261318805185, | |
| "grad_norm": 1.3412435054779053, | |
| "learning_rate": 4.262575313339803e-05, | |
| "loss": 0.3249, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 11.847642306969753, | |
| "grad_norm": 1.074264407157898, | |
| "learning_rate": 4.249598598426465e-05, | |
| "loss": 0.3241, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 11.862671425887658, | |
| "grad_norm": 1.2046911716461182, | |
| "learning_rate": 4.236627052492889e-05, | |
| "loss": 0.3202, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 11.877700544805561, | |
| "grad_norm": 1.1616815328598022, | |
| "learning_rate": 4.2236607648907984e-05, | |
| "loss": 0.3185, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 11.892729663723465, | |
| "grad_norm": 1.1158292293548584, | |
| "learning_rate": 4.210699824935695e-05, | |
| "loss": 0.3209, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 11.907758782641368, | |
| "grad_norm": 1.0398184061050415, | |
| "learning_rate": 4.197744321906247e-05, | |
| "loss": 0.3124, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 11.922787901559271, | |
| "grad_norm": 1.1969057321548462, | |
| "learning_rate": 4.1847943450436686e-05, | |
| "loss": 0.3432, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 11.937817020477175, | |
| "grad_norm": 1.1535173654556274, | |
| "learning_rate": 4.17184998355111e-05, | |
| "loss": 0.3143, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 11.952846139395078, | |
| "grad_norm": 1.0445293188095093, | |
| "learning_rate": 4.158911326593037e-05, | |
| "loss": 0.3222, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 11.967875258312981, | |
| "grad_norm": 1.1093374490737915, | |
| "learning_rate": 4.14597846329463e-05, | |
| "loss": 0.3311, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 11.982904377230884, | |
| "grad_norm": 1.1024218797683716, | |
| "learning_rate": 4.133051482741149e-05, | |
| "loss": 0.3153, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 11.997933496148788, | |
| "grad_norm": 1.0923748016357422, | |
| "learning_rate": 4.120130473977343e-05, | |
| "loss": 0.3194, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 12.012023295134323, | |
| "grad_norm": 1.1858222484588623, | |
| "learning_rate": 4.107215526006817e-05, | |
| "loss": 0.2696, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 12.027052414052227, | |
| "grad_norm": 0.9616860151290894, | |
| "learning_rate": 4.094306727791436e-05, | |
| "loss": 0.2594, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 12.04208153297013, | |
| "grad_norm": 0.9500885009765625, | |
| "learning_rate": 4.081404168250694e-05, | |
| "loss": 0.2461, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 12.057110651888033, | |
| "grad_norm": 1.0713434219360352, | |
| "learning_rate": 4.0685079362611204e-05, | |
| "loss": 0.2645, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 12.072139770805936, | |
| "grad_norm": 1.0027638673782349, | |
| "learning_rate": 4.055618120655652e-05, | |
| "loss": 0.2624, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 12.08716888972384, | |
| "grad_norm": 1.0205668210983276, | |
| "learning_rate": 4.0427348102230314e-05, | |
| "loss": 0.2464, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 12.102198008641743, | |
| "grad_norm": 0.970747172832489, | |
| "learning_rate": 4.029858093707189e-05, | |
| "loss": 0.2406, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 12.117227127559646, | |
| "grad_norm": 1.1178600788116455, | |
| "learning_rate": 4.01698805980664e-05, | |
| "loss": 0.2533, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 12.13225624647755, | |
| "grad_norm": 1.0586788654327393, | |
| "learning_rate": 4.004124797173857e-05, | |
| "loss": 0.2549, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 12.147285365395454, | |
| "grad_norm": 1.0152502059936523, | |
| "learning_rate": 3.991268394414685e-05, | |
| "loss": 0.2499, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 12.162314484313358, | |
| "grad_norm": 1.0560377836227417, | |
| "learning_rate": 3.9784189400877005e-05, | |
| "loss": 0.2591, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 12.177343603231261, | |
| "grad_norm": 1.1126878261566162, | |
| "learning_rate": 3.965576522703631e-05, | |
| "loss": 0.2593, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 12.192372722149164, | |
| "grad_norm": 0.9110709428787231, | |
| "learning_rate": 3.9527412307247205e-05, | |
| "loss": 0.2623, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 12.207401841067067, | |
| "grad_norm": 1.153400182723999, | |
| "learning_rate": 3.9399131525641405e-05, | |
| "loss": 0.2598, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 12.22243095998497, | |
| "grad_norm": 0.8933331966400146, | |
| "learning_rate": 3.927092376585363e-05, | |
| "loss": 0.2529, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 12.237460078902874, | |
| "grad_norm": 1.031607747077942, | |
| "learning_rate": 3.914278991101568e-05, | |
| "loss": 0.2554, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 12.252489197820777, | |
| "grad_norm": 1.1537200212478638, | |
| "learning_rate": 3.901473084375023e-05, | |
| "loss": 0.2474, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 12.26751831673868, | |
| "grad_norm": 1.024788498878479, | |
| "learning_rate": 3.88867474461648e-05, | |
| "loss": 0.2475, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 12.282547435656584, | |
| "grad_norm": 1.087825059890747, | |
| "learning_rate": 3.875884059984571e-05, | |
| "loss": 0.2568, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 12.297576554574489, | |
| "grad_norm": 1.000375509262085, | |
| "learning_rate": 3.863101118585194e-05, | |
| "loss": 0.259, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 12.312605673492392, | |
| "grad_norm": 1.0344016551971436, | |
| "learning_rate": 3.850326008470908e-05, | |
| "loss": 0.2553, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 12.327634792410295, | |
| "grad_norm": 0.9918733835220337, | |
| "learning_rate": 3.8375588176403345e-05, | |
| "loss": 0.2597, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 12.342663911328199, | |
| "grad_norm": 1.0089991092681885, | |
| "learning_rate": 3.8247996340375344e-05, | |
| "loss": 0.2477, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 12.357693030246102, | |
| "grad_norm": 1.012367606163025, | |
| "learning_rate": 3.812048545551426e-05, | |
| "loss": 0.2585, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 12.372722149164005, | |
| "grad_norm": 1.1676548719406128, | |
| "learning_rate": 3.799305640015152e-05, | |
| "loss": 0.2534, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 12.387751268081908, | |
| "grad_norm": 1.1742953062057495, | |
| "learning_rate": 3.786571005205498e-05, | |
| "loss": 0.2577, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 12.402780386999812, | |
| "grad_norm": 1.2898715734481812, | |
| "learning_rate": 3.773844728842275e-05, | |
| "loss": 0.2534, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 12.417809505917715, | |
| "grad_norm": 1.093583583831787, | |
| "learning_rate": 3.7611268985877215e-05, | |
| "loss": 0.259, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 12.432838624835618, | |
| "grad_norm": 0.9623090624809265, | |
| "learning_rate": 3.7484176020458906e-05, | |
| "loss": 0.2647, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 12.447867743753523, | |
| "grad_norm": 1.0669386386871338, | |
| "learning_rate": 3.735716926762059e-05, | |
| "loss": 0.2628, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 12.462896862671426, | |
| "grad_norm": 1.136635184288025, | |
| "learning_rate": 3.723024960222116e-05, | |
| "loss": 0.264, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 12.47792598158933, | |
| "grad_norm": 1.2198032140731812, | |
| "learning_rate": 3.710341789851962e-05, | |
| "loss": 0.2575, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 12.492955100507233, | |
| "grad_norm": 1.1004136800765991, | |
| "learning_rate": 3.697667503016904e-05, | |
| "loss": 0.2573, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 12.507984219425136, | |
| "grad_norm": 0.9815653562545776, | |
| "learning_rate": 3.685002187021064e-05, | |
| "loss": 0.2693, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 12.52301333834304, | |
| "grad_norm": 1.23141348361969, | |
| "learning_rate": 3.6723459291067615e-05, | |
| "loss": 0.2632, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 12.538042457260943, | |
| "grad_norm": 1.0357614755630493, | |
| "learning_rate": 3.65969881645393e-05, | |
| "loss": 0.2582, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 12.553071576178846, | |
| "grad_norm": 1.283329963684082, | |
| "learning_rate": 3.647060936179497e-05, | |
| "loss": 0.2654, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 12.56810069509675, | |
| "grad_norm": 1.062829613685608, | |
| "learning_rate": 3.63443237533681e-05, | |
| "loss": 0.2652, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 12.583129814014653, | |
| "grad_norm": 1.0494091510772705, | |
| "learning_rate": 3.6218132209150045e-05, | |
| "loss": 0.2664, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 12.598158932932558, | |
| "grad_norm": 1.1577351093292236, | |
| "learning_rate": 3.6092035598384354e-05, | |
| "loss": 0.2765, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 12.61318805185046, | |
| "grad_norm": 1.1229662895202637, | |
| "learning_rate": 3.5966034789660574e-05, | |
| "loss": 0.2658, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 12.628217170768364, | |
| "grad_norm": 1.1747732162475586, | |
| "learning_rate": 3.584013065090837e-05, | |
| "loss": 0.2631, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 12.643246289686267, | |
| "grad_norm": 1.2156236171722412, | |
| "learning_rate": 3.571432404939149e-05, | |
| "loss": 0.2618, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 12.65827540860417, | |
| "grad_norm": 1.2369886636734009, | |
| "learning_rate": 3.5588615851701855e-05, | |
| "loss": 0.2637, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 12.673304527522074, | |
| "grad_norm": 0.9820154905319214, | |
| "learning_rate": 3.546300692375352e-05, | |
| "loss": 0.2675, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 12.688333646439977, | |
| "grad_norm": 1.0225483179092407, | |
| "learning_rate": 3.533749813077677e-05, | |
| "loss": 0.2634, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 12.70336276535788, | |
| "grad_norm": 0.9450991153717041, | |
| "learning_rate": 3.5212090337312095e-05, | |
| "loss": 0.2713, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 12.718391884275784, | |
| "grad_norm": 1.1000279188156128, | |
| "learning_rate": 3.508678440720431e-05, | |
| "loss": 0.2728, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 12.733421003193687, | |
| "grad_norm": 1.1958969831466675, | |
| "learning_rate": 3.496158120359653e-05, | |
| "loss": 0.2546, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 12.748450122111592, | |
| "grad_norm": 1.0161027908325195, | |
| "learning_rate": 3.483648158892431e-05, | |
| "loss": 0.265, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 12.763479241029495, | |
| "grad_norm": 1.069886326789856, | |
| "learning_rate": 3.471148642490957e-05, | |
| "loss": 0.2605, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 12.778508359947399, | |
| "grad_norm": 1.082297444343567, | |
| "learning_rate": 3.4586596572554856e-05, | |
| "loss": 0.2739, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 12.793537478865302, | |
| "grad_norm": 1.0885424613952637, | |
| "learning_rate": 3.4461812892137196e-05, | |
| "loss": 0.2708, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 12.808566597783205, | |
| "grad_norm": 1.0391422510147095, | |
| "learning_rate": 3.433713624320234e-05, | |
| "loss": 0.2655, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 12.823595716701108, | |
| "grad_norm": 1.225851058959961, | |
| "learning_rate": 3.421256748455873e-05, | |
| "loss": 0.2542, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 12.838624835619012, | |
| "grad_norm": 0.993791401386261, | |
| "learning_rate": 3.408810747427169e-05, | |
| "loss": 0.2697, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 12.853653954536915, | |
| "grad_norm": 1.0382951498031616, | |
| "learning_rate": 3.396375706965738e-05, | |
| "loss": 0.2706, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 12.868683073454818, | |
| "grad_norm": 1.0424343347549438, | |
| "learning_rate": 3.383951712727701e-05, | |
| "loss": 0.2755, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 12.883712192372721, | |
| "grad_norm": 1.1532506942749023, | |
| "learning_rate": 3.371538850293088e-05, | |
| "loss": 0.2628, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 12.898741311290626, | |
| "grad_norm": 1.1272519826889038, | |
| "learning_rate": 3.359137205165251e-05, | |
| "loss": 0.2699, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 12.91377043020853, | |
| "grad_norm": 1.073285698890686, | |
| "learning_rate": 3.3467468627702734e-05, | |
| "loss": 0.2677, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 12.928799549126433, | |
| "grad_norm": 1.2244044542312622, | |
| "learning_rate": 3.334367908456384e-05, | |
| "loss": 0.2673, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 12.943828668044336, | |
| "grad_norm": 1.1868269443511963, | |
| "learning_rate": 3.32200042749336e-05, | |
| "loss": 0.2671, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 12.95885778696224, | |
| "grad_norm": 1.1779018640518188, | |
| "learning_rate": 3.309644505071959e-05, | |
| "loss": 0.2744, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 12.973886905880143, | |
| "grad_norm": 1.1692800521850586, | |
| "learning_rate": 3.297300226303306e-05, | |
| "loss": 0.2741, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 12.988916024798046, | |
| "grad_norm": 1.0709041357040405, | |
| "learning_rate": 3.284967676218336e-05, | |
| "loss": 0.2672, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 13.00300582378358, | |
| "grad_norm": 0.9654292464256287, | |
| "learning_rate": 3.272646939767179e-05, | |
| "loss": 0.255, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 13.018034942701485, | |
| "grad_norm": 0.9214917421340942, | |
| "learning_rate": 3.2603381018186016e-05, | |
| "loss": 0.2085, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 13.033064061619388, | |
| "grad_norm": 0.9971623420715332, | |
| "learning_rate": 3.248041247159401e-05, | |
| "loss": 0.2158, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 13.048093180537292, | |
| "grad_norm": 0.8868154287338257, | |
| "learning_rate": 3.235756460493836e-05, | |
| "loss": 0.2225, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 13.063122299455195, | |
| "grad_norm": 0.9371384382247925, | |
| "learning_rate": 3.2234838264430346e-05, | |
| "loss": 0.2194, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 13.078151418373098, | |
| "grad_norm": 0.933928370475769, | |
| "learning_rate": 3.211223429544415e-05, | |
| "loss": 0.2087, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 13.093180537291001, | |
| "grad_norm": 1.1291043758392334, | |
| "learning_rate": 3.198975354251101e-05, | |
| "loss": 0.214, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 13.108209656208905, | |
| "grad_norm": 0.9412780404090881, | |
| "learning_rate": 3.1867396849313466e-05, | |
| "loss": 0.2059, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 13.123238775126808, | |
| "grad_norm": 0.9674059748649597, | |
| "learning_rate": 3.174516505867943e-05, | |
| "loss": 0.2118, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 13.138267894044711, | |
| "grad_norm": 1.1346533298492432, | |
| "learning_rate": 3.16230590125765e-05, | |
| "loss": 0.2191, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 13.153297012962614, | |
| "grad_norm": 0.9253365993499756, | |
| "learning_rate": 3.150107955210606e-05, | |
| "loss": 0.2137, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 13.16832613188052, | |
| "grad_norm": 1.0744667053222656, | |
| "learning_rate": 3.137922751749762e-05, | |
| "loss": 0.2194, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 13.183355250798423, | |
| "grad_norm": 0.9793460965156555, | |
| "learning_rate": 3.125750374810283e-05, | |
| "loss": 0.2131, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 13.198384369716326, | |
| "grad_norm": 0.923272430896759, | |
| "learning_rate": 3.113590908238994e-05, | |
| "loss": 0.228, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 13.21341348863423, | |
| "grad_norm": 1.0247244834899902, | |
| "learning_rate": 3.101444435793777e-05, | |
| "loss": 0.2104, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 13.228442607552132, | |
| "grad_norm": 1.0090657472610474, | |
| "learning_rate": 3.089311041143017e-05, | |
| "loss": 0.2161, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 13.243471726470036, | |
| "grad_norm": 0.9428199529647827, | |
| "learning_rate": 3.077190807865009e-05, | |
| "loss": 0.2165, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 13.258500845387939, | |
| "grad_norm": 1.083084225654602, | |
| "learning_rate": 3.065083819447393e-05, | |
| "loss": 0.2135, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 13.273529964305842, | |
| "grad_norm": 1.0958205461502075, | |
| "learning_rate": 3.0529901592865705e-05, | |
| "loss": 0.2128, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 13.288559083223745, | |
| "grad_norm": 0.9356290698051453, | |
| "learning_rate": 3.0409099106871374e-05, | |
| "loss": 0.2136, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 13.303588202141649, | |
| "grad_norm": 1.1614493131637573, | |
| "learning_rate": 3.0288431568613053e-05, | |
| "loss": 0.2256, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 13.318617321059552, | |
| "grad_norm": 1.0191394090652466, | |
| "learning_rate": 3.0167899809283308e-05, | |
| "loss": 0.2183, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 13.333646439977457, | |
| "grad_norm": 1.0032422542572021, | |
| "learning_rate": 3.0047504659139404e-05, | |
| "loss": 0.214, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 13.34867555889536, | |
| "grad_norm": 0.9819022417068481, | |
| "learning_rate": 2.9927246947497644e-05, | |
| "loss": 0.2169, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 13.363704677813264, | |
| "grad_norm": 1.050058364868164, | |
| "learning_rate": 2.9807127502727537e-05, | |
| "loss": 0.2249, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 13.378733796731167, | |
| "grad_norm": 0.9431155920028687, | |
| "learning_rate": 2.9687147152246276e-05, | |
| "loss": 0.2148, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 13.39376291564907, | |
| "grad_norm": 0.8861021399497986, | |
| "learning_rate": 2.9567306722512833e-05, | |
| "loss": 0.2202, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 13.408792034566973, | |
| "grad_norm": 1.0134702920913696, | |
| "learning_rate": 2.944760703902244e-05, | |
| "loss": 0.2214, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 13.423821153484877, | |
| "grad_norm": 1.1062716245651245, | |
| "learning_rate": 2.9328048926300766e-05, | |
| "loss": 0.2238, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 13.43885027240278, | |
| "grad_norm": 1.0837918519973755, | |
| "learning_rate": 2.9208633207898372e-05, | |
| "loss": 0.2142, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 13.453879391320683, | |
| "grad_norm": 1.1653366088867188, | |
| "learning_rate": 2.908936070638487e-05, | |
| "loss": 0.2172, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 13.468908510238588, | |
| "grad_norm": 1.0416685342788696, | |
| "learning_rate": 2.8970232243343482e-05, | |
| "loss": 0.2185, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 13.483937629156491, | |
| "grad_norm": 1.0021854639053345, | |
| "learning_rate": 2.8851248639365114e-05, | |
| "loss": 0.2166, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 13.498966748074395, | |
| "grad_norm": 1.0365519523620605, | |
| "learning_rate": 2.8732410714042957e-05, | |
| "loss": 0.2209, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 13.513995866992298, | |
| "grad_norm": 1.008899211883545, | |
| "learning_rate": 2.8613719285966623e-05, | |
| "loss": 0.2254, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 13.529024985910201, | |
| "grad_norm": 0.8905879855155945, | |
| "learning_rate": 2.8495175172716692e-05, | |
| "loss": 0.2204, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 13.544054104828104, | |
| "grad_norm": 1.0459271669387817, | |
| "learning_rate": 2.837677919085896e-05, | |
| "loss": 0.217, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 13.559083223746008, | |
| "grad_norm": 1.0746241807937622, | |
| "learning_rate": 2.8258532155938875e-05, | |
| "loss": 0.2154, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 13.574112342663911, | |
| "grad_norm": 1.0592225790023804, | |
| "learning_rate": 2.8140434882475847e-05, | |
| "loss": 0.2232, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 13.589141461581814, | |
| "grad_norm": 0.9885957837104797, | |
| "learning_rate": 2.802248818395773e-05, | |
| "loss": 0.2158, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 13.604170580499718, | |
| "grad_norm": 1.1569939851760864, | |
| "learning_rate": 2.790469287283517e-05, | |
| "loss": 0.2218, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 13.61919969941762, | |
| "grad_norm": 1.135467529296875, | |
| "learning_rate": 2.7787049760516013e-05, | |
| "loss": 0.2214, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 13.634228818335526, | |
| "grad_norm": 1.140293002128601, | |
| "learning_rate": 2.766955965735968e-05, | |
| "loss": 0.2174, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 13.649257937253429, | |
| "grad_norm": 1.062946081161499, | |
| "learning_rate": 2.755222337267168e-05, | |
| "loss": 0.2245, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 13.664287056171332, | |
| "grad_norm": 1.142333984375, | |
| "learning_rate": 2.74350417146979e-05, | |
| "loss": 0.2159, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 13.679316175089236, | |
| "grad_norm": 1.206817388534546, | |
| "learning_rate": 2.731801549061923e-05, | |
| "loss": 0.2213, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 13.694345294007139, | |
| "grad_norm": 1.0265262126922607, | |
| "learning_rate": 2.7201145506545756e-05, | |
| "loss": 0.2307, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 13.709374412925042, | |
| "grad_norm": 1.2109159231185913, | |
| "learning_rate": 2.7084432567511443e-05, | |
| "loss": 0.2188, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 13.724403531842945, | |
| "grad_norm": 1.3201031684875488, | |
| "learning_rate": 2.6967877477468397e-05, | |
| "loss": 0.2243, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 13.739432650760849, | |
| "grad_norm": 1.1013463735580444, | |
| "learning_rate": 2.6851481039281478e-05, | |
| "loss": 0.2285, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 13.754461769678752, | |
| "grad_norm": 1.1080180406570435, | |
| "learning_rate": 2.6735244054722697e-05, | |
| "loss": 0.2289, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 13.769490888596657, | |
| "grad_norm": 1.0649311542510986, | |
| "learning_rate": 2.66191673244657e-05, | |
| "loss": 0.2243, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 13.78452000751456, | |
| "grad_norm": 1.1212127208709717, | |
| "learning_rate": 2.6503251648080212e-05, | |
| "loss": 0.217, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 13.799549126432463, | |
| "grad_norm": 1.0007354021072388, | |
| "learning_rate": 2.6387497824026637e-05, | |
| "loss": 0.2213, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 13.814578245350367, | |
| "grad_norm": 0.9835550785064697, | |
| "learning_rate": 2.6271906649650457e-05, | |
| "loss": 0.2206, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 13.82960736426827, | |
| "grad_norm": 1.1858932971954346, | |
| "learning_rate": 2.6156478921176807e-05, | |
| "loss": 0.2285, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 13.844636483186173, | |
| "grad_norm": 1.2049376964569092, | |
| "learning_rate": 2.6041215433704903e-05, | |
| "loss": 0.2236, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 13.859665602104076, | |
| "grad_norm": 0.9520084261894226, | |
| "learning_rate": 2.5926116981202688e-05, | |
| "loss": 0.233, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 13.87469472102198, | |
| "grad_norm": 1.0784698724746704, | |
| "learning_rate": 2.581118435650121e-05, | |
| "loss": 0.2284, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 13.889723839939883, | |
| "grad_norm": 1.1517982482910156, | |
| "learning_rate": 2.5696418351289387e-05, | |
| "loss": 0.2209, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 13.904752958857786, | |
| "grad_norm": 1.0725606679916382, | |
| "learning_rate": 2.558181975610827e-05, | |
| "loss": 0.2179, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 13.91978207777569, | |
| "grad_norm": 1.0226749181747437, | |
| "learning_rate": 2.546738936034585e-05, | |
| "loss": 0.2247, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 13.934811196693595, | |
| "grad_norm": 1.1553442478179932, | |
| "learning_rate": 2.5353127952231404e-05, | |
| "loss": 0.2179, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 13.949840315611498, | |
| "grad_norm": 1.0485488176345825, | |
| "learning_rate": 2.5239036318830278e-05, | |
| "loss": 0.2179, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 13.964869434529401, | |
| "grad_norm": 1.2220666408538818, | |
| "learning_rate": 2.51251152460383e-05, | |
| "loss": 0.2247, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 13.979898553447304, | |
| "grad_norm": 1.1536996364593506, | |
| "learning_rate": 2.5011365518576467e-05, | |
| "loss": 0.2331, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 13.994927672365208, | |
| "grad_norm": 1.0037457942962646, | |
| "learning_rate": 2.4897787919985454e-05, | |
| "loss": 0.2266, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 14.009017471350742, | |
| "grad_norm": 0.900565505027771, | |
| "learning_rate": 2.4784383232620295e-05, | |
| "loss": 0.1914, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 14.024046590268645, | |
| "grad_norm": 0.9061153531074524, | |
| "learning_rate": 2.467115223764495e-05, | |
| "loss": 0.1753, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 14.039075709186548, | |
| "grad_norm": 0.8884809613227844, | |
| "learning_rate": 2.4558095715026973e-05, | |
| "loss": 0.1721, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 14.054104828104453, | |
| "grad_norm": 0.9852058291435242, | |
| "learning_rate": 2.4445214443532027e-05, | |
| "loss": 0.1734, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 14.069133947022356, | |
| "grad_norm": 0.8632417321205139, | |
| "learning_rate": 2.4332509200718673e-05, | |
| "loss": 0.1898, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 14.08416306594026, | |
| "grad_norm": 0.9666391015052795, | |
| "learning_rate": 2.421998076293285e-05, | |
| "loss": 0.1835, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 14.099192184858163, | |
| "grad_norm": 0.8072938919067383, | |
| "learning_rate": 2.4107629905302738e-05, | |
| "loss": 0.1845, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 14.114221303776066, | |
| "grad_norm": 1.2991918325424194, | |
| "learning_rate": 2.3995457401733158e-05, | |
| "loss": 0.1809, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 14.12925042269397, | |
| "grad_norm": 0.8927931785583496, | |
| "learning_rate": 2.3883464024900482e-05, | |
| "loss": 0.1743, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 14.144279541611873, | |
| "grad_norm": 0.9115880727767944, | |
| "learning_rate": 2.3771650546247128e-05, | |
| "loss": 0.1742, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 14.159308660529776, | |
| "grad_norm": 0.904136061668396, | |
| "learning_rate": 2.3660017735976374e-05, | |
| "loss": 0.1873, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 14.17433777944768, | |
| "grad_norm": 0.9878782629966736, | |
| "learning_rate": 2.3548566363046992e-05, | |
| "loss": 0.1839, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 14.189366898365583, | |
| "grad_norm": 1.261094093322754, | |
| "learning_rate": 2.343729719516798e-05, | |
| "loss": 0.1722, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 14.204396017283488, | |
| "grad_norm": 0.959791362285614, | |
| "learning_rate": 2.332621099879318e-05, | |
| "loss": 0.1797, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 14.21942513620139, | |
| "grad_norm": 1.0712839365005493, | |
| "learning_rate": 2.321530853911616e-05, | |
| "loss": 0.1779, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 14.234454255119294, | |
| "grad_norm": 0.9205087423324585, | |
| "learning_rate": 2.3104590580064823e-05, | |
| "loss": 0.1978, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 14.249483374037197, | |
| "grad_norm": 0.9004307985305786, | |
| "learning_rate": 2.299405788429619e-05, | |
| "loss": 0.1792, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 14.2645124929551, | |
| "grad_norm": 0.9223144054412842, | |
| "learning_rate": 2.288371121319109e-05, | |
| "loss": 0.1795, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 14.279541611873004, | |
| "grad_norm": 0.8646677732467651, | |
| "learning_rate": 2.2773551326849036e-05, | |
| "loss": 0.1778, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 14.294570730790907, | |
| "grad_norm": 1.060955286026001, | |
| "learning_rate": 2.266357898408282e-05, | |
| "loss": 0.1864, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 14.30959984970881, | |
| "grad_norm": 0.9104660153388977, | |
| "learning_rate": 2.2553794942413503e-05, | |
| "loss": 0.1825, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 14.324628968626714, | |
| "grad_norm": 0.945350170135498, | |
| "learning_rate": 2.2444199958064955e-05, | |
| "loss": 0.1836, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 14.339658087544617, | |
| "grad_norm": 1.2413114309310913, | |
| "learning_rate": 2.2334794785958845e-05, | |
| "loss": 0.1769, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 14.354687206462522, | |
| "grad_norm": 0.9645456671714783, | |
| "learning_rate": 2.2225580179709303e-05, | |
| "loss": 0.1845, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 14.369716325380425, | |
| "grad_norm": 0.9362895488739014, | |
| "learning_rate": 2.2116556891617825e-05, | |
| "loss": 0.1813, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 14.384745444298328, | |
| "grad_norm": 1.0554242134094238, | |
| "learning_rate": 2.200772567266805e-05, | |
| "loss": 0.1932, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 14.399774563216232, | |
| "grad_norm": 1.0449492931365967, | |
| "learning_rate": 2.1899087272520595e-05, | |
| "loss": 0.1882, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 14.414803682134135, | |
| "grad_norm": 1.107164978981018, | |
| "learning_rate": 2.179064243950784e-05, | |
| "loss": 0.1878, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 14.429832801052038, | |
| "grad_norm": 1.010380506515503, | |
| "learning_rate": 2.1682391920628868e-05, | |
| "loss": 0.1784, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 14.444861919969942, | |
| "grad_norm": 1.1067860126495361, | |
| "learning_rate": 2.1574336461544258e-05, | |
| "loss": 0.1823, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 14.459891038887845, | |
| "grad_norm": 1.0193742513656616, | |
| "learning_rate": 2.1466476806570972e-05, | |
| "loss": 0.1887, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 14.474920157805748, | |
| "grad_norm": 0.9946687817573547, | |
| "learning_rate": 2.1358813698677178e-05, | |
| "loss": 0.1956, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 14.489949276723651, | |
| "grad_norm": 1.2227554321289062, | |
| "learning_rate": 2.125134787947722e-05, | |
| "loss": 0.1815, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 14.504978395641556, | |
| "grad_norm": 1.002421259880066, | |
| "learning_rate": 2.114408008922639e-05, | |
| "loss": 0.1851, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 14.52000751455946, | |
| "grad_norm": 1.0360831022262573, | |
| "learning_rate": 2.103701106681602e-05, | |
| "loss": 0.1838, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 14.535036633477363, | |
| "grad_norm": 0.9968597292900085, | |
| "learning_rate": 2.0930141549768144e-05, | |
| "loss": 0.1842, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 14.550065752395266, | |
| "grad_norm": 1.0610520839691162, | |
| "learning_rate": 2.082347227423064e-05, | |
| "loss": 0.1844, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 14.56509487131317, | |
| "grad_norm": 0.9733484983444214, | |
| "learning_rate": 2.071700397497199e-05, | |
| "loss": 0.1877, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 14.580123990231073, | |
| "grad_norm": 1.059486746788025, | |
| "learning_rate": 2.061073738537635e-05, | |
| "loss": 0.1917, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 14.595153109148976, | |
| "grad_norm": 1.0647083520889282, | |
| "learning_rate": 2.0504673237438422e-05, | |
| "loss": 0.1935, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 14.61018222806688, | |
| "grad_norm": 1.005767583847046, | |
| "learning_rate": 2.0398812261758444e-05, | |
| "loss": 0.1868, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 14.625211346984782, | |
| "grad_norm": 1.0666831731796265, | |
| "learning_rate": 2.029315518753711e-05, | |
| "loss": 0.1863, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 14.640240465902686, | |
| "grad_norm": 1.0782824754714966, | |
| "learning_rate": 2.018770274257062e-05, | |
| "loss": 0.2028, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 14.65526958482059, | |
| "grad_norm": 0.9997120499610901, | |
| "learning_rate": 2.0082455653245612e-05, | |
| "loss": 0.1945, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 14.670298703738494, | |
| "grad_norm": 1.096117615699768, | |
| "learning_rate": 1.9977414644534205e-05, | |
| "loss": 0.1876, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 14.685327822656397, | |
| "grad_norm": 0.9982436895370483, | |
| "learning_rate": 1.98725804399889e-05, | |
| "loss": 0.1847, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 14.7003569415743, | |
| "grad_norm": 1.2439534664154053, | |
| "learning_rate": 1.9767953761737772e-05, | |
| "loss": 0.189, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 14.715386060492204, | |
| "grad_norm": 1.0233805179595947, | |
| "learning_rate": 1.9663535330479305e-05, | |
| "loss": 0.1905, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 14.730415179410107, | |
| "grad_norm": 0.9537500739097595, | |
| "learning_rate": 1.9559325865477573e-05, | |
| "loss": 0.1757, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 14.74544429832801, | |
| "grad_norm": 1.0633177757263184, | |
| "learning_rate": 1.9455326084557213e-05, | |
| "loss": 0.1926, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 14.760473417245914, | |
| "grad_norm": 0.9927921295166016, | |
| "learning_rate": 1.9351536704098527e-05, | |
| "loss": 0.1907, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 14.775502536163817, | |
| "grad_norm": 1.0007320642471313, | |
| "learning_rate": 1.9247958439032448e-05, | |
| "loss": 0.189, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 14.79053165508172, | |
| "grad_norm": 1.1696594953536987, | |
| "learning_rate": 1.9144592002835756e-05, | |
| "loss": 0.1894, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 14.805560773999623, | |
| "grad_norm": 4.139706611633301, | |
| "learning_rate": 1.9041438107526056e-05, | |
| "loss": 0.1839, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 14.820589892917528, | |
| "grad_norm": 0.9341458678245544, | |
| "learning_rate": 1.8938497463656945e-05, | |
| "loss": 0.1991, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 14.835619011835432, | |
| "grad_norm": 1.1703625917434692, | |
| "learning_rate": 1.8835770780313027e-05, | |
| "loss": 0.1837, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 14.850648130753335, | |
| "grad_norm": 0.9725760221481323, | |
| "learning_rate": 1.8733258765105126e-05, | |
| "loss": 0.1831, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 14.865677249671238, | |
| "grad_norm": 0.9153964519500732, | |
| "learning_rate": 1.8630962124165375e-05, | |
| "loss": 0.1955, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 14.880706368589141, | |
| "grad_norm": 1.1788238286972046, | |
| "learning_rate": 1.852888156214233e-05, | |
| "loss": 0.1869, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 14.895735487507045, | |
| "grad_norm": 0.9835808873176575, | |
| "learning_rate": 1.8427017782196127e-05, | |
| "loss": 0.1915, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 14.910764606424948, | |
| "grad_norm": 1.1048306226730347, | |
| "learning_rate": 1.832537148599367e-05, | |
| "loss": 0.1851, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 14.925793725342851, | |
| "grad_norm": 1.847183108329773, | |
| "learning_rate": 1.8223943373703734e-05, | |
| "loss": 0.1848, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 14.940822844260754, | |
| "grad_norm": 0.9361986517906189, | |
| "learning_rate": 1.8122734143992214e-05, | |
| "loss": 0.1946, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 14.95585196317866, | |
| "grad_norm": 1.007897973060608, | |
| "learning_rate": 1.8021744494017283e-05, | |
| "loss": 0.1917, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 14.970881082096563, | |
| "grad_norm": 1.0453609228134155, | |
| "learning_rate": 1.7920975119424576e-05, | |
| "loss": 0.1956, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 14.985910201014466, | |
| "grad_norm": 1.3399736881256104, | |
| "learning_rate": 1.7820426714342374e-05, | |
| "loss": 0.1963, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.1934865713119507, | |
| "learning_rate": 1.7720099971376907e-05, | |
| "loss": 0.192, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 15.015029118917903, | |
| "grad_norm": 0.9646713733673096, | |
| "learning_rate": 1.7619995581607516e-05, | |
| "loss": 0.1614, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 15.030058237835807, | |
| "grad_norm": 0.815608561038971, | |
| "learning_rate": 1.7520114234581912e-05, | |
| "loss": 0.1628, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 15.04508735675371, | |
| "grad_norm": 0.9114384055137634, | |
| "learning_rate": 1.7420456618311405e-05, | |
| "loss": 0.1567, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 15.060116475671613, | |
| "grad_norm": 0.9106918573379517, | |
| "learning_rate": 1.7321023419266193e-05, | |
| "loss": 0.1582, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 15.075145594589518, | |
| "grad_norm": 0.7602341771125793, | |
| "learning_rate": 1.7221815322370632e-05, | |
| "loss": 0.1563, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 15.090174713507421, | |
| "grad_norm": 0.7736881971359253, | |
| "learning_rate": 1.7122833010998535e-05, | |
| "loss": 0.1533, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 15.105203832425325, | |
| "grad_norm": 0.9630312919616699, | |
| "learning_rate": 1.702407716696836e-05, | |
| "loss": 0.1533, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 15.120232951343228, | |
| "grad_norm": 0.8553804755210876, | |
| "learning_rate": 1.6925548470538695e-05, | |
| "loss": 0.1629, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 15.135262070261131, | |
| "grad_norm": 1.0749071836471558, | |
| "learning_rate": 1.6827247600403366e-05, | |
| "loss": 0.1605, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 15.150291189179034, | |
| "grad_norm": 0.8994390964508057, | |
| "learning_rate": 1.6729175233686955e-05, | |
| "loss": 0.1506, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 15.165320308096938, | |
| "grad_norm": 1.0106632709503174, | |
| "learning_rate": 1.6631332045939996e-05, | |
| "loss": 0.1652, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 15.180349427014841, | |
| "grad_norm": 1.0532327890396118, | |
| "learning_rate": 1.6533718711134412e-05, | |
| "loss": 0.1603, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 15.195378545932744, | |
| "grad_norm": 0.821412205696106, | |
| "learning_rate": 1.6436335901658766e-05, | |
| "loss": 0.1511, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 15.210407664850647, | |
| "grad_norm": 0.8959778547286987, | |
| "learning_rate": 1.633918428831377e-05, | |
| "loss": 0.1609, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 15.22543678376855, | |
| "grad_norm": 0.8607751131057739, | |
| "learning_rate": 1.6242264540307552e-05, | |
| "loss": 0.1579, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 15.240465902686456, | |
| "grad_norm": 0.8581548929214478, | |
| "learning_rate": 1.614557732525111e-05, | |
| "loss": 0.1563, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 15.255495021604359, | |
| "grad_norm": 0.8387672901153564, | |
| "learning_rate": 1.604912330915364e-05, | |
| "loss": 0.1576, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 15.270524140522262, | |
| "grad_norm": 0.871376097202301, | |
| "learning_rate": 1.595290315641806e-05, | |
| "loss": 0.1621, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 15.285553259440166, | |
| "grad_norm": 1.072432279586792, | |
| "learning_rate": 1.585691752983629e-05, | |
| "loss": 0.153, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 15.300582378358069, | |
| "grad_norm": 0.9539718627929688, | |
| "learning_rate": 1.5761167090584882e-05, | |
| "loss": 0.1551, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 15.315611497275972, | |
| "grad_norm": 0.9477748274803162, | |
| "learning_rate": 1.5665652498220236e-05, | |
| "loss": 0.1596, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 15.330640616193875, | |
| "grad_norm": 1.0767313241958618, | |
| "learning_rate": 1.5570374410674243e-05, | |
| "loss": 0.1597, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 15.345669735111779, | |
| "grad_norm": 0.8535225987434387, | |
| "learning_rate": 1.547533348424963e-05, | |
| "loss": 0.1653, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 15.360698854029682, | |
| "grad_norm": 0.92160964012146, | |
| "learning_rate": 1.5380530373615542e-05, | |
| "loss": 0.1487, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 15.375727972947587, | |
| "grad_norm": 0.840239942073822, | |
| "learning_rate": 1.5285965731802944e-05, | |
| "loss": 0.1545, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 15.39075709186549, | |
| "grad_norm": 1.0626702308654785, | |
| "learning_rate": 1.5191640210200187e-05, | |
| "loss": 0.1559, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 15.405786210783393, | |
| "grad_norm": 0.9364585280418396, | |
| "learning_rate": 1.5097554458548452e-05, | |
| "loss": 0.1646, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 15.420815329701297, | |
| "grad_norm": 1.0330567359924316, | |
| "learning_rate": 1.5003709124937354e-05, | |
| "loss": 0.1625, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 15.4358444486192, | |
| "grad_norm": 0.9339507818222046, | |
| "learning_rate": 1.4910104855800427e-05, | |
| "loss": 0.1515, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 15.450873567537103, | |
| "grad_norm": 0.7912824153900146, | |
| "learning_rate": 1.4816742295910708e-05, | |
| "loss": 0.162, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 15.465902686455006, | |
| "grad_norm": 0.9348452687263489, | |
| "learning_rate": 1.4723622088376205e-05, | |
| "loss": 0.1572, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 15.48093180537291, | |
| "grad_norm": 0.8750469088554382, | |
| "learning_rate": 1.463074487463561e-05, | |
| "loss": 0.1485, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 15.495960924290813, | |
| "grad_norm": 0.9709532260894775, | |
| "learning_rate": 1.4538111294453732e-05, | |
| "loss": 0.1583, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 15.510990043208716, | |
| "grad_norm": 0.9631896018981934, | |
| "learning_rate": 1.4445721985917254e-05, | |
| "loss": 0.1606, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 15.52601916212662, | |
| "grad_norm": 0.8176620006561279, | |
| "learning_rate": 1.435357758543015e-05, | |
| "loss": 0.1583, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 15.541048281044525, | |
| "grad_norm": 0.8556742668151855, | |
| "learning_rate": 1.426167872770947e-05, | |
| "loss": 0.1593, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 15.556077399962428, | |
| "grad_norm": 1.2856311798095703, | |
| "learning_rate": 1.4170026045780832e-05, | |
| "loss": 0.169, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 15.571106518880331, | |
| "grad_norm": 1.07082200050354, | |
| "learning_rate": 1.4078620170974177e-05, | |
| "loss": 0.1581, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 15.586135637798234, | |
| "grad_norm": 0.9026190042495728, | |
| "learning_rate": 1.3987461732919343e-05, | |
| "loss": 0.1704, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 15.601164756716138, | |
| "grad_norm": 0.9147086143493652, | |
| "learning_rate": 1.3896551359541782e-05, | |
| "loss": 0.1566, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 15.61619387563404, | |
| "grad_norm": 0.9676672220230103, | |
| "learning_rate": 1.3805889677058149e-05, | |
| "loss": 0.1668, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 15.631222994551944, | |
| "grad_norm": 0.9647960066795349, | |
| "learning_rate": 1.3715477309972086e-05, | |
| "loss": 0.1603, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 15.646252113469847, | |
| "grad_norm": 0.9588443636894226, | |
| "learning_rate": 1.3625314881069873e-05, | |
| "loss": 0.1614, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 15.66128123238775, | |
| "grad_norm": 0.921419084072113, | |
| "learning_rate": 1.3535403011416158e-05, | |
| "loss": 0.1574, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 15.676310351305656, | |
| "grad_norm": 0.9163838624954224, | |
| "learning_rate": 1.3445742320349625e-05, | |
| "loss": 0.1521, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 15.691339470223559, | |
| "grad_norm": 0.9288631081581116, | |
| "learning_rate": 1.3356333425478817e-05, | |
| "loss": 0.159, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 15.706368589141462, | |
| "grad_norm": 0.9103051424026489, | |
| "learning_rate": 1.3267176942677761e-05, | |
| "loss": 0.1648, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 15.721397708059365, | |
| "grad_norm": 0.8684786558151245, | |
| "learning_rate": 1.317827348608191e-05, | |
| "loss": 0.1598, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 15.736426826977269, | |
| "grad_norm": 1.129595160484314, | |
| "learning_rate": 1.3089623668083683e-05, | |
| "loss": 0.1595, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 15.751455945895172, | |
| "grad_norm": 0.8634871244430542, | |
| "learning_rate": 1.3001228099328443e-05, | |
| "loss": 0.1642, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 15.766485064813075, | |
| "grad_norm": 0.932549774646759, | |
| "learning_rate": 1.2913087388710165e-05, | |
| "loss": 0.1541, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 15.781514183730978, | |
| "grad_norm": 0.9329362511634827, | |
| "learning_rate": 1.282520214336731e-05, | |
| "loss": 0.1523, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 15.796543302648882, | |
| "grad_norm": 0.9856179356575012, | |
| "learning_rate": 1.2737572968678623e-05, | |
| "loss": 0.1597, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 15.811572421566785, | |
| "grad_norm": 0.9236768484115601, | |
| "learning_rate": 1.2650200468258966e-05, | |
| "loss": 0.161, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 15.826601540484688, | |
| "grad_norm": 1.0709694623947144, | |
| "learning_rate": 1.256308524395512e-05, | |
| "loss": 0.1641, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 15.841630659402593, | |
| "grad_norm": 0.8838292956352234, | |
| "learning_rate": 1.2476227895841713e-05, | |
| "loss": 0.1683, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 15.856659778320497, | |
| "grad_norm": 1.0665549039840698, | |
| "learning_rate": 1.238962902221703e-05, | |
| "loss": 0.165, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 15.8716888972384, | |
| "grad_norm": 0.876946210861206, | |
| "learning_rate": 1.2303289219598934e-05, | |
| "loss": 0.1645, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 15.886718016156303, | |
| "grad_norm": 0.8602812886238098, | |
| "learning_rate": 1.2217209082720677e-05, | |
| "loss": 0.1648, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 15.901747135074206, | |
| "grad_norm": 0.9444336295127869, | |
| "learning_rate": 1.2131389204526927e-05, | |
| "loss": 0.1531, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 15.91677625399211, | |
| "grad_norm": 0.8952954411506653, | |
| "learning_rate": 1.2045830176169542e-05, | |
| "loss": 0.1653, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 15.931805372910013, | |
| "grad_norm": 0.9685820937156677, | |
| "learning_rate": 1.1960532587003664e-05, | |
| "loss": 0.1683, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 15.946834491827916, | |
| "grad_norm": 0.9807755351066589, | |
| "learning_rate": 1.1875497024583476e-05, | |
| "loss": 0.1588, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 15.96186361074582, | |
| "grad_norm": 0.986831784248352, | |
| "learning_rate": 1.1790724074658315e-05, | |
| "loss": 0.1734, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 15.976892729663723, | |
| "grad_norm": 0.932146430015564, | |
| "learning_rate": 1.1706214321168513e-05, | |
| "loss": 0.1581, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 15.991921848581628, | |
| "grad_norm": 0.9639928936958313, | |
| "learning_rate": 1.1621968346241457e-05, | |
| "loss": 0.1595, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 16.00601164756716, | |
| "grad_norm": 0.7162834405899048, | |
| "learning_rate": 1.1537986730187566e-05, | |
| "loss": 0.1529, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 16.021040766485065, | |
| "grad_norm": 0.9273526072502136, | |
| "learning_rate": 1.1454270051496264e-05, | |
| "loss": 0.1424, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 16.03606988540297, | |
| "grad_norm": 0.7194620370864868, | |
| "learning_rate": 1.1370818886831985e-05, | |
| "loss": 0.147, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 16.05109900432087, | |
| "grad_norm": 0.8820509910583496, | |
| "learning_rate": 1.1287633811030268e-05, | |
| "loss": 0.1394, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 16.066128123238776, | |
| "grad_norm": 0.9373833537101746, | |
| "learning_rate": 1.1204715397093734e-05, | |
| "loss": 0.1347, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 16.081157242156678, | |
| "grad_norm": 0.7921836376190186, | |
| "learning_rate": 1.1122064216188183e-05, | |
| "loss": 0.1368, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 16.096186361074583, | |
| "grad_norm": 0.7020202875137329, | |
| "learning_rate": 1.1039680837638594e-05, | |
| "loss": 0.1403, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 16.111215479992484, | |
| "grad_norm": 0.7879025340080261, | |
| "learning_rate": 1.0957565828925293e-05, | |
| "loss": 0.1319, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 16.12624459891039, | |
| "grad_norm": 0.7713704705238342, | |
| "learning_rate": 1.0875719755679936e-05, | |
| "loss": 0.1335, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 16.14127371782829, | |
| "grad_norm": 0.8271151185035706, | |
| "learning_rate": 1.0794143181681782e-05, | |
| "loss": 0.1357, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 16.156302836746196, | |
| "grad_norm": 0.7664535641670227, | |
| "learning_rate": 1.0712836668853582e-05, | |
| "loss": 0.137, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 16.171331955664098, | |
| "grad_norm": 0.8511399626731873, | |
| "learning_rate": 1.063180077725791e-05, | |
| "loss": 0.151, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 16.186361074582003, | |
| "grad_norm": 0.8683989644050598, | |
| "learning_rate": 1.0551036065093172e-05, | |
| "loss": 0.1416, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 16.201390193499908, | |
| "grad_norm": 0.8145375847816467, | |
| "learning_rate": 1.0470543088689855e-05, | |
| "loss": 0.1364, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 16.21641931241781, | |
| "grad_norm": 0.9890855550765991, | |
| "learning_rate": 1.0390322402506619e-05, | |
| "loss": 0.1312, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 16.231448431335714, | |
| "grad_norm": 0.7960677742958069, | |
| "learning_rate": 1.0310374559126551e-05, | |
| "loss": 0.1259, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 16.246477550253616, | |
| "grad_norm": 0.7810579538345337, | |
| "learning_rate": 1.0230700109253256e-05, | |
| "loss": 0.1476, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 16.26150666917152, | |
| "grad_norm": 0.7869362235069275, | |
| "learning_rate": 1.0151299601707187e-05, | |
| "loss": 0.1326, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 16.276535788089422, | |
| "grad_norm": 0.7896257042884827, | |
| "learning_rate": 1.0072173583421769e-05, | |
| "loss": 0.1414, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 16.291564907007327, | |
| "grad_norm": 0.8226996660232544, | |
| "learning_rate": 9.993322599439692e-06, | |
| "loss": 0.1437, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 16.30659402592523, | |
| "grad_norm": 0.8732724785804749, | |
| "learning_rate": 9.914747192909096e-06, | |
| "loss": 0.1286, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 16.321623144843134, | |
| "grad_norm": 0.8967133164405823, | |
| "learning_rate": 9.836447905079905e-06, | |
| "loss": 0.1476, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 16.33665226376104, | |
| "grad_norm": 0.8874047994613647, | |
| "learning_rate": 9.758425275299999e-06, | |
| "loss": 0.1301, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 16.35168138267894, | |
| "grad_norm": 0.7454355359077454, | |
| "learning_rate": 9.680679841011652e-06, | |
| "loss": 0.1466, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 16.366710501596845, | |
| "grad_norm": 0.9600047469139099, | |
| "learning_rate": 9.603212137747641e-06, | |
| "loss": 0.1384, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 16.381739620514747, | |
| "grad_norm": 1.0687470436096191, | |
| "learning_rate": 9.526022699127718e-06, | |
| "loss": 0.1337, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 16.396768739432652, | |
| "grad_norm": 0.7660526633262634, | |
| "learning_rate": 9.449112056854813e-06, | |
| "loss": 0.1372, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 16.411797858350553, | |
| "grad_norm": 0.7811424136161804, | |
| "learning_rate": 9.372480740711475e-06, | |
| "loss": 0.1368, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 16.42682697726846, | |
| "grad_norm": 0.9468358159065247, | |
| "learning_rate": 9.296129278556155e-06, | |
| "loss": 0.1399, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 16.44185609618636, | |
| "grad_norm": 0.799017071723938, | |
| "learning_rate": 9.220058196319598e-06, | |
| "loss": 0.1439, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 16.456885215104265, | |
| "grad_norm": 0.811414361000061, | |
| "learning_rate": 9.144268018001184e-06, | |
| "loss": 0.1445, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 16.471914334022166, | |
| "grad_norm": 0.8114548325538635, | |
| "learning_rate": 9.068759265665384e-06, | |
| "loss": 0.1478, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 16.48694345294007, | |
| "grad_norm": 0.753917932510376, | |
| "learning_rate": 8.993532459438098e-06, | |
| "loss": 0.1432, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 16.501972571857976, | |
| "grad_norm": 0.8858105540275574, | |
| "learning_rate": 8.91858811750313e-06, | |
| "loss": 0.1367, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 16.517001690775878, | |
| "grad_norm": 0.7127811312675476, | |
| "learning_rate": 8.843926756098547e-06, | |
| "loss": 0.1342, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 16.532030809693783, | |
| "grad_norm": 0.8266831636428833, | |
| "learning_rate": 8.769548889513212e-06, | |
| "loss": 0.1492, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 16.547059928611684, | |
| "grad_norm": 0.9057301878929138, | |
| "learning_rate": 8.695455030083144e-06, | |
| "loss": 0.1474, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 16.56208904752959, | |
| "grad_norm": 0.7918298840522766, | |
| "learning_rate": 8.621645688188085e-06, | |
| "loss": 0.1388, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 16.57711816644749, | |
| "grad_norm": 0.8264976739883423, | |
| "learning_rate": 8.548121372247918e-06, | |
| "loss": 0.1449, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 16.592147285365396, | |
| "grad_norm": 0.9591594934463501, | |
| "learning_rate": 8.474882588719196e-06, | |
| "loss": 0.1436, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 16.607176404283297, | |
| "grad_norm": 0.8288829326629639, | |
| "learning_rate": 8.401929842091616e-06, | |
| "loss": 0.1291, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 16.622205523201202, | |
| "grad_norm": 0.865283191204071, | |
| "learning_rate": 8.329263634884598e-06, | |
| "loss": 0.1443, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 16.637234642119104, | |
| "grad_norm": 0.8038478493690491, | |
| "learning_rate": 8.256884467643788e-06, | |
| "loss": 0.1409, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 16.65226376103701, | |
| "grad_norm": 0.7755337357521057, | |
| "learning_rate": 8.184792838937633e-06, | |
| "loss": 0.1378, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 16.667292879954914, | |
| "grad_norm": 0.7843419313430786, | |
| "learning_rate": 8.112989245353896e-06, | |
| "loss": 0.1532, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 16.682321998872816, | |
| "grad_norm": 0.7573866248130798, | |
| "learning_rate": 8.0414741814963e-06, | |
| "loss": 0.1451, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 16.69735111779072, | |
| "grad_norm": 0.8233633637428284, | |
| "learning_rate": 7.97024813998109e-06, | |
| "loss": 0.1364, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 16.712380236708622, | |
| "grad_norm": 0.8834894895553589, | |
| "learning_rate": 7.899311611433646e-06, | |
| "loss": 0.1431, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 16.727409355626527, | |
| "grad_norm": 0.8282538056373596, | |
| "learning_rate": 7.828665084485076e-06, | |
| "loss": 0.1316, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 16.74243847454443, | |
| "grad_norm": 0.7527298927307129, | |
| "learning_rate": 7.758309045768908e-06, | |
| "loss": 0.1465, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 16.757467593462334, | |
| "grad_norm": 0.7522730827331543, | |
| "learning_rate": 7.688243979917664e-06, | |
| "loss": 0.1386, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 16.772496712380235, | |
| "grad_norm": 0.949739933013916, | |
| "learning_rate": 7.6184703695595936e-06, | |
| "loss": 0.1317, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 16.78752583129814, | |
| "grad_norm": 0.8552820086479187, | |
| "learning_rate": 7.5489886953153125e-06, | |
| "loss": 0.1313, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 16.802554950216045, | |
| "grad_norm": 0.7522038817405701, | |
| "learning_rate": 7.479799435794499e-06, | |
| "loss": 0.1399, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 16.817584069133947, | |
| "grad_norm": 0.8218302726745605, | |
| "learning_rate": 7.410903067592562e-06, | |
| "loss": 0.139, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 16.83261318805185, | |
| "grad_norm": 0.7487614154815674, | |
| "learning_rate": 7.342300065287439e-06, | |
| "loss": 0.1462, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 16.847642306969753, | |
| "grad_norm": 0.8830420970916748, | |
| "learning_rate": 7.273990901436245e-06, | |
| "loss": 0.1466, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 16.862671425887658, | |
| "grad_norm": 1.094682216644287, | |
| "learning_rate": 7.2059760465720825e-06, | |
| "loss": 0.1473, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 16.87770054480556, | |
| "grad_norm": 0.7629777789115906, | |
| "learning_rate": 7.1382559692007245e-06, | |
| "loss": 0.1385, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 16.892729663723465, | |
| "grad_norm": 0.7562497854232788, | |
| "learning_rate": 7.070831135797473e-06, | |
| "loss": 0.1454, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 16.907758782641366, | |
| "grad_norm": 0.8945866823196411, | |
| "learning_rate": 7.003702010803892e-06, | |
| "loss": 0.1405, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 16.92278790155927, | |
| "grad_norm": 0.7205698490142822, | |
| "learning_rate": 6.936869056624623e-06, | |
| "loss": 0.1475, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 16.937817020477176, | |
| "grad_norm": 0.8356210589408875, | |
| "learning_rate": 6.870332733624174e-06, | |
| "loss": 0.1431, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 16.952846139395078, | |
| "grad_norm": 0.8396646976470947, | |
| "learning_rate": 6.8040935001238256e-06, | |
| "loss": 0.1426, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 16.967875258312983, | |
| "grad_norm": 0.9201752543449402, | |
| "learning_rate": 6.738151812398352e-06, | |
| "loss": 0.1434, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 16.982904377230884, | |
| "grad_norm": 0.9603893756866455, | |
| "learning_rate": 6.67250812467301e-06, | |
| "loss": 0.142, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 16.99793349614879, | |
| "grad_norm": 0.7966869473457336, | |
| "learning_rate": 6.607162889120305e-06, | |
| "loss": 0.155, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 17.012023295134323, | |
| "grad_norm": 0.5946935415267944, | |
| "learning_rate": 6.542116555856953e-06, | |
| "loss": 0.1274, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 17.027052414052225, | |
| "grad_norm": 0.774712324142456, | |
| "learning_rate": 6.477369572940706e-06, | |
| "loss": 0.1221, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 17.04208153297013, | |
| "grad_norm": 0.7754786610603333, | |
| "learning_rate": 6.412922386367332e-06, | |
| "loss": 0.1317, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 17.05711065188803, | |
| "grad_norm": 0.6870192885398865, | |
| "learning_rate": 6.348775440067506e-06, | |
| "loss": 0.1174, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 17.072139770805936, | |
| "grad_norm": 0.8024049401283264, | |
| "learning_rate": 6.284929175903786e-06, | |
| "loss": 0.127, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 17.08716888972384, | |
| "grad_norm": 0.752888023853302, | |
| "learning_rate": 6.2213840336674936e-06, | |
| "loss": 0.1207, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 17.102198008641743, | |
| "grad_norm": 0.7125491499900818, | |
| "learning_rate": 6.158140451075795e-06, | |
| "loss": 0.1351, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 17.117227127559648, | |
| "grad_norm": 0.7468791007995605, | |
| "learning_rate": 6.095198863768564e-06, | |
| "loss": 0.131, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 17.13225624647755, | |
| "grad_norm": 0.8037786483764648, | |
| "learning_rate": 6.032559705305523e-06, | |
| "loss": 0.1308, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 17.147285365395454, | |
| "grad_norm": 0.7919206023216248, | |
| "learning_rate": 5.9702234071631e-06, | |
| "loss": 0.1234, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 17.162314484313356, | |
| "grad_norm": 0.7676987051963806, | |
| "learning_rate": 5.9081903987316e-06, | |
| "loss": 0.1197, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 17.17734360323126, | |
| "grad_norm": 1.1687105894088745, | |
| "learning_rate": 5.8464611073121235e-06, | |
| "loss": 0.1241, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 17.192372722149162, | |
| "grad_norm": 0.7436251044273376, | |
| "learning_rate": 5.785035958113716e-06, | |
| "loss": 0.1288, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 17.207401841067067, | |
| "grad_norm": 0.656187117099762, | |
| "learning_rate": 5.7239153742503995e-06, | |
| "loss": 0.1187, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 17.222430959984973, | |
| "grad_norm": 0.6904690265655518, | |
| "learning_rate": 5.663099776738273e-06, | |
| "loss": 0.1366, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 17.237460078902874, | |
| "grad_norm": 0.8284912109375, | |
| "learning_rate": 5.602589584492562e-06, | |
| "loss": 0.1242, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 17.25248919782078, | |
| "grad_norm": 0.8081623911857605, | |
| "learning_rate": 5.542385214324819e-06, | |
| "loss": 0.1234, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 17.26751831673868, | |
| "grad_norm": 1.1938631534576416, | |
| "learning_rate": 5.48248708093998e-06, | |
| "loss": 0.1326, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 17.282547435656586, | |
| "grad_norm": 0.6938109993934631, | |
| "learning_rate": 5.422895596933558e-06, | |
| "loss": 0.1305, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 17.297576554574487, | |
| "grad_norm": 0.7339420914649963, | |
| "learning_rate": 5.36361117278874e-06, | |
| "loss": 0.1206, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 17.312605673492392, | |
| "grad_norm": 0.7437239289283752, | |
| "learning_rate": 5.304634216873633e-06, | |
| "loss": 0.1205, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 17.327634792410294, | |
| "grad_norm": 0.7222012281417847, | |
| "learning_rate": 5.24596513543838e-06, | |
| "loss": 0.1219, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 17.3426639113282, | |
| "grad_norm": 0.8264778852462769, | |
| "learning_rate": 5.187604332612445e-06, | |
| "loss": 0.1318, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 17.3576930302461, | |
| "grad_norm": 0.7213618159294128, | |
| "learning_rate": 5.129552210401728e-06, | |
| "loss": 0.1203, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 17.372722149164005, | |
| "grad_norm": 0.7722398638725281, | |
| "learning_rate": 5.071809168685887e-06, | |
| "loss": 0.1266, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 17.38775126808191, | |
| "grad_norm": 0.8326044678688049, | |
| "learning_rate": 5.014375605215521e-06, | |
| "loss": 0.1267, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 17.40278038699981, | |
| "grad_norm": 0.886371374130249, | |
| "learning_rate": 4.957251915609462e-06, | |
| "loss": 0.119, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 17.417809505917717, | |
| "grad_norm": 0.7517515420913696, | |
| "learning_rate": 4.900438493352055e-06, | |
| "loss": 0.1291, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 17.432838624835618, | |
| "grad_norm": 0.8436376452445984, | |
| "learning_rate": 4.843935729790422e-06, | |
| "loss": 0.1336, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 17.447867743753523, | |
| "grad_norm": 0.8188118934631348, | |
| "learning_rate": 4.7877440141317675e-06, | |
| "loss": 0.1276, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 17.462896862671425, | |
| "grad_norm": 0.7850053310394287, | |
| "learning_rate": 4.731863733440733e-06, | |
| "loss": 0.1263, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 17.47792598158933, | |
| "grad_norm": 0.7156862616539001, | |
| "learning_rate": 4.676295272636688e-06, | |
| "loss": 0.1371, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 17.49295510050723, | |
| "grad_norm": 0.9043847322463989, | |
| "learning_rate": 4.621039014491119e-06, | |
| "loss": 0.136, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 17.507984219425136, | |
| "grad_norm": 0.7520122528076172, | |
| "learning_rate": 4.566095339624943e-06, | |
| "loss": 0.1278, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 17.52301333834304, | |
| "grad_norm": 0.8322932124137878, | |
| "learning_rate": 4.511464626505935e-06, | |
| "loss": 0.1178, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 17.538042457260943, | |
| "grad_norm": 0.7075957655906677, | |
| "learning_rate": 4.457147251446075e-06, | |
| "loss": 0.1295, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 17.553071576178848, | |
| "grad_norm": 0.7323919534683228, | |
| "learning_rate": 4.403143588599029e-06, | |
| "loss": 0.1272, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 17.56810069509675, | |
| "grad_norm": 0.9109891653060913, | |
| "learning_rate": 4.349454009957471e-06, | |
| "loss": 0.1236, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 17.583129814014654, | |
| "grad_norm": 0.8152607679367065, | |
| "learning_rate": 4.296078885350607e-06, | |
| "loss": 0.1267, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 17.598158932932556, | |
| "grad_norm": 0.7224797606468201, | |
| "learning_rate": 4.2430185824415715e-06, | |
| "loss": 0.1355, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 17.61318805185046, | |
| "grad_norm": 0.7984783053398132, | |
| "learning_rate": 4.190273466724925e-06, | |
| "loss": 0.1364, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 17.628217170768362, | |
| "grad_norm": 0.9017600417137146, | |
| "learning_rate": 4.137843901524141e-06, | |
| "loss": 0.1281, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 17.643246289686267, | |
| "grad_norm": 0.7681065797805786, | |
| "learning_rate": 4.085730247989078e-06, | |
| "loss": 0.1234, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 17.65827540860417, | |
| "grad_norm": 0.7442010045051575, | |
| "learning_rate": 4.033932865093499e-06, | |
| "loss": 0.1331, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 17.673304527522074, | |
| "grad_norm": 0.7311212420463562, | |
| "learning_rate": 3.982452109632617e-06, | |
| "loss": 0.1336, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 17.68833364643998, | |
| "grad_norm": 0.7073860764503479, | |
| "learning_rate": 3.931288336220617e-06, | |
| "loss": 0.1263, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 17.70336276535788, | |
| "grad_norm": 0.6838569641113281, | |
| "learning_rate": 3.880441897288234e-06, | |
| "loss": 0.1299, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 17.718391884275785, | |
| "grad_norm": 0.9706346988677979, | |
| "learning_rate": 3.829913143080283e-06, | |
| "loss": 0.1276, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 17.733421003193687, | |
| "grad_norm": 0.7603088617324829, | |
| "learning_rate": 3.7797024216533138e-06, | |
| "loss": 0.1263, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 17.748450122111592, | |
| "grad_norm": 0.7066922187805176, | |
| "learning_rate": 3.729810078873125e-06, | |
| "loss": 0.1284, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 17.763479241029493, | |
| "grad_norm": 0.7454369068145752, | |
| "learning_rate": 3.6802364584124947e-06, | |
| "loss": 0.124, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 17.7785083599474, | |
| "grad_norm": 0.7552350759506226, | |
| "learning_rate": 3.6309819017487034e-06, | |
| "loss": 0.1259, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 17.7935374788653, | |
| "grad_norm": 0.8061559200286865, | |
| "learning_rate": 3.5820467481612496e-06, | |
| "loss": 0.126, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 17.808566597783205, | |
| "grad_norm": 0.6990138292312622, | |
| "learning_rate": 3.5334313347294757e-06, | |
| "loss": 0.1271, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 17.82359571670111, | |
| "grad_norm": 0.7601016163825989, | |
| "learning_rate": 3.4851359963302798e-06, | |
| "loss": 0.1397, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 17.83862483561901, | |
| "grad_norm": 0.7683603167533875, | |
| "learning_rate": 3.43716106563578e-06, | |
| "loss": 0.1376, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 17.853653954536917, | |
| "grad_norm": 0.8137221932411194, | |
| "learning_rate": 3.3895068731110534e-06, | |
| "loss": 0.122, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 17.868683073454818, | |
| "grad_norm": 0.8366261124610901, | |
| "learning_rate": 3.342173747011801e-06, | |
| "loss": 0.1273, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 17.883712192372723, | |
| "grad_norm": 0.8289967179298401, | |
| "learning_rate": 3.295162013382164e-06, | |
| "loss": 0.1274, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 17.898741311290625, | |
| "grad_norm": 0.6871482133865356, | |
| "learning_rate": 3.248471996052432e-06, | |
| "loss": 0.1357, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 17.91377043020853, | |
| "grad_norm": 0.7140630483627319, | |
| "learning_rate": 3.202104016636814e-06, | |
| "loss": 0.1247, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 17.92879954912643, | |
| "grad_norm": 0.7578158974647522, | |
| "learning_rate": 3.156058394531225e-06, | |
| "loss": 0.1285, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 17.943828668044336, | |
| "grad_norm": 0.718285858631134, | |
| "learning_rate": 3.1103354469111056e-06, | |
| "loss": 0.1285, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 17.958857786962238, | |
| "grad_norm": 0.7415304780006409, | |
| "learning_rate": 3.0649354887291925e-06, | |
| "loss": 0.1259, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 17.973886905880143, | |
| "grad_norm": 0.7331326007843018, | |
| "learning_rate": 3.019858832713435e-06, | |
| "loss": 0.1264, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 17.988916024798048, | |
| "grad_norm": 0.7621225714683533, | |
| "learning_rate": 2.9751057893647237e-06, | |
| "loss": 0.1306, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 18.00300582378358, | |
| "grad_norm": 0.6445237994194031, | |
| "learning_rate": 2.930676666954846e-06, | |
| "loss": 0.1289, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 18.018034942701483, | |
| "grad_norm": 0.6551523208618164, | |
| "learning_rate": 2.8865717715243212e-06, | |
| "loss": 0.123, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 18.03306406161939, | |
| "grad_norm": 0.6718552708625793, | |
| "learning_rate": 2.842791406880291e-06, | |
| "loss": 0.1254, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 18.04809318053729, | |
| "grad_norm": 0.653846263885498, | |
| "learning_rate": 2.7993358745944608e-06, | |
| "loss": 0.1237, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 18.063122299455195, | |
| "grad_norm": 0.7196510434150696, | |
| "learning_rate": 2.756205474000978e-06, | |
| "loss": 0.1162, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 18.078151418373096, | |
| "grad_norm": 0.6618478894233704, | |
| "learning_rate": 2.7134005021943852e-06, | |
| "loss": 0.117, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 18.093180537291, | |
| "grad_norm": 0.8368316292762756, | |
| "learning_rate": 2.670921254027592e-06, | |
| "loss": 0.1205, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 18.108209656208906, | |
| "grad_norm": 0.6879215836524963, | |
| "learning_rate": 2.6287680221098233e-06, | |
| "loss": 0.1171, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 18.123238775126808, | |
| "grad_norm": 0.7069093585014343, | |
| "learning_rate": 2.5869410968046294e-06, | |
| "loss": 0.1235, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 18.138267894044713, | |
| "grad_norm": 0.6723190546035767, | |
| "learning_rate": 2.5454407662278244e-06, | |
| "loss": 0.1085, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 18.153297012962614, | |
| "grad_norm": 0.6698660850524902, | |
| "learning_rate": 2.5042673162455954e-06, | |
| "loss": 0.1195, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 18.16832613188052, | |
| "grad_norm": 0.6730449795722961, | |
| "learning_rate": 2.463421030472429e-06, | |
| "loss": 0.1139, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 18.18335525079842, | |
| "grad_norm": 0.805294394493103, | |
| "learning_rate": 2.422902190269266e-06, | |
| "loss": 0.1242, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 18.198384369716326, | |
| "grad_norm": 1.0811830759048462, | |
| "learning_rate": 2.3827110747414785e-06, | |
| "loss": 0.1195, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 18.213413488634227, | |
| "grad_norm": 0.6854028105735779, | |
| "learning_rate": 2.342847960736966e-06, | |
| "loss": 0.119, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 18.228442607552132, | |
| "grad_norm": 0.6735851764678955, | |
| "learning_rate": 2.303313122844286e-06, | |
| "loss": 0.1321, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 18.243471726470037, | |
| "grad_norm": 0.7301083207130432, | |
| "learning_rate": 2.264106833390722e-06, | |
| "loss": 0.1204, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 18.25850084538794, | |
| "grad_norm": 0.7372903823852539, | |
| "learning_rate": 2.2252293624404176e-06, | |
| "loss": 0.1201, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 18.273529964305844, | |
| "grad_norm": 0.6305893659591675, | |
| "learning_rate": 2.1866809777925324e-06, | |
| "loss": 0.1128, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 18.288559083223745, | |
| "grad_norm": 0.7112670540809631, | |
| "learning_rate": 2.148461944979385e-06, | |
| "loss": 0.1172, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 18.30358820214165, | |
| "grad_norm": 0.6915646195411682, | |
| "learning_rate": 2.1105725272646094e-06, | |
| "loss": 0.1197, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 18.318617321059552, | |
| "grad_norm": 0.6650305986404419, | |
| "learning_rate": 2.0730129856413707e-06, | |
| "loss": 0.121, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 18.333646439977457, | |
| "grad_norm": 0.6500080823898315, | |
| "learning_rate": 2.0357835788305467e-06, | |
| "loss": 0.1209, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 18.34867555889536, | |
| "grad_norm": 0.7032843828201294, | |
| "learning_rate": 1.998884563278963e-06, | |
| "loss": 0.1194, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 18.363704677813264, | |
| "grad_norm": 0.6876169443130493, | |
| "learning_rate": 1.962316193157593e-06, | |
| "loss": 0.117, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 18.378733796731165, | |
| "grad_norm": 0.6640487909317017, | |
| "learning_rate": 1.926078720359853e-06, | |
| "loss": 0.1246, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 18.39376291564907, | |
| "grad_norm": 0.7534406185150146, | |
| "learning_rate": 1.8901723944998118e-06, | |
| "loss": 0.1175, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 18.408792034566975, | |
| "grad_norm": 0.7041878700256348, | |
| "learning_rate": 1.8545974629105622e-06, | |
| "loss": 0.1191, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 18.423821153484877, | |
| "grad_norm": 0.6589450240135193, | |
| "learning_rate": 1.81935417064239e-06, | |
| "loss": 0.1155, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 18.43885027240278, | |
| "grad_norm": 0.6730456352233887, | |
| "learning_rate": 1.7844427604612024e-06, | |
| "loss": 0.1283, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 18.453879391320683, | |
| "grad_norm": 0.7545807361602783, | |
| "learning_rate": 1.74986347284678e-06, | |
| "loss": 0.114, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 18.468908510238588, | |
| "grad_norm": 0.720689058303833, | |
| "learning_rate": 1.7156165459911665e-06, | |
| "loss": 0.1228, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 18.48393762915649, | |
| "grad_norm": 0.6629992723464966, | |
| "learning_rate": 1.6817022157970042e-06, | |
| "loss": 0.1171, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 18.498966748074395, | |
| "grad_norm": 0.6659217476844788, | |
| "learning_rate": 1.648120715875906e-06, | |
| "loss": 0.1133, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 18.513995866992296, | |
| "grad_norm": 0.6609564423561096, | |
| "learning_rate": 1.6148722775468639e-06, | |
| "loss": 0.1343, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 18.5290249859102, | |
| "grad_norm": 0.6903553009033203, | |
| "learning_rate": 1.581957129834638e-06, | |
| "loss": 0.1182, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 18.544054104828106, | |
| "grad_norm": 0.7767003178596497, | |
| "learning_rate": 1.5493754994681976e-06, | |
| "loss": 0.122, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 18.559083223746008, | |
| "grad_norm": 0.6776891350746155, | |
| "learning_rate": 1.5171276108791544e-06, | |
| "loss": 0.1129, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 18.574112342663913, | |
| "grad_norm": 0.6937426924705505, | |
| "learning_rate": 1.4852136862001764e-06, | |
| "loss": 0.1136, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 18.589141461581814, | |
| "grad_norm": 0.7074488401412964, | |
| "learning_rate": 1.4536339452635384e-06, | |
| "loss": 0.1126, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 18.60417058049972, | |
| "grad_norm": 0.6760552525520325, | |
| "learning_rate": 1.4223886055995172e-06, | |
| "loss": 0.1227, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 18.61919969941762, | |
| "grad_norm": 0.7237436175346375, | |
| "learning_rate": 1.3914778824349884e-06, | |
| "loss": 0.1208, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 18.634228818335526, | |
| "grad_norm": 0.6534668803215027, | |
| "learning_rate": 1.3609019886918427e-06, | |
| "loss": 0.1171, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 18.649257937253427, | |
| "grad_norm": 0.6551641225814819, | |
| "learning_rate": 1.3306611349856112e-06, | |
| "loss": 0.1184, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 18.664287056171332, | |
| "grad_norm": 0.681528627872467, | |
| "learning_rate": 1.300755529623937e-06, | |
| "loss": 0.1203, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 18.679316175089234, | |
| "grad_norm": 0.7110047340393066, | |
| "learning_rate": 1.2711853786052109e-06, | |
| "loss": 0.1227, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 18.69434529400714, | |
| "grad_norm": 0.7127984166145325, | |
| "learning_rate": 1.241950885617088e-06, | |
| "loss": 0.1192, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 18.709374412925044, | |
| "grad_norm": 0.9400015473365784, | |
| "learning_rate": 1.2130522520351405e-06, | |
| "loss": 0.1206, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 18.724403531842945, | |
| "grad_norm": 0.640738844871521, | |
| "learning_rate": 1.1844896769214186e-06, | |
| "loss": 0.125, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 18.73943265076085, | |
| "grad_norm": 0.6960272789001465, | |
| "learning_rate": 1.1562633570231352e-06, | |
| "loss": 0.1181, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 18.754461769678752, | |
| "grad_norm": 0.7713277339935303, | |
| "learning_rate": 1.128373486771256e-06, | |
| "loss": 0.1183, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 18.769490888596657, | |
| "grad_norm": 0.6949428915977478, | |
| "learning_rate": 1.1008202582792004e-06, | |
| "loss": 0.1308, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 18.78452000751456, | |
| "grad_norm": 0.6489851474761963, | |
| "learning_rate": 1.0736038613414878e-06, | |
| "loss": 0.1288, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 18.799549126432463, | |
| "grad_norm": 0.7511118054389954, | |
| "learning_rate": 1.0467244834324707e-06, | |
| "loss": 0.1098, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 18.814578245350365, | |
| "grad_norm": 0.7278922200202942, | |
| "learning_rate": 1.0201823097049812e-06, | |
| "loss": 0.1248, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 18.82960736426827, | |
| "grad_norm": 0.7048822641372681, | |
| "learning_rate": 9.939775229891313e-07, | |
| "loss": 0.1201, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 18.84463648318617, | |
| "grad_norm": 0.7828486561775208, | |
| "learning_rate": 9.681103037909866e-07, | |
| "loss": 0.1271, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 18.859665602104076, | |
| "grad_norm": 0.6916821002960205, | |
| "learning_rate": 9.42580830291373e-07, | |
| "loss": 0.1151, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 18.87469472102198, | |
| "grad_norm": 0.7299247980117798, | |
| "learning_rate": 9.173892783445992e-07, | |
| "loss": 0.1287, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 18.889723839939883, | |
| "grad_norm": 0.8514544367790222, | |
| "learning_rate": 8.925358214772972e-07, | |
| "loss": 0.1261, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 18.904752958857788, | |
| "grad_norm": 0.6913233995437622, | |
| "learning_rate": 8.680206308871952e-07, | |
| "loss": 0.1091, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 18.91978207777569, | |
| "grad_norm": 0.7069427967071533, | |
| "learning_rate": 8.43843875441952e-07, | |
| "loss": 0.1242, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 18.934811196693595, | |
| "grad_norm": 0.6860793232917786, | |
| "learning_rate": 8.2000572167798e-07, | |
| "loss": 0.1245, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 18.949840315611496, | |
| "grad_norm": 0.6952442526817322, | |
| "learning_rate": 7.965063337993017e-07, | |
| "loss": 0.1194, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 18.9648694345294, | |
| "grad_norm": 0.7195196747779846, | |
| "learning_rate": 7.733458736764398e-07, | |
| "loss": 0.1266, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 18.979898553447303, | |
| "grad_norm": 0.685310959815979, | |
| "learning_rate": 7.505245008452788e-07, | |
| "loss": 0.1153, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 18.994927672365208, | |
| "grad_norm": 0.6967130899429321, | |
| "learning_rate": 7.280423725059604e-07, | |
| "loss": 0.1331, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 19.00901747135074, | |
| "grad_norm": 0.5955845713615417, | |
| "learning_rate": 7.058996435218346e-07, | |
| "loss": 0.1032, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 19.024046590268647, | |
| "grad_norm": 0.6826702356338501, | |
| "learning_rate": 6.840964664183436e-07, | |
| "loss": 0.1116, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 19.039075709186548, | |
| "grad_norm": 0.6504730582237244, | |
| "learning_rate": 6.626329913820339e-07, | |
| "loss": 0.1218, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 19.054104828104453, | |
| "grad_norm": 0.6690040230751038, | |
| "learning_rate": 6.415093662594629e-07, | |
| "loss": 0.1218, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 19.069133947022355, | |
| "grad_norm": 0.7162594199180603, | |
| "learning_rate": 6.207257365562047e-07, | |
| "loss": 0.1148, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 19.08416306594026, | |
| "grad_norm": 0.6570801734924316, | |
| "learning_rate": 6.00282245435857e-07, | |
| "loss": 0.1138, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 19.09919218485816, | |
| "grad_norm": 0.6705721616744995, | |
| "learning_rate": 5.80179033719036e-07, | |
| "loss": 0.1241, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 19.114221303776066, | |
| "grad_norm": 0.7230423092842102, | |
| "learning_rate": 5.604162398824275e-07, | |
| "loss": 0.1122, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 19.12925042269397, | |
| "grad_norm": 0.6463306546211243, | |
| "learning_rate": 5.409940000578206e-07, | |
| "loss": 0.1085, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 19.144279541611873, | |
| "grad_norm": 0.7528629302978516, | |
| "learning_rate": 5.219124480311532e-07, | |
| "loss": 0.1186, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 19.159308660529778, | |
| "grad_norm": 1.4888911247253418, | |
| "learning_rate": 5.031717152416238e-07, | |
| "loss": 0.1158, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 19.17433777944768, | |
| "grad_norm": 0.6441943645477295, | |
| "learning_rate": 4.847719307807752e-07, | |
| "loss": 0.1197, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 19.189366898365584, | |
| "grad_norm": 0.6627583503723145, | |
| "learning_rate": 4.6671322139158477e-07, | |
| "loss": 0.1168, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 19.204396017283486, | |
| "grad_norm": 0.6732495427131653, | |
| "learning_rate": 4.4899571146761467e-07, | |
| "loss": 0.1104, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 19.21942513620139, | |
| "grad_norm": 0.6743932366371155, | |
| "learning_rate": 4.3161952305215136e-07, | |
| "loss": 0.1185, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 19.234454255119292, | |
| "grad_norm": 0.7038917541503906, | |
| "learning_rate": 4.145847758373511e-07, | |
| "loss": 0.1216, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 19.249483374037197, | |
| "grad_norm": 0.6505002975463867, | |
| "learning_rate": 3.9789158716343475e-07, | |
| "loss": 0.1247, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 19.2645124929551, | |
| "grad_norm": 0.6234051585197449, | |
| "learning_rate": 3.815400720178719e-07, | |
| "loss": 0.1122, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 19.279541611873004, | |
| "grad_norm": 0.6669496297836304, | |
| "learning_rate": 3.6553034303457577e-07, | |
| "loss": 0.1127, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 19.29457073079091, | |
| "grad_norm": 0.7005789279937744, | |
| "learning_rate": 3.49862510493143e-07, | |
| "loss": 0.1135, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 19.30959984970881, | |
| "grad_norm": 0.7209417223930359, | |
| "learning_rate": 3.3453668231809286e-07, | |
| "loss": 0.115, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 19.324628968626715, | |
| "grad_norm": 0.670708179473877, | |
| "learning_rate": 3.1955296407811807e-07, | |
| "loss": 0.1147, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 19.339658087544617, | |
| "grad_norm": 0.6531425714492798, | |
| "learning_rate": 3.0491145898536856e-07, | |
| "loss": 0.1153, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 19.354687206462522, | |
| "grad_norm": 0.6748098134994507, | |
| "learning_rate": 2.9061226789471873e-07, | |
| "loss": 0.1098, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 19.369716325380423, | |
| "grad_norm": 0.7407058477401733, | |
| "learning_rate": 2.7665548930308484e-07, | |
| "loss": 0.1186, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 19.38474544429833, | |
| "grad_norm": 0.7474448680877686, | |
| "learning_rate": 2.6304121934876966e-07, | |
| "loss": 0.1167, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 19.39977456321623, | |
| "grad_norm": 0.710455596446991, | |
| "learning_rate": 2.497695518107579e-07, | |
| "loss": 0.1256, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 19.414803682134135, | |
| "grad_norm": 0.674196183681488, | |
| "learning_rate": 2.3684057810808847e-07, | |
| "loss": 0.1199, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 19.42983280105204, | |
| "grad_norm": 0.6443490982055664, | |
| "learning_rate": 2.2425438729924419e-07, | |
| "loss": 0.1134, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 19.44486191996994, | |
| "grad_norm": 0.6689858436584473, | |
| "learning_rate": 2.120110660815078e-07, | |
| "loss": 0.1213, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 19.459891038887847, | |
| "grad_norm": 0.6597970128059387, | |
| "learning_rate": 2.0011069879038447e-07, | |
| "loss": 0.127, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 19.474920157805748, | |
| "grad_norm": 0.6606748104095459, | |
| "learning_rate": 1.8855336739901363e-07, | |
| "loss": 0.1184, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 19.489949276723653, | |
| "grad_norm": 0.6770042181015015, | |
| "learning_rate": 1.773391515176026e-07, | |
| "loss": 0.1199, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 19.504978395641555, | |
| "grad_norm": 0.6483029723167419, | |
| "learning_rate": 1.6646812839287706e-07, | |
| "loss": 0.1094, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 19.52000751455946, | |
| "grad_norm": 0.6776772737503052, | |
| "learning_rate": 1.5594037290755925e-07, | |
| "loss": 0.115, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 19.53503663347736, | |
| "grad_norm": 0.6734815835952759, | |
| "learning_rate": 1.4575595757985173e-07, | |
| "loss": 0.1176, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 19.550065752395266, | |
| "grad_norm": 0.671363353729248, | |
| "learning_rate": 1.3591495256291554e-07, | |
| "loss": 0.1158, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 19.565094871313168, | |
| "grad_norm": 0.7096564769744873, | |
| "learning_rate": 1.2641742564441506e-07, | |
| "loss": 0.1178, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 19.580123990231073, | |
| "grad_norm": 0.7112547755241394, | |
| "learning_rate": 1.1726344224603502e-07, | |
| "loss": 0.1186, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 19.595153109148978, | |
| "grad_norm": 0.9371479153633118, | |
| "learning_rate": 1.0845306542303645e-07, | |
| "loss": 0.1158, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 19.61018222806688, | |
| "grad_norm": 0.666856050491333, | |
| "learning_rate": 9.998635586381255e-08, | |
| "loss": 0.1151, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 19.625211346984784, | |
| "grad_norm": 0.6255350708961487, | |
| "learning_rate": 9.186337188949457e-08, | |
| "loss": 0.1287, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 19.640240465902686, | |
| "grad_norm": 0.6888746619224548, | |
| "learning_rate": 8.408416945351328e-08, | |
| "loss": 0.119, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 19.65526958482059, | |
| "grad_norm": 0.6902468204498291, | |
| "learning_rate": 7.664880214123815e-08, | |
| "loss": 0.1199, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 19.670298703738492, | |
| "grad_norm": 0.6694928407669067, | |
| "learning_rate": 6.95573211696221e-08, | |
| "loss": 0.1262, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 19.685327822656397, | |
| "grad_norm": 0.6304376125335693, | |
| "learning_rate": 6.280977538681288e-08, | |
| "loss": 0.1196, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 19.7003569415743, | |
| "grad_norm": 0.7109536528587341, | |
| "learning_rate": 5.64062112718311e-08, | |
| "loss": 0.1158, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 19.715386060492204, | |
| "grad_norm": 0.6978461146354675, | |
| "learning_rate": 5.0346672934270534e-08, | |
| "loss": 0.1139, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 19.73041517941011, | |
| "grad_norm": 0.6379060745239258, | |
| "learning_rate": 4.4631202113953886e-08, | |
| "loss": 0.1157, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 19.74544429832801, | |
| "grad_norm": 0.6268938779830933, | |
| "learning_rate": 3.925983818069412e-08, | |
| "loss": 0.1086, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 19.760473417245915, | |
| "grad_norm": 0.7297201156616211, | |
| "learning_rate": 3.4232618133978044e-08, | |
| "loss": 0.1132, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 19.775502536163817, | |
| "grad_norm": 0.6648380756378174, | |
| "learning_rate": 2.9549576602733164e-08, | |
| "loss": 0.1124, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 19.790531655081722, | |
| "grad_norm": 0.7137235999107361, | |
| "learning_rate": 2.5210745845100082e-08, | |
| "loss": 0.1165, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 19.805560773999623, | |
| "grad_norm": 0.6801294684410095, | |
| "learning_rate": 2.1216155748182696e-08, | |
| "loss": 0.1155, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 19.82058989291753, | |
| "grad_norm": 0.719840407371521, | |
| "learning_rate": 1.756583382785948e-08, | |
| "loss": 0.1261, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 19.83561901183543, | |
| "grad_norm": 0.6777321696281433, | |
| "learning_rate": 1.4259805228594713e-08, | |
| "loss": 0.1172, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 19.850648130753335, | |
| "grad_norm": 0.6588504314422607, | |
| "learning_rate": 1.129809272326643e-08, | |
| "loss": 0.1151, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 19.865677249671236, | |
| "grad_norm": 0.6828821897506714, | |
| "learning_rate": 8.680716712988756e-09, | |
| "loss": 0.1176, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 19.88070636858914, | |
| "grad_norm": 0.7881568670272827, | |
| "learning_rate": 6.40769522700091e-09, | |
| "loss": 0.1212, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 19.895735487507046, | |
| "grad_norm": 0.6444976329803467, | |
| "learning_rate": 4.479043922528403e-09, | |
| "loss": 0.1141, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 19.910764606424948, | |
| "grad_norm": 0.6598045825958252, | |
| "learning_rate": 2.894776084672035e-09, | |
| "loss": 0.1181, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 19.925793725342853, | |
| "grad_norm": 0.6139656901359558, | |
| "learning_rate": 1.654902626324617e-09, | |
| "loss": 0.1222, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 19.940822844260754, | |
| "grad_norm": 0.6389946341514587, | |
| "learning_rate": 7.594320880821571e-10, | |
| "loss": 0.1218, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 19.95585196317866, | |
| "grad_norm": 0.6922657489776611, | |
| "learning_rate": 2.0837063821055326e-10, | |
| "loss": 0.1139, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 19.97088108209656, | |
| "grad_norm": 0.6712486743927002, | |
| "learning_rate": 1.7220725789801607e-12, | |
| "loss": 0.1172, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 19.97088108209656, | |
| "step": 13300, | |
| "total_flos": 1.732273085924172e+20, | |
| "train_loss": 0.5264352306477109, | |
| "train_runtime": 198006.0463, | |
| "train_samples_per_second": 4.301, | |
| "train_steps_per_second": 0.067 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 13300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.732273085924172e+20, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |