| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9998204829009962, | |
| "eval_steps": 500, | |
| "global_step": 5570, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003590341980073602, | |
| "grad_norm": 6.1201090812683105, | |
| "learning_rate": 1.7953321364452425e-06, | |
| "loss": 2.9926, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007180683960147204, | |
| "grad_norm": 4.0763983726501465, | |
| "learning_rate": 3.590664272890485e-06, | |
| "loss": 2.7866, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.010771025940220806, | |
| "grad_norm": 3.385845184326172, | |
| "learning_rate": 5.385996409335727e-06, | |
| "loss": 2.0378, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.014361367920294408, | |
| "grad_norm": 5.195909023284912, | |
| "learning_rate": 7.18132854578097e-06, | |
| "loss": 1.2251, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01795170990036801, | |
| "grad_norm": 2.219606637954712, | |
| "learning_rate": 8.976660682226211e-06, | |
| "loss": 0.6834, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02154205188044161, | |
| "grad_norm": 16.839906692504883, | |
| "learning_rate": 1.0771992818671454e-05, | |
| "loss": 0.4754, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.025132393860515214, | |
| "grad_norm": 25.55668067932129, | |
| "learning_rate": 1.2567324955116697e-05, | |
| "loss": 0.3818, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.028722735840588817, | |
| "grad_norm": 1.359479546546936, | |
| "learning_rate": 1.436265709156194e-05, | |
| "loss": 0.3797, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.032313077820662416, | |
| "grad_norm": 1.2728756666183472, | |
| "learning_rate": 1.615798922800718e-05, | |
| "loss": 0.3712, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03590341980073602, | |
| "grad_norm": 1.9393813610076904, | |
| "learning_rate": 1.7953321364452423e-05, | |
| "loss": 0.3564, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03949376178080962, | |
| "grad_norm": 1.4643720388412476, | |
| "learning_rate": 1.9748653500897668e-05, | |
| "loss": 0.3438, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04308410376088322, | |
| "grad_norm": 1.4880571365356445, | |
| "learning_rate": 2.154398563734291e-05, | |
| "loss": 0.321, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.046674445740956826, | |
| "grad_norm": 1.239957571029663, | |
| "learning_rate": 2.3339317773788153e-05, | |
| "loss": 0.319, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05026478772103043, | |
| "grad_norm": 1.2627112865447998, | |
| "learning_rate": 2.5134649910233395e-05, | |
| "loss": 0.3128, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05385512970110403, | |
| "grad_norm": 1.1520243883132935, | |
| "learning_rate": 2.6929982046678636e-05, | |
| "loss": 0.31, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05744547168117763, | |
| "grad_norm": 1.8554497957229614, | |
| "learning_rate": 2.872531418312388e-05, | |
| "loss": 0.3167, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.061035813661251236, | |
| "grad_norm": 1.8501205444335938, | |
| "learning_rate": 3.0520646319569125e-05, | |
| "loss": 0.3177, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06462615564132483, | |
| "grad_norm": 1.249617099761963, | |
| "learning_rate": 3.231597845601436e-05, | |
| "loss": 0.3081, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06821649762139843, | |
| "grad_norm": 1.1702481508255005, | |
| "learning_rate": 3.411131059245961e-05, | |
| "loss": 0.3122, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07180683960147204, | |
| "grad_norm": 1.0217711925506592, | |
| "learning_rate": 3.5906642728904846e-05, | |
| "loss": 0.3047, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07539718158154564, | |
| "grad_norm": 0.8885968923568726, | |
| "learning_rate": 3.770197486535009e-05, | |
| "loss": 0.3042, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07898752356161924, | |
| "grad_norm": 1.0739161968231201, | |
| "learning_rate": 3.9497307001795335e-05, | |
| "loss": 0.2957, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08257786554169284, | |
| "grad_norm": 1.963072419166565, | |
| "learning_rate": 4.129263913824057e-05, | |
| "loss": 0.2967, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08616820752176645, | |
| "grad_norm": 0.9546407461166382, | |
| "learning_rate": 4.308797127468582e-05, | |
| "loss": 0.2834, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08975854950184005, | |
| "grad_norm": 1.82941734790802, | |
| "learning_rate": 4.488330341113106e-05, | |
| "loss": 0.2864, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09334889148191365, | |
| "grad_norm": 1.4494279623031616, | |
| "learning_rate": 4.667863554757631e-05, | |
| "loss": 0.2891, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09693923346198725, | |
| "grad_norm": 1.195784330368042, | |
| "learning_rate": 4.847396768402155e-05, | |
| "loss": 0.2904, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10052957544206086, | |
| "grad_norm": 1.0053528547286987, | |
| "learning_rate": 5.026929982046679e-05, | |
| "loss": 0.2804, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10411991742213446, | |
| "grad_norm": 4.148128986358643, | |
| "learning_rate": 5.2064631956912034e-05, | |
| "loss": 0.3165, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.10771025940220806, | |
| "grad_norm": 1.4303346872329712, | |
| "learning_rate": 5.385996409335727e-05, | |
| "loss": 0.2747, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11130060138228166, | |
| "grad_norm": 1.128341794013977, | |
| "learning_rate": 5.565529622980251e-05, | |
| "loss": 0.297, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.11489094336235527, | |
| "grad_norm": 1.2280890941619873, | |
| "learning_rate": 5.745062836624776e-05, | |
| "loss": 0.2821, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11848128534242887, | |
| "grad_norm": 1.4685401916503906, | |
| "learning_rate": 5.9245960502693e-05, | |
| "loss": 0.2815, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.12207162732250247, | |
| "grad_norm": 2.4324777126312256, | |
| "learning_rate": 6.104129263913825e-05, | |
| "loss": 0.291, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.12566196930257606, | |
| "grad_norm": 1.2875359058380127, | |
| "learning_rate": 6.283662477558349e-05, | |
| "loss": 0.2852, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12925231128264966, | |
| "grad_norm": 2.257322072982788, | |
| "learning_rate": 6.463195691202873e-05, | |
| "loss": 0.2804, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.13284265326272326, | |
| "grad_norm": 1.3770567178726196, | |
| "learning_rate": 6.642728904847398e-05, | |
| "loss": 0.2873, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.13643299524279687, | |
| "grad_norm": 1.6921864748001099, | |
| "learning_rate": 6.822262118491922e-05, | |
| "loss": 0.2974, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.14002333722287047, | |
| "grad_norm": 0.9520618915557861, | |
| "learning_rate": 7.001795332136445e-05, | |
| "loss": 0.2939, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.14361367920294407, | |
| "grad_norm": 0.812728762626648, | |
| "learning_rate": 7.181328545780969e-05, | |
| "loss": 0.2702, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.14720402118301767, | |
| "grad_norm": 1.7924541234970093, | |
| "learning_rate": 7.360861759425493e-05, | |
| "loss": 0.2969, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.15079436316309128, | |
| "grad_norm": 2.439558982849121, | |
| "learning_rate": 7.540394973070018e-05, | |
| "loss": 0.2893, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.15438470514316488, | |
| "grad_norm": 0.8057828545570374, | |
| "learning_rate": 7.719928186714542e-05, | |
| "loss": 0.2808, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.15797504712323848, | |
| "grad_norm": 1.2622177600860596, | |
| "learning_rate": 7.899461400359067e-05, | |
| "loss": 0.282, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16156538910331208, | |
| "grad_norm": 1.1095036268234253, | |
| "learning_rate": 8.078994614003591e-05, | |
| "loss": 0.2691, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1651557310833857, | |
| "grad_norm": 0.7493880987167358, | |
| "learning_rate": 8.258527827648115e-05, | |
| "loss": 0.2748, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1687460730634593, | |
| "grad_norm": 0.7199195623397827, | |
| "learning_rate": 8.43806104129264e-05, | |
| "loss": 0.2876, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1723364150435329, | |
| "grad_norm": 0.9257749915122986, | |
| "learning_rate": 8.617594254937164e-05, | |
| "loss": 0.2801, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1759267570236065, | |
| "grad_norm": 0.9219655394554138, | |
| "learning_rate": 8.797127468581689e-05, | |
| "loss": 0.2717, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.1795170990036801, | |
| "grad_norm": 1.5916101932525635, | |
| "learning_rate": 8.976660682226212e-05, | |
| "loss": 0.275, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1831074409837537, | |
| "grad_norm": 1.1832544803619385, | |
| "learning_rate": 9.156193895870736e-05, | |
| "loss": 0.2774, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1866977829638273, | |
| "grad_norm": 0.8959478735923767, | |
| "learning_rate": 9.335727109515261e-05, | |
| "loss": 0.3003, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.1902881249439009, | |
| "grad_norm": 0.7720569968223572, | |
| "learning_rate": 9.515260323159785e-05, | |
| "loss": 0.2771, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.1938784669239745, | |
| "grad_norm": 1.263458013534546, | |
| "learning_rate": 9.69479353680431e-05, | |
| "loss": 0.2737, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1974688089040481, | |
| "grad_norm": 1.6316909790039062, | |
| "learning_rate": 9.874326750448834e-05, | |
| "loss": 0.2899, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2010591508841217, | |
| "grad_norm": 0.8948745131492615, | |
| "learning_rate": 9.999991163368873e-05, | |
| "loss": 0.2703, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.20464949286419531, | |
| "grad_norm": 1.680094599723816, | |
| "learning_rate": 9.999834068573299e-05, | |
| "loss": 0.2828, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.20823983484426892, | |
| "grad_norm": 1.1262023448944092, | |
| "learning_rate": 9.999480611298721e-05, | |
| "loss": 0.2651, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.21183017682434252, | |
| "grad_norm": 1.2514327764511108, | |
| "learning_rate": 9.998930805426751e-05, | |
| "loss": 0.2828, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.21542051880441612, | |
| "grad_norm": 0.8650713562965393, | |
| "learning_rate": 9.998184672550354e-05, | |
| "loss": 0.2641, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21901086078448972, | |
| "grad_norm": 1.3188605308532715, | |
| "learning_rate": 9.997242241973004e-05, | |
| "loss": 0.2791, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.22260120276456333, | |
| "grad_norm": 1.677878737449646, | |
| "learning_rate": 9.996103550707527e-05, | |
| "loss": 0.2803, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.22619154474463693, | |
| "grad_norm": 1.9317690134048462, | |
| "learning_rate": 9.994768643474658e-05, | |
| "loss": 0.263, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.22978188672471053, | |
| "grad_norm": 0.8656140565872192, | |
| "learning_rate": 9.993237572701274e-05, | |
| "loss": 0.2723, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.23337222870478413, | |
| "grad_norm": 0.7631008625030518, | |
| "learning_rate": 9.991510398518341e-05, | |
| "loss": 0.2958, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.23696257068485774, | |
| "grad_norm": 0.6852580308914185, | |
| "learning_rate": 9.989587188758552e-05, | |
| "loss": 0.2612, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.24055291266493134, | |
| "grad_norm": 0.6097802519798279, | |
| "learning_rate": 9.987468018953661e-05, | |
| "loss": 0.2607, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.24414325464500494, | |
| "grad_norm": 1.254186987876892, | |
| "learning_rate": 9.985152972331516e-05, | |
| "loss": 0.2662, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.24773359662507854, | |
| "grad_norm": 0.8868479132652283, | |
| "learning_rate": 9.982642139812793e-05, | |
| "loss": 0.2705, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2513239386051521, | |
| "grad_norm": 1.5867512226104736, | |
| "learning_rate": 9.979935620007424e-05, | |
| "loss": 0.2735, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.25491428058522575, | |
| "grad_norm": 0.7384280562400818, | |
| "learning_rate": 9.977033519210725e-05, | |
| "loss": 0.2676, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2585046225652993, | |
| "grad_norm": 0.7617084383964539, | |
| "learning_rate": 9.97393595139922e-05, | |
| "loss": 0.2655, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.26209496454537295, | |
| "grad_norm": 0.6475211381912231, | |
| "learning_rate": 9.970643038226166e-05, | |
| "loss": 0.2629, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.26568530652544653, | |
| "grad_norm": 1.3059916496276855, | |
| "learning_rate": 9.967154909016772e-05, | |
| "loss": 0.2548, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.26927564850552016, | |
| "grad_norm": 1.1138116121292114, | |
| "learning_rate": 9.963471700763123e-05, | |
| "loss": 0.2525, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.27286599048559373, | |
| "grad_norm": 1.0550082921981812, | |
| "learning_rate": 9.959593558118803e-05, | |
| "loss": 0.2622, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.27645633246566736, | |
| "grad_norm": 0.8017902374267578, | |
| "learning_rate": 9.955520633393205e-05, | |
| "loss": 0.2649, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.28004667444574094, | |
| "grad_norm": 1.235143780708313, | |
| "learning_rate": 9.951253086545558e-05, | |
| "loss": 0.2747, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.28363701642581457, | |
| "grad_norm": 0.7427018284797668, | |
| "learning_rate": 9.946791085178639e-05, | |
| "loss": 0.242, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.28722735840588814, | |
| "grad_norm": 0.6972371935844421, | |
| "learning_rate": 9.942134804532193e-05, | |
| "loss": 0.2423, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2908177003859618, | |
| "grad_norm": 0.9071277976036072, | |
| "learning_rate": 9.937284427476052e-05, | |
| "loss": 0.2425, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.29440804236603535, | |
| "grad_norm": 0.8345310688018799, | |
| "learning_rate": 9.932240144502952e-05, | |
| "loss": 0.2864, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.297998384346109, | |
| "grad_norm": 1.1392581462860107, | |
| "learning_rate": 9.927002153721044e-05, | |
| "loss": 0.2366, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.30158872632618255, | |
| "grad_norm": 0.9356684684753418, | |
| "learning_rate": 9.921570660846131e-05, | |
| "loss": 0.2464, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3051790683062562, | |
| "grad_norm": 1.5248229503631592, | |
| "learning_rate": 9.915945879193571e-05, | |
| "loss": 0.2809, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.30876941028632976, | |
| "grad_norm": 1.0663933753967285, | |
| "learning_rate": 9.91012802966991e-05, | |
| "loss": 0.2779, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3123597522664034, | |
| "grad_norm": 0.9292562007904053, | |
| "learning_rate": 9.904117340764201e-05, | |
| "loss": 0.2465, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.31595009424647696, | |
| "grad_norm": 0.7365911602973938, | |
| "learning_rate": 9.897914048539032e-05, | |
| "loss": 0.2688, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3195404362265506, | |
| "grad_norm": 1.0190156698226929, | |
| "learning_rate": 9.891518396621258e-05, | |
| "loss": 0.2471, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.32313077820662417, | |
| "grad_norm": 1.167611837387085, | |
| "learning_rate": 9.884930636192426e-05, | |
| "loss": 0.2468, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3267211201866978, | |
| "grad_norm": 1.1509454250335693, | |
| "learning_rate": 9.878151025978918e-05, | |
| "loss": 0.2528, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3303114621667714, | |
| "grad_norm": 1.0654162168502808, | |
| "learning_rate": 9.871179832241781e-05, | |
| "loss": 0.2669, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.333901804146845, | |
| "grad_norm": 0.9040902853012085, | |
| "learning_rate": 9.86401732876628e-05, | |
| "loss": 0.2513, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3374921461269186, | |
| "grad_norm": 2.8603482246398926, | |
| "learning_rate": 9.856663796851137e-05, | |
| "loss": 0.2526, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3410824881069922, | |
| "grad_norm": 0.7283102869987488, | |
| "learning_rate": 9.849119525297488e-05, | |
| "loss": 0.2565, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3446728300870658, | |
| "grad_norm": 1.1231544017791748, | |
| "learning_rate": 9.841384810397538e-05, | |
| "loss": 0.2591, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3482631720671394, | |
| "grad_norm": 1.3341351747512817, | |
| "learning_rate": 9.833459955922926e-05, | |
| "loss": 0.2426, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.351853514047213, | |
| "grad_norm": 0.7382979393005371, | |
| "learning_rate": 9.825345273112796e-05, | |
| "loss": 0.2404, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3554438560272866, | |
| "grad_norm": 0.9196600914001465, | |
| "learning_rate": 9.817041080661571e-05, | |
| "loss": 0.269, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3590341980073602, | |
| "grad_norm": 4.254228115081787, | |
| "learning_rate": 9.808547704706437e-05, | |
| "loss": 0.2498, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3626245399874338, | |
| "grad_norm": 0.6999326348304749, | |
| "learning_rate": 9.799865478814535e-05, | |
| "loss": 0.242, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3662148819675074, | |
| "grad_norm": 1.5552287101745605, | |
| "learning_rate": 9.790994743969864e-05, | |
| "loss": 0.2663, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.36980522394758103, | |
| "grad_norm": 0.6971444487571716, | |
| "learning_rate": 9.781935848559878e-05, | |
| "loss": 0.2549, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3733955659276546, | |
| "grad_norm": 1.180908441543579, | |
| "learning_rate": 9.772689148361817e-05, | |
| "loss": 0.2313, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.37698590790772823, | |
| "grad_norm": 0.633343517780304, | |
| "learning_rate": 9.763255006528731e-05, | |
| "loss": 0.2395, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.3805762498878018, | |
| "grad_norm": 0.9181081056594849, | |
| "learning_rate": 9.753633793575206e-05, | |
| "loss": 0.2512, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.38416659186787544, | |
| "grad_norm": 1.1254559755325317, | |
| "learning_rate": 9.743825887362832e-05, | |
| "loss": 0.2467, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.387756933847949, | |
| "grad_norm": 0.8145197629928589, | |
| "learning_rate": 9.733831673085344e-05, | |
| "loss": 0.2421, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.39134727582802264, | |
| "grad_norm": 0.5483050346374512, | |
| "learning_rate": 9.723651543253509e-05, | |
| "loss": 0.2578, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3949376178080962, | |
| "grad_norm": 0.7891978621482849, | |
| "learning_rate": 9.713285897679699e-05, | |
| "loss": 0.2339, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.39852795978816985, | |
| "grad_norm": 0.6310613751411438, | |
| "learning_rate": 9.702735143462198e-05, | |
| "loss": 0.2379, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.4021183017682434, | |
| "grad_norm": 0.8631925582885742, | |
| "learning_rate": 9.691999694969208e-05, | |
| "loss": 0.2413, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.40570864374831705, | |
| "grad_norm": 0.7224175930023193, | |
| "learning_rate": 9.681079973822576e-05, | |
| "loss": 0.2343, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.40929898572839063, | |
| "grad_norm": 0.8189213871955872, | |
| "learning_rate": 9.669976408881238e-05, | |
| "loss": 0.2513, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4128893277084642, | |
| "grad_norm": 0.8129417300224304, | |
| "learning_rate": 9.658689436224373e-05, | |
| "loss": 0.2547, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.41647966968853783, | |
| "grad_norm": 1.1440197229385376, | |
| "learning_rate": 9.647219499134277e-05, | |
| "loss": 0.2427, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4200700116686114, | |
| "grad_norm": 0.9682267308235168, | |
| "learning_rate": 9.635567048078958e-05, | |
| "loss": 0.2411, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.42366035364868504, | |
| "grad_norm": 0.7513495683670044, | |
| "learning_rate": 9.623732540694437e-05, | |
| "loss": 0.252, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4272506956287586, | |
| "grad_norm": 3.1498029232025146, | |
| "learning_rate": 9.61171644176678e-05, | |
| "loss": 0.2486, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.43084103760883224, | |
| "grad_norm": 0.6250784397125244, | |
| "learning_rate": 9.599519223213842e-05, | |
| "loss": 0.2459, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4344313795889058, | |
| "grad_norm": 0.548052966594696, | |
| "learning_rate": 9.587141364066736e-05, | |
| "loss": 0.2334, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.43802172156897945, | |
| "grad_norm": 0.6549167037010193, | |
| "learning_rate": 9.574583350451016e-05, | |
| "loss": 0.2399, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.441612063549053, | |
| "grad_norm": 0.7177796363830566, | |
| "learning_rate": 9.561845675567586e-05, | |
| "loss": 0.2574, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.44520240552912665, | |
| "grad_norm": 1.0265281200408936, | |
| "learning_rate": 9.548928839673334e-05, | |
| "loss": 0.2285, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.44879274750920023, | |
| "grad_norm": 1.3221251964569092, | |
| "learning_rate": 9.535833350061473e-05, | |
| "loss": 0.2293, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.45238308948927386, | |
| "grad_norm": 0.9542430639266968, | |
| "learning_rate": 9.522559721041636e-05, | |
| "loss": 0.2367, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.45597343146934743, | |
| "grad_norm": 2.0089797973632812, | |
| "learning_rate": 9.509108473919662e-05, | |
| "loss": 0.2166, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.45956377344942106, | |
| "grad_norm": 1.2323672771453857, | |
| "learning_rate": 9.495480136977127e-05, | |
| "loss": 0.2253, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.46315411542949464, | |
| "grad_norm": 1.155745506286621, | |
| "learning_rate": 9.4816752454506e-05, | |
| "loss": 0.2236, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.46674445740956827, | |
| "grad_norm": 0.5866098403930664, | |
| "learning_rate": 9.46769434151062e-05, | |
| "loss": 0.2346, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.47033479938964184, | |
| "grad_norm": 0.8677975535392761, | |
| "learning_rate": 9.4535379742404e-05, | |
| "loss": 0.2229, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4739251413697155, | |
| "grad_norm": 0.8805405497550964, | |
| "learning_rate": 9.439206699614263e-05, | |
| "loss": 0.2279, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.47751548334978905, | |
| "grad_norm": 0.5903385877609253, | |
| "learning_rate": 9.424701080475811e-05, | |
| "loss": 0.2454, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4811058253298627, | |
| "grad_norm": 0.9364457726478577, | |
| "learning_rate": 9.410021686515815e-05, | |
| "loss": 0.2454, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.48469616730993625, | |
| "grad_norm": 1.4409586191177368, | |
| "learning_rate": 9.39516909424985e-05, | |
| "loss": 0.2417, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4882865092900099, | |
| "grad_norm": 0.705747663974762, | |
| "learning_rate": 9.380143886995636e-05, | |
| "loss": 0.2253, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.49187685127008346, | |
| "grad_norm": 1.2557168006896973, | |
| "learning_rate": 9.364946654850148e-05, | |
| "loss": 0.2332, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.4954671932501571, | |
| "grad_norm": 1.4732472896575928, | |
| "learning_rate": 9.349577994666427e-05, | |
| "loss": 0.2202, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.49905753523023066, | |
| "grad_norm": 1.1212490797042847, | |
| "learning_rate": 9.33403851003015e-05, | |
| "loss": 0.2064, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5026478772103042, | |
| "grad_norm": 0.825175404548645, | |
| "learning_rate": 9.31832881123591e-05, | |
| "loss": 0.2148, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5062382191903779, | |
| "grad_norm": 0.8229523301124573, | |
| "learning_rate": 9.302449515263268e-05, | |
| "loss": 0.2307, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5098285611704515, | |
| "grad_norm": 0.8145741820335388, | |
| "learning_rate": 9.286401245752501e-05, | |
| "loss": 0.2405, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5134189031505251, | |
| "grad_norm": 0.7511823177337646, | |
| "learning_rate": 9.270184632980121e-05, | |
| "loss": 0.2311, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5170092451305986, | |
| "grad_norm": 0.7575204968452454, | |
| "learning_rate": 9.253800313834127e-05, | |
| "loss": 0.2068, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5205995871106723, | |
| "grad_norm": 0.6711773872375488, | |
| "learning_rate": 9.237248931788972e-05, | |
| "loss": 0.2336, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5241899290907459, | |
| "grad_norm": 0.7057952880859375, | |
| "learning_rate": 9.220531136880314e-05, | |
| "loss": 0.2332, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5277802710708195, | |
| "grad_norm": 0.7404478788375854, | |
| "learning_rate": 9.203647585679471e-05, | |
| "loss": 0.2204, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5313706130508931, | |
| "grad_norm": 0.6271808743476868, | |
| "learning_rate": 9.186598941267642e-05, | |
| "loss": 0.207, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5349609550309667, | |
| "grad_norm": 0.7089178562164307, | |
| "learning_rate": 9.169385873209863e-05, | |
| "loss": 0.2259, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5385512970110403, | |
| "grad_norm": 0.949642539024353, | |
| "learning_rate": 9.152009057528714e-05, | |
| "loss": 0.229, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5421416389911139, | |
| "grad_norm": 0.7554659247398376, | |
| "learning_rate": 9.134469176677762e-05, | |
| "loss": 0.2208, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5457319809711875, | |
| "grad_norm": 0.713874340057373, | |
| "learning_rate": 9.116766919514765e-05, | |
| "loss": 0.2177, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5493223229512612, | |
| "grad_norm": 0.6753556728363037, | |
| "learning_rate": 9.098902981274615e-05, | |
| "loss": 0.2202, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5529126649313347, | |
| "grad_norm": 1.2491189241409302, | |
| "learning_rate": 9.080878063542035e-05, | |
| "loss": 0.2118, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5565030069114083, | |
| "grad_norm": 0.6264563798904419, | |
| "learning_rate": 9.062692874224024e-05, | |
| "loss": 0.2211, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5600933488914819, | |
| "grad_norm": 0.4661034941673279, | |
| "learning_rate": 9.044348127522054e-05, | |
| "loss": 0.2168, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5636836908715556, | |
| "grad_norm": 0.6062325835227966, | |
| "learning_rate": 9.025844543904022e-05, | |
| "loss": 0.214, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5672740328516291, | |
| "grad_norm": 0.6374778747558594, | |
| "learning_rate": 9.007182850075956e-05, | |
| "loss": 0.2083, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5708643748317027, | |
| "grad_norm": 1.131443738937378, | |
| "learning_rate": 8.98836377895347e-05, | |
| "loss": 0.2005, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5744547168117763, | |
| "grad_norm": 0.6167281866073608, | |
| "learning_rate": 8.969388069632987e-05, | |
| "loss": 0.2122, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.57804505879185, | |
| "grad_norm": 0.9362030625343323, | |
| "learning_rate": 8.950256467362699e-05, | |
| "loss": 0.2275, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.5816354007719235, | |
| "grad_norm": 0.9304684996604919, | |
| "learning_rate": 8.930969723513312e-05, | |
| "loss": 0.2027, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.5852257427519971, | |
| "grad_norm": 0.62895268201828, | |
| "learning_rate": 8.911528595548533e-05, | |
| "loss": 0.2266, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.5888160847320707, | |
| "grad_norm": 1.480999231338501, | |
| "learning_rate": 8.891933846995312e-05, | |
| "loss": 0.2052, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5924064267121444, | |
| "grad_norm": 1.3081512451171875, | |
| "learning_rate": 8.872186247413874e-05, | |
| "loss": 0.212, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.595996768692218, | |
| "grad_norm": 2.765312671661377, | |
| "learning_rate": 8.852286572367476e-05, | |
| "loss": 0.2233, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5995871106722915, | |
| "grad_norm": 1.2033319473266602, | |
| "learning_rate": 8.832235603391958e-05, | |
| "loss": 0.2199, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6031774526523651, | |
| "grad_norm": 1.092360496520996, | |
| "learning_rate": 8.812034127965048e-05, | |
| "loss": 0.1994, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6067677946324388, | |
| "grad_norm": 1.0622711181640625, | |
| "learning_rate": 8.791682939475438e-05, | |
| "loss": 0.2117, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6103581366125124, | |
| "grad_norm": 0.722064733505249, | |
| "learning_rate": 8.771182837191613e-05, | |
| "loss": 0.2219, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6139484785925859, | |
| "grad_norm": 0.602187991142273, | |
| "learning_rate": 8.750534626230475e-05, | |
| "loss": 0.2159, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6175388205726595, | |
| "grad_norm": 0.7628340721130371, | |
| "learning_rate": 8.729739117525715e-05, | |
| "loss": 0.2088, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6211291625527331, | |
| "grad_norm": 0.5262313485145569, | |
| "learning_rate": 8.708797127795963e-05, | |
| "loss": 0.2285, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6247195045328068, | |
| "grad_norm": 0.6427643299102783, | |
| "learning_rate": 8.68770947951272e-05, | |
| "loss": 0.2094, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6283098465128804, | |
| "grad_norm": 0.5874310731887817, | |
| "learning_rate": 8.666477000868046e-05, | |
| "loss": 0.2263, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6319001884929539, | |
| "grad_norm": 0.561213493347168, | |
| "learning_rate": 8.645100525742042e-05, | |
| "loss": 0.2025, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6354905304730275, | |
| "grad_norm": 0.7805958390235901, | |
| "learning_rate": 8.623580893670105e-05, | |
| "loss": 0.2171, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6390808724531012, | |
| "grad_norm": 0.5806890726089478, | |
| "learning_rate": 8.601918949809937e-05, | |
| "loss": 0.2103, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6426712144331748, | |
| "grad_norm": 0.581363320350647, | |
| "learning_rate": 8.580115544908374e-05, | |
| "loss": 0.2129, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6462615564132483, | |
| "grad_norm": 0.4736599326133728, | |
| "learning_rate": 8.558171535267958e-05, | |
| "loss": 0.1993, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6498518983933219, | |
| "grad_norm": 0.6482508778572083, | |
| "learning_rate": 8.536087782713318e-05, | |
| "loss": 0.193, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6534422403733956, | |
| "grad_norm": 0.7920377850532532, | |
| "learning_rate": 8.513865154557315e-05, | |
| "loss": 0.1989, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6570325823534692, | |
| "grad_norm": 0.7527133226394653, | |
| "learning_rate": 8.491504523566985e-05, | |
| "loss": 0.215, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.6606229243335427, | |
| "grad_norm": 0.8890761733055115, | |
| "learning_rate": 8.46900676792926e-05, | |
| "loss": 0.1972, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6642132663136163, | |
| "grad_norm": 1.100785732269287, | |
| "learning_rate": 8.44637277121647e-05, | |
| "loss": 0.1958, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.66780360829369, | |
| "grad_norm": 0.6120195388793945, | |
| "learning_rate": 8.423603422351665e-05, | |
| "loss": 0.21, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6713939502737636, | |
| "grad_norm": 0.9138973951339722, | |
| "learning_rate": 8.400699615573671e-05, | |
| "loss": 0.2144, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6749842922538372, | |
| "grad_norm": 0.6855999827384949, | |
| "learning_rate": 8.377662250402e-05, | |
| "loss": 0.1949, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6785746342339107, | |
| "grad_norm": 0.8468754291534424, | |
| "learning_rate": 8.354492231601505e-05, | |
| "loss": 0.207, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.6821649762139844, | |
| "grad_norm": 0.650043249130249, | |
| "learning_rate": 8.331190469146848e-05, | |
| "loss": 0.2029, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.685755318194058, | |
| "grad_norm": 0.7149790525436401, | |
| "learning_rate": 8.307757878186767e-05, | |
| "loss": 0.1891, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.6893456601741316, | |
| "grad_norm": 0.5650553703308105, | |
| "learning_rate": 8.284195379008137e-05, | |
| "loss": 0.2034, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.6929360021542051, | |
| "grad_norm": 0.8220282793045044, | |
| "learning_rate": 8.260503896999814e-05, | |
| "loss": 0.2004, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.6965263441342788, | |
| "grad_norm": 0.9552260041236877, | |
| "learning_rate": 8.236684362616307e-05, | |
| "loss": 0.2052, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7001166861143524, | |
| "grad_norm": 0.643084704875946, | |
| "learning_rate": 8.212737711341223e-05, | |
| "loss": 0.2072, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.703707028094426, | |
| "grad_norm": 0.6681669354438782, | |
| "learning_rate": 8.188664883650537e-05, | |
| "loss": 0.1969, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7072973700744996, | |
| "grad_norm": 1.1286799907684326, | |
| "learning_rate": 8.164466824975647e-05, | |
| "loss": 0.1964, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7108877120545732, | |
| "grad_norm": 0.7001319527626038, | |
| "learning_rate": 8.14014448566625e-05, | |
| "loss": 0.1728, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7144780540346468, | |
| "grad_norm": 0.8087079524993896, | |
| "learning_rate": 8.115698820953012e-05, | |
| "loss": 0.1879, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7180683960147204, | |
| "grad_norm": 0.5888068079948425, | |
| "learning_rate": 8.091130790910065e-05, | |
| "loss": 0.2017, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.721658737994794, | |
| "grad_norm": 0.868241012096405, | |
| "learning_rate": 8.066441360417283e-05, | |
| "loss": 0.2002, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7252490799748676, | |
| "grad_norm": 0.9173946976661682, | |
| "learning_rate": 8.041631499122399e-05, | |
| "loss": 0.1822, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7288394219549412, | |
| "grad_norm": 0.7348050475120544, | |
| "learning_rate": 8.016702181402925e-05, | |
| "loss": 0.1822, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7324297639350148, | |
| "grad_norm": 0.5974103808403015, | |
| "learning_rate": 7.991654386327877e-05, | |
| "loss": 0.1894, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7360201059150884, | |
| "grad_norm": 1.2631843090057373, | |
| "learning_rate": 7.966489097619327e-05, | |
| "loss": 0.2005, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7396104478951621, | |
| "grad_norm": 0.9306305050849915, | |
| "learning_rate": 7.941207303613773e-05, | |
| "loss": 0.2077, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7432007898752356, | |
| "grad_norm": 0.6469571590423584, | |
| "learning_rate": 7.915809997223312e-05, | |
| "loss": 0.1893, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7467911318553092, | |
| "grad_norm": 0.6804335713386536, | |
| "learning_rate": 7.89029817589665e-05, | |
| "loss": 0.1985, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.7503814738353828, | |
| "grad_norm": 0.6059459447860718, | |
| "learning_rate": 7.864672841579944e-05, | |
| "loss": 0.1856, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7539718158154565, | |
| "grad_norm": 0.6755326390266418, | |
| "learning_rate": 7.838935000677419e-05, | |
| "loss": 0.1816, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.75756215779553, | |
| "grad_norm": 0.5813919901847839, | |
| "learning_rate": 7.813085664011873e-05, | |
| "loss": 0.1796, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.7611524997756036, | |
| "grad_norm": 0.9791029691696167, | |
| "learning_rate": 7.78712584678496e-05, | |
| "loss": 0.204, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.7647428417556772, | |
| "grad_norm": 0.6557776927947998, | |
| "learning_rate": 7.76105656853733e-05, | |
| "loss": 0.1897, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.7683331837357509, | |
| "grad_norm": 0.5696374177932739, | |
| "learning_rate": 7.73487885310858e-05, | |
| "loss": 0.1882, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.7719235257158245, | |
| "grad_norm": 0.704799473285675, | |
| "learning_rate": 7.708593728597046e-05, | |
| "loss": 0.186, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.775513867695898, | |
| "grad_norm": 0.9005138874053955, | |
| "learning_rate": 7.682202227319433e-05, | |
| "loss": 0.1938, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.7791042096759716, | |
| "grad_norm": 0.7679111957550049, | |
| "learning_rate": 7.655705385770258e-05, | |
| "loss": 0.182, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.7826945516560453, | |
| "grad_norm": 0.7027627229690552, | |
| "learning_rate": 7.629104244581156e-05, | |
| "loss": 0.1859, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.7862848936361189, | |
| "grad_norm": 0.8638216853141785, | |
| "learning_rate": 7.602399848480002e-05, | |
| "loss": 0.1945, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7898752356161924, | |
| "grad_norm": 0.6846340894699097, | |
| "learning_rate": 7.575593246249885e-05, | |
| "loss": 0.1899, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.793465577596266, | |
| "grad_norm": 0.7671458721160889, | |
| "learning_rate": 7.548685490687919e-05, | |
| "loss": 0.1835, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.7970559195763397, | |
| "grad_norm": 1.7174897193908691, | |
| "learning_rate": 7.521677638563889e-05, | |
| "loss": 0.1742, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8006462615564133, | |
| "grad_norm": 1.024430751800537, | |
| "learning_rate": 7.494570750578757e-05, | |
| "loss": 0.1827, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8042366035364868, | |
| "grad_norm": 0.8393763303756714, | |
| "learning_rate": 7.467365891322995e-05, | |
| "loss": 0.1726, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.8078269455165604, | |
| "grad_norm": 3.184171438217163, | |
| "learning_rate": 7.440064129234783e-05, | |
| "loss": 0.1855, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8114172874966341, | |
| "grad_norm": 0.7078256011009216, | |
| "learning_rate": 7.412666536558041e-05, | |
| "loss": 0.1783, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.8150076294767077, | |
| "grad_norm": 0.7265491485595703, | |
| "learning_rate": 7.385174189300323e-05, | |
| "loss": 0.19, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8185979714567813, | |
| "grad_norm": 0.8136366605758667, | |
| "learning_rate": 7.35758816719055e-05, | |
| "loss": 0.1685, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8221883134368548, | |
| "grad_norm": 1.0148855447769165, | |
| "learning_rate": 7.329909553636618e-05, | |
| "loss": 0.1781, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.8257786554169284, | |
| "grad_norm": 0.9568372964859009, | |
| "learning_rate": 7.302139435682831e-05, | |
| "loss": 0.1702, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8293689973970021, | |
| "grad_norm": 1.8222324848175049, | |
| "learning_rate": 7.274278903967229e-05, | |
| "loss": 0.1823, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.8329593393770757, | |
| "grad_norm": 0.6024855375289917, | |
| "learning_rate": 7.246329052678736e-05, | |
| "loss": 0.1741, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8365496813571492, | |
| "grad_norm": 0.9722542762756348, | |
| "learning_rate": 7.218290979514202e-05, | |
| "loss": 0.1757, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.8401400233372228, | |
| "grad_norm": 2.1216533184051514, | |
| "learning_rate": 7.190165785635273e-05, | |
| "loss": 0.1748, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8437303653172965, | |
| "grad_norm": 0.6482483148574829, | |
| "learning_rate": 7.161954575625172e-05, | |
| "loss": 0.1799, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8473207072973701, | |
| "grad_norm": 2.2838494777679443, | |
| "learning_rate": 7.133658457445291e-05, | |
| "loss": 0.1616, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8509110492774437, | |
| "grad_norm": 0.6801573634147644, | |
| "learning_rate": 7.105278542391695e-05, | |
| "loss": 0.1806, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8545013912575172, | |
| "grad_norm": 0.8442283272743225, | |
| "learning_rate": 7.076815945051465e-05, | |
| "loss": 0.1821, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8580917332375909, | |
| "grad_norm": 1.1653680801391602, | |
| "learning_rate": 7.048271783258936e-05, | |
| "loss": 0.1773, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.8616820752176645, | |
| "grad_norm": 0.6987717151641846, | |
| "learning_rate": 7.019647178051779e-05, | |
| "loss": 0.1693, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8652724171977381, | |
| "grad_norm": 0.6374627351760864, | |
| "learning_rate": 6.990943253626994e-05, | |
| "loss": 0.194, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.8688627591778116, | |
| "grad_norm": 0.6507960557937622, | |
| "learning_rate": 6.962161137296743e-05, | |
| "loss": 0.1568, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.8724531011578853, | |
| "grad_norm": 0.6699422597885132, | |
| "learning_rate": 6.933301959444082e-05, | |
| "loss": 0.1759, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.8760434431379589, | |
| "grad_norm": 0.48265889286994934, | |
| "learning_rate": 6.904366853478567e-05, | |
| "loss": 0.1735, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.8796337851180325, | |
| "grad_norm": 0.8710943460464478, | |
| "learning_rate": 6.875356955791735e-05, | |
| "loss": 0.1807, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.883224127098106, | |
| "grad_norm": 0.7356705069541931, | |
| "learning_rate": 6.846273405712483e-05, | |
| "loss": 0.1751, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.8868144690781797, | |
| "grad_norm": 0.6466989517211914, | |
| "learning_rate": 6.817117345462316e-05, | |
| "loss": 0.1599, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.8904048110582533, | |
| "grad_norm": 0.5134007334709167, | |
| "learning_rate": 6.787889920110488e-05, | |
| "loss": 0.1666, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.8939951530383269, | |
| "grad_norm": 0.471064954996109, | |
| "learning_rate": 6.75859227752903e-05, | |
| "loss": 0.1624, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.8975854950184005, | |
| "grad_norm": 0.606399655342102, | |
| "learning_rate": 6.729225568347677e-05, | |
| "loss": 0.1696, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9011758369984741, | |
| "grad_norm": 0.6752104759216309, | |
| "learning_rate": 6.699790945908662e-05, | |
| "loss": 0.1607, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.9047661789785477, | |
| "grad_norm": 0.8237718939781189, | |
| "learning_rate": 6.670289566221437e-05, | |
| "loss": 0.1601, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9083565209586213, | |
| "grad_norm": 0.7542670965194702, | |
| "learning_rate": 6.640722587917263e-05, | |
| "loss": 0.1608, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9119468629386949, | |
| "grad_norm": 0.609646737575531, | |
| "learning_rate": 6.611091172203708e-05, | |
| "loss": 0.1586, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.9155372049187686, | |
| "grad_norm": 0.7793768644332886, | |
| "learning_rate": 6.581396482819038e-05, | |
| "loss": 0.1601, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9191275468988421, | |
| "grad_norm": 0.9071997404098511, | |
| "learning_rate": 6.551639685986524e-05, | |
| "loss": 0.166, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9227178888789157, | |
| "grad_norm": 1.0000146627426147, | |
| "learning_rate": 6.521821950368625e-05, | |
| "loss": 0.1702, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.9263082308589893, | |
| "grad_norm": 0.8889328241348267, | |
| "learning_rate": 6.491944447021102e-05, | |
| "loss": 0.1669, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.929898572839063, | |
| "grad_norm": 0.6329061985015869, | |
| "learning_rate": 6.462008349347022e-05, | |
| "loss": 0.1641, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9334889148191365, | |
| "grad_norm": 0.7821244597434998, | |
| "learning_rate": 6.43201483305067e-05, | |
| "loss": 0.1643, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9370792567992101, | |
| "grad_norm": 1.3463133573532104, | |
| "learning_rate": 6.401965076091382e-05, | |
| "loss": 0.1603, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9406695987792837, | |
| "grad_norm": 2.534256935119629, | |
| "learning_rate": 6.371860258637278e-05, | |
| "loss": 0.1577, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.9442599407593574, | |
| "grad_norm": 0.9502484202384949, | |
| "learning_rate": 6.341701563018913e-05, | |
| "loss": 0.1529, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.947850282739431, | |
| "grad_norm": 0.5928242206573486, | |
| "learning_rate": 6.311490173682839e-05, | |
| "loss": 0.1633, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.9514406247195045, | |
| "grad_norm": 1.3390663862228394, | |
| "learning_rate": 6.281227277145093e-05, | |
| "loss": 0.1609, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9550309666995781, | |
| "grad_norm": 0.8307391405105591, | |
| "learning_rate": 6.250914061944597e-05, | |
| "loss": 0.1654, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.9586213086796518, | |
| "grad_norm": 0.6453768610954285, | |
| "learning_rate": 6.220551718596477e-05, | |
| "loss": 0.1504, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.9622116506597254, | |
| "grad_norm": 0.9472678899765015, | |
| "learning_rate": 6.190141439545304e-05, | |
| "loss": 0.1441, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.9658019926397989, | |
| "grad_norm": 1.077405571937561, | |
| "learning_rate": 6.159684419118274e-05, | |
| "loss": 0.1574, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.9693923346198725, | |
| "grad_norm": 1.373565673828125, | |
| "learning_rate": 6.129181853478285e-05, | |
| "loss": 0.1557, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.9729826765999462, | |
| "grad_norm": 0.7159507274627686, | |
| "learning_rate": 6.0986349405769795e-05, | |
| "loss": 0.148, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.9765730185800198, | |
| "grad_norm": 0.7065421342849731, | |
| "learning_rate": 6.068044880107675e-05, | |
| "loss": 0.1481, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.9801633605600933, | |
| "grad_norm": 1.0575318336486816, | |
| "learning_rate": 6.0374128734582634e-05, | |
| "loss": 0.1546, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.9837537025401669, | |
| "grad_norm": 1.3331146240234375, | |
| "learning_rate": 6.006740123664022e-05, | |
| "loss": 0.1685, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.9873440445202406, | |
| "grad_norm": 0.712989091873169, | |
| "learning_rate": 5.976027835360366e-05, | |
| "loss": 0.1443, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.9909343865003142, | |
| "grad_norm": 0.9985840320587158, | |
| "learning_rate": 5.945277214735537e-05, | |
| "loss": 0.1381, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.9945247284803878, | |
| "grad_norm": 0.6109340786933899, | |
| "learning_rate": 5.914489469483234e-05, | |
| "loss": 0.1506, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.9981150704604613, | |
| "grad_norm": 0.5232493281364441, | |
| "learning_rate": 5.883665808755179e-05, | |
| "loss": 0.1527, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.001705412440535, | |
| "grad_norm": 1.120089054107666, | |
| "learning_rate": 5.852807443113635e-05, | |
| "loss": 0.1397, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.0052957544206085, | |
| "grad_norm": 0.9276136755943298, | |
| "learning_rate": 5.821915584483853e-05, | |
| "loss": 0.1155, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.008886096400682, | |
| "grad_norm": 0.6816973686218262, | |
| "learning_rate": 5.790991446106487e-05, | |
| "loss": 0.1111, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.0124764383807558, | |
| "grad_norm": 0.8138614296913147, | |
| "learning_rate": 5.7600362424899354e-05, | |
| "loss": 0.1107, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.0160667803608294, | |
| "grad_norm": 0.5443429350852966, | |
| "learning_rate": 5.729051189362649e-05, | |
| "loss": 0.1122, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.019657122340903, | |
| "grad_norm": 0.6204805970191956, | |
| "learning_rate": 5.698037503625379e-05, | |
| "loss": 0.1147, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.0232474643209766, | |
| "grad_norm": 0.5502025485038757, | |
| "learning_rate": 5.6669964033033905e-05, | |
| "loss": 0.1135, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.0268378063010501, | |
| "grad_norm": 0.6541283130645752, | |
| "learning_rate": 5.6359291074986244e-05, | |
| "loss": 0.1225, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.0304281482811237, | |
| "grad_norm": 0.6311090588569641, | |
| "learning_rate": 5.604836836341816e-05, | |
| "loss": 0.1063, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.0340184902611973, | |
| "grad_norm": 0.9657145738601685, | |
| "learning_rate": 5.573720810944575e-05, | |
| "loss": 0.1171, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.037608832241271, | |
| "grad_norm": 0.53743577003479, | |
| "learning_rate": 5.542582253351438e-05, | |
| "loss": 0.1128, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.0411991742213447, | |
| "grad_norm": 0.7501124739646912, | |
| "learning_rate": 5.511422386491858e-05, | |
| "loss": 0.1117, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.0447895162014182, | |
| "grad_norm": 0.7120064496994019, | |
| "learning_rate": 5.480242434132191e-05, | |
| "loss": 0.1049, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.0483798581814918, | |
| "grad_norm": 0.5755088329315186, | |
| "learning_rate": 5.4490436208276194e-05, | |
| "loss": 0.1047, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.0519702001615654, | |
| "grad_norm": 0.8773960471153259, | |
| "learning_rate": 5.4178271718740744e-05, | |
| "loss": 0.1119, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.055560542141639, | |
| "grad_norm": 0.5922686457633972, | |
| "learning_rate": 5.3865943132601e-05, | |
| "loss": 0.1092, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.0591508841217125, | |
| "grad_norm": 0.7486307621002197, | |
| "learning_rate": 5.355346271618715e-05, | |
| "loss": 0.1068, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.0627412261017861, | |
| "grad_norm": 0.8534032702445984, | |
| "learning_rate": 5.324084274179228e-05, | |
| "loss": 0.1072, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.0663315680818597, | |
| "grad_norm": 0.7270232439041138, | |
| "learning_rate": 5.292809548719049e-05, | |
| "loss": 0.1101, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.0699219100619335, | |
| "grad_norm": 0.5195777416229248, | |
| "learning_rate": 5.2615233235154616e-05, | |
| "loss": 0.1084, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.073512252042007, | |
| "grad_norm": 0.5684207081794739, | |
| "learning_rate": 5.230226827297395e-05, | |
| "loss": 0.1026, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.0771025940220806, | |
| "grad_norm": 1.3543568849563599, | |
| "learning_rate": 5.198921289197153e-05, | |
| "loss": 0.1026, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.0806929360021542, | |
| "grad_norm": 0.7514908313751221, | |
| "learning_rate": 5.167607938702154e-05, | |
| "loss": 0.1085, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.0842832779822278, | |
| "grad_norm": 0.6683730483055115, | |
| "learning_rate": 5.136288005606631e-05, | |
| "loss": 0.1012, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.0878736199623014, | |
| "grad_norm": 0.5652278065681458, | |
| "learning_rate": 5.1049627199633496e-05, | |
| "loss": 0.119, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.091463961942375, | |
| "grad_norm": 0.7017742395401001, | |
| "learning_rate": 5.073633312035287e-05, | |
| "loss": 0.1057, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.0950543039224485, | |
| "grad_norm": 0.5066478848457336, | |
| "learning_rate": 5.042301012247317e-05, | |
| "loss": 0.1127, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.0986446459025223, | |
| "grad_norm": 0.535321056842804, | |
| "learning_rate": 5.010967051137887e-05, | |
| "loss": 0.1102, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.1022349878825959, | |
| "grad_norm": 0.6270662546157837, | |
| "learning_rate": 4.979632659310695e-05, | |
| "loss": 0.1008, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.1058253298626695, | |
| "grad_norm": 0.748859703540802, | |
| "learning_rate": 4.9482990673863485e-05, | |
| "loss": 0.0995, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.109415671842743, | |
| "grad_norm": 0.500746488571167, | |
| "learning_rate": 4.916967505954046e-05, | |
| "loss": 0.1056, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.1130060138228166, | |
| "grad_norm": 0.5748748183250427, | |
| "learning_rate": 4.885639205523239e-05, | |
| "loss": 0.106, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1165963558028902, | |
| "grad_norm": 0.593147337436676, | |
| "learning_rate": 4.854315396475304e-05, | |
| "loss": 0.1086, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.1201866977829638, | |
| "grad_norm": 0.6119722127914429, | |
| "learning_rate": 4.822997309015226e-05, | |
| "loss": 0.1035, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.1237770397630373, | |
| "grad_norm": 0.5296047925949097, | |
| "learning_rate": 4.7916861731232846e-05, | |
| "loss": 0.1083, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.127367381743111, | |
| "grad_norm": 0.7060047388076782, | |
| "learning_rate": 4.7603832185067416e-05, | |
| "loss": 0.1, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.1309577237231847, | |
| "grad_norm": 0.4993881583213806, | |
| "learning_rate": 4.729089674551547e-05, | |
| "loss": 0.1057, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.1345480657032583, | |
| "grad_norm": 0.7866911888122559, | |
| "learning_rate": 4.697806770274062e-05, | |
| "loss": 0.0997, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.1381384076833319, | |
| "grad_norm": 0.642524242401123, | |
| "learning_rate": 4.6665357342727865e-05, | |
| "loss": 0.1051, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.1417287496634054, | |
| "grad_norm": 0.5228136777877808, | |
| "learning_rate": 4.6352777946801094e-05, | |
| "loss": 0.1002, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.145319091643479, | |
| "grad_norm": 0.9493293762207031, | |
| "learning_rate": 4.604034179114067e-05, | |
| "loss": 0.1019, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.1489094336235526, | |
| "grad_norm": 0.5647363662719727, | |
| "learning_rate": 4.5728061146301476e-05, | |
| "loss": 0.0915, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.1524997756036262, | |
| "grad_norm": 0.6017284989356995, | |
| "learning_rate": 4.5415948276730805e-05, | |
| "loss": 0.1098, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.1560901175837, | |
| "grad_norm": 0.46670928597450256, | |
| "learning_rate": 4.5104015440286826e-05, | |
| "loss": 0.1056, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.1596804595637735, | |
| "grad_norm": 0.6661453247070312, | |
| "learning_rate": 4.479227488775707e-05, | |
| "loss": 0.0964, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.163270801543847, | |
| "grad_norm": 0.642352819442749, | |
| "learning_rate": 4.4480738862377444e-05, | |
| "loss": 0.0907, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.1668611435239207, | |
| "grad_norm": 2.4927215576171875, | |
| "learning_rate": 4.4169419599351186e-05, | |
| "loss": 0.0969, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.1704514855039942, | |
| "grad_norm": 0.5965277552604675, | |
| "learning_rate": 4.3858329325368536e-05, | |
| "loss": 0.0921, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.1740418274840678, | |
| "grad_norm": 0.503105103969574, | |
| "learning_rate": 4.354748025812639e-05, | |
| "loss": 0.0918, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.1776321694641414, | |
| "grad_norm": 2.0070412158966064, | |
| "learning_rate": 4.323688460584864e-05, | |
| "loss": 0.1008, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.181222511444215, | |
| "grad_norm": 0.5921032428741455, | |
| "learning_rate": 4.292655456680651e-05, | |
| "loss": 0.0992, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.1848128534242885, | |
| "grad_norm": 0.7106916308403015, | |
| "learning_rate": 4.261650232883965e-05, | |
| "loss": 0.0998, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.1884031954043623, | |
| "grad_norm": 0.7483718395233154, | |
| "learning_rate": 4.230674006887734e-05, | |
| "loss": 0.1007, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.191993537384436, | |
| "grad_norm": 0.5854814648628235, | |
| "learning_rate": 4.199727995246041e-05, | |
| "loss": 0.1001, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.1955838793645095, | |
| "grad_norm": 1.022163987159729, | |
| "learning_rate": 4.1688134133263285e-05, | |
| "loss": 0.0989, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.199174221344583, | |
| "grad_norm": 0.6698512434959412, | |
| "learning_rate": 4.1379314752616784e-05, | |
| "loss": 0.0929, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.2027645633246566, | |
| "grad_norm": 0.8445412516593933, | |
| "learning_rate": 4.107083393903126e-05, | |
| "loss": 0.0865, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.2063549053047302, | |
| "grad_norm": 0.9410879611968994, | |
| "learning_rate": 4.076270380772021e-05, | |
| "loss": 0.0942, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.2099452472848038, | |
| "grad_norm": 0.4104284346103668, | |
| "learning_rate": 4.04549364601245e-05, | |
| "loss": 0.0957, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.2135355892648776, | |
| "grad_norm": 0.8418083786964417, | |
| "learning_rate": 4.014754398343716e-05, | |
| "loss": 0.0925, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.2171259312449512, | |
| "grad_norm": 0.5773093700408936, | |
| "learning_rate": 3.984053845012858e-05, | |
| "loss": 0.0921, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.2207162732250247, | |
| "grad_norm": 1.2288339138031006, | |
| "learning_rate": 3.953393191747239e-05, | |
| "loss": 0.089, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.2243066152050983, | |
| "grad_norm": 0.5901492238044739, | |
| "learning_rate": 3.9227736427071995e-05, | |
| "loss": 0.0903, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.2278969571851719, | |
| "grad_norm": 0.6220996379852295, | |
| "learning_rate": 3.892196400438755e-05, | |
| "loss": 0.0958, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.2314872991652455, | |
| "grad_norm": 0.6737645864486694, | |
| "learning_rate": 3.8616626658263825e-05, | |
| "loss": 0.0892, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.235077641145319, | |
| "grad_norm": 0.5661391019821167, | |
| "learning_rate": 3.831173638045839e-05, | |
| "loss": 0.0888, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.2386679831253926, | |
| "grad_norm": 0.7712500095367432, | |
| "learning_rate": 3.800730514517077e-05, | |
| "loss": 0.0859, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.2422583251054662, | |
| "grad_norm": 0.7590687274932861, | |
| "learning_rate": 3.770334490857217e-05, | |
| "loss": 0.0868, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.24584866708554, | |
| "grad_norm": 0.5650063753128052, | |
| "learning_rate": 3.7399867608335895e-05, | |
| "loss": 0.0974, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.2494390090656136, | |
| "grad_norm": 0.8975266218185425, | |
| "learning_rate": 3.709688516316844e-05, | |
| "loss": 0.095, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.2530293510456871, | |
| "grad_norm": 0.5311192274093628, | |
| "learning_rate": 3.679440947234152e-05, | |
| "loss": 0.0925, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.2566196930257607, | |
| "grad_norm": 1.0144147872924805, | |
| "learning_rate": 3.649245241522468e-05, | |
| "loss": 0.0903, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.2602100350058343, | |
| "grad_norm": 0.6833083629608154, | |
| "learning_rate": 3.619102585081872e-05, | |
| "loss": 0.0929, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.2638003769859079, | |
| "grad_norm": 0.6380596160888672, | |
| "learning_rate": 3.589014161728999e-05, | |
| "loss": 0.0787, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.2673907189659814, | |
| "grad_norm": 0.7181170582771301, | |
| "learning_rate": 3.558981153150542e-05, | |
| "loss": 0.0859, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.2709810609460552, | |
| "grad_norm": 0.6842727661132812, | |
| "learning_rate": 3.529004738856853e-05, | |
| "loss": 0.0823, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.2745714029261288, | |
| "grad_norm": 1.5806798934936523, | |
| "learning_rate": 3.4990860961356044e-05, | |
| "loss": 0.085, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.2781617449062024, | |
| "grad_norm": 0.6149685978889465, | |
| "learning_rate": 3.4692264000055594e-05, | |
| "loss": 0.0818, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.281752086886276, | |
| "grad_norm": 0.797741174697876, | |
| "learning_rate": 3.4394268231704266e-05, | |
| "loss": 0.0787, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.2853424288663495, | |
| "grad_norm": 0.5583544373512268, | |
| "learning_rate": 3.4096885359728036e-05, | |
| "loss": 0.0879, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.288932770846423, | |
| "grad_norm": 1.2549068927764893, | |
| "learning_rate": 3.380012706348209e-05, | |
| "loss": 0.085, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.2925231128264967, | |
| "grad_norm": 0.56533282995224, | |
| "learning_rate": 3.350400499779214e-05, | |
| "loss": 0.0932, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.2961134548065703, | |
| "grad_norm": 0.9718196392059326, | |
| "learning_rate": 3.32085307924967e-05, | |
| "loss": 0.0901, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.2997037967866438, | |
| "grad_norm": 0.6769024133682251, | |
| "learning_rate": 3.2913716051990394e-05, | |
| "loss": 0.0845, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.3032941387667174, | |
| "grad_norm": 1.1620076894760132, | |
| "learning_rate": 3.261957235476813e-05, | |
| "loss": 0.0831, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.3068844807467912, | |
| "grad_norm": 0.5092564225196838, | |
| "learning_rate": 3.232611125297035e-05, | |
| "loss": 0.0804, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.3104748227268648, | |
| "grad_norm": 0.42432501912117004, | |
| "learning_rate": 3.2033344271929476e-05, | |
| "loss": 0.0866, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.3140651647069383, | |
| "grad_norm": 0.5998629331588745, | |
| "learning_rate": 3.17412829097171e-05, | |
| "loss": 0.0865, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.317655506687012, | |
| "grad_norm": 0.5421279072761536, | |
| "learning_rate": 3.144993863669251e-05, | |
| "loss": 0.0849, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.3212458486670855, | |
| "grad_norm": 0.6406755447387695, | |
| "learning_rate": 3.115932289505213e-05, | |
| "loss": 0.0814, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.324836190647159, | |
| "grad_norm": 0.9076423048973083, | |
| "learning_rate": 3.086944709838028e-05, | |
| "loss": 0.0898, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.3284265326272329, | |
| "grad_norm": 0.7807140350341797, | |
| "learning_rate": 3.0580322631200756e-05, | |
| "loss": 0.0828, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.3320168746073064, | |
| "grad_norm": 0.6127801537513733, | |
| "learning_rate": 3.029196084852981e-05, | |
| "loss": 0.08, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.33560721658738, | |
| "grad_norm": 0.6226149797439575, | |
| "learning_rate": 3.000437307543017e-05, | |
| "loss": 0.0774, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.3391975585674536, | |
| "grad_norm": 0.4141993820667267, | |
| "learning_rate": 2.9717570606566287e-05, | |
| "loss": 0.0817, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.3427879005475272, | |
| "grad_norm": 0.6416285634040833, | |
| "learning_rate": 2.943156470576073e-05, | |
| "loss": 0.0792, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.3463782425276007, | |
| "grad_norm": 0.6912229657173157, | |
| "learning_rate": 2.914636660555178e-05, | |
| "loss": 0.0743, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.3499685845076743, | |
| "grad_norm": 0.8113506436347961, | |
| "learning_rate": 2.886198750675233e-05, | |
| "loss": 0.0843, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.353558926487748, | |
| "grad_norm": 0.6693570613861084, | |
| "learning_rate": 2.8578438578010053e-05, | |
| "loss": 0.0718, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.3571492684678215, | |
| "grad_norm": 0.6286030411720276, | |
| "learning_rate": 2.8295730955368573e-05, | |
| "loss": 0.0821, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.360739610447895, | |
| "grad_norm": 0.5432600975036621, | |
| "learning_rate": 2.8013875741830264e-05, | |
| "loss": 0.0779, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.3643299524279688, | |
| "grad_norm": 0.5628815293312073, | |
| "learning_rate": 2.7732884006920225e-05, | |
| "loss": 0.076, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.3679202944080424, | |
| "grad_norm": 0.761500895023346, | |
| "learning_rate": 2.745276678625141e-05, | |
| "loss": 0.0869, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.371510636388116, | |
| "grad_norm": 0.5888515710830688, | |
| "learning_rate": 2.717353508109125e-05, | |
| "loss": 0.0812, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.3751009783681896, | |
| "grad_norm": 0.5477086305618286, | |
| "learning_rate": 2.6895199857929643e-05, | |
| "loss": 0.0772, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.3786913203482631, | |
| "grad_norm": 0.5078212022781372, | |
| "learning_rate": 2.6617772048048284e-05, | |
| "loss": 0.0707, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.3822816623283367, | |
| "grad_norm": 0.5893701910972595, | |
| "learning_rate": 2.634126254709125e-05, | |
| "loss": 0.081, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.3858720043084105, | |
| "grad_norm": 0.9726279973983765, | |
| "learning_rate": 2.6065682214637123e-05, | |
| "loss": 0.0868, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.389462346288484, | |
| "grad_norm": 0.5375906229019165, | |
| "learning_rate": 2.5791041873772513e-05, | |
| "loss": 0.0754, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.3930526882685577, | |
| "grad_norm": 0.5937024354934692, | |
| "learning_rate": 2.5517352310667053e-05, | |
| "loss": 0.07, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.3966430302486312, | |
| "grad_norm": 0.5695418119430542, | |
| "learning_rate": 2.524462427414967e-05, | |
| "loss": 0.0712, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.4002333722287048, | |
| "grad_norm": 0.6219804883003235, | |
| "learning_rate": 2.497286847528646e-05, | |
| "loss": 0.0771, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.4038237142087784, | |
| "grad_norm": 0.7533654570579529, | |
| "learning_rate": 2.4702095586960085e-05, | |
| "loss": 0.073, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.407414056188852, | |
| "grad_norm": 0.5750814080238342, | |
| "learning_rate": 2.443231624345061e-05, | |
| "loss": 0.0753, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.4110043981689255, | |
| "grad_norm": 0.5853593349456787, | |
| "learning_rate": 2.416354104001779e-05, | |
| "loss": 0.0754, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.414594740148999, | |
| "grad_norm": 0.4552966356277466, | |
| "learning_rate": 2.389578053248493e-05, | |
| "loss": 0.0753, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.4181850821290727, | |
| "grad_norm": 0.718437671661377, | |
| "learning_rate": 2.362904523682447e-05, | |
| "loss": 0.0758, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.4217754241091463, | |
| "grad_norm": 0.7326009273529053, | |
| "learning_rate": 2.3363345628744832e-05, | |
| "loss": 0.0756, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.42536576608922, | |
| "grad_norm": 0.9607858657836914, | |
| "learning_rate": 2.3098692143279066e-05, | |
| "loss": 0.0719, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.4289561080692936, | |
| "grad_norm": 0.7754957675933838, | |
| "learning_rate": 2.283509517437496e-05, | |
| "loss": 0.0717, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.4325464500493672, | |
| "grad_norm": 0.8900684714317322, | |
| "learning_rate": 2.2572565074486972e-05, | |
| "loss": 0.0757, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.4361367920294408, | |
| "grad_norm": 0.6538607478141785, | |
| "learning_rate": 2.2311112154169507e-05, | |
| "loss": 0.0709, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.4397271340095144, | |
| "grad_norm": 0.6442373991012573, | |
| "learning_rate": 2.2050746681672056e-05, | |
| "loss": 0.0736, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.443317475989588, | |
| "grad_norm": 0.9824745655059814, | |
| "learning_rate": 2.179147888253584e-05, | |
| "loss": 0.0741, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.4469078179696617, | |
| "grad_norm": 0.6084447503089905, | |
| "learning_rate": 2.1533318939192394e-05, | |
| "loss": 0.0675, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.4504981599497353, | |
| "grad_norm": 0.6071482300758362, | |
| "learning_rate": 2.127627699056345e-05, | |
| "loss": 0.0721, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.4540885019298089, | |
| "grad_norm": 0.5101909637451172, | |
| "learning_rate": 2.102036313166289e-05, | |
| "loss": 0.0691, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.4576788439098824, | |
| "grad_norm": 0.5907676815986633, | |
| "learning_rate": 2.076558741320016e-05, | |
| "loss": 0.0624, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.461269185889956, | |
| "grad_norm": 0.7201829552650452, | |
| "learning_rate": 2.0511959841185713e-05, | |
| "loss": 0.0749, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.4648595278700296, | |
| "grad_norm": 0.5254886150360107, | |
| "learning_rate": 2.0259490376537865e-05, | |
| "loss": 0.078, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.4684498698501032, | |
| "grad_norm": 0.4855566620826721, | |
| "learning_rate": 2.0008188934691614e-05, | |
| "loss": 0.0727, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.4720402118301767, | |
| "grad_norm": 0.68084716796875, | |
| "learning_rate": 1.975806538520937e-05, | |
| "loss": 0.0679, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.4756305538102503, | |
| "grad_norm": 0.5893229842185974, | |
| "learning_rate": 1.9509129551393145e-05, | |
| "loss": 0.0709, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.479220895790324, | |
| "grad_norm": 0.5513525605201721, | |
| "learning_rate": 1.9261391209898912e-05, | |
| "loss": 0.0664, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.4828112377703977, | |
| "grad_norm": 0.45056793093681335, | |
| "learning_rate": 1.9014860090352476e-05, | |
| "loss": 0.0635, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.4864015797504713, | |
| "grad_norm": 0.6190094947814941, | |
| "learning_rate": 1.8769545874967566e-05, | |
| "loss": 0.0693, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.4899919217305448, | |
| "grad_norm": 0.6586858034133911, | |
| "learning_rate": 1.852545819816539e-05, | |
| "loss": 0.0652, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.4935822637106184, | |
| "grad_norm": 0.9752713441848755, | |
| "learning_rate": 1.8282606646196353e-05, | |
| "loss": 0.0744, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.497172605690692, | |
| "grad_norm": 0.6681696176528931, | |
| "learning_rate": 1.8041000756763493e-05, | |
| "loss": 0.0671, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.5007629476707658, | |
| "grad_norm": 0.5906854867935181, | |
| "learning_rate": 1.7800650018648024e-05, | |
| "loss": 0.0736, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.5043532896508394, | |
| "grad_norm": 0.6534956097602844, | |
| "learning_rate": 1.7561563871336545e-05, | |
| "loss": 0.0674, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.507943631630913, | |
| "grad_norm": 0.5932891964912415, | |
| "learning_rate": 1.732375170465041e-05, | |
| "loss": 0.0672, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.5115339736109865, | |
| "grad_norm": 0.504921019077301, | |
| "learning_rate": 1.7087222858376834e-05, | |
| "loss": 0.07, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.51512431559106, | |
| "grad_norm": 0.6252205967903137, | |
| "learning_rate": 1.6851986621902265e-05, | |
| "loss": 0.0637, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.5187146575711337, | |
| "grad_norm": 0.47223180532455444, | |
| "learning_rate": 1.6618052233847404e-05, | |
| "loss": 0.0697, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.5223049995512072, | |
| "grad_norm": 0.4429969787597656, | |
| "learning_rate": 1.6385428881704405e-05, | |
| "loss": 0.0664, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.5258953415312808, | |
| "grad_norm": 0.44724294543266296, | |
| "learning_rate": 1.6154125701476092e-05, | |
| "loss": 0.0642, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.5294856835113544, | |
| "grad_norm": 0.49648982286453247, | |
| "learning_rate": 1.59241517773171e-05, | |
| "loss": 0.0616, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.533076025491428, | |
| "grad_norm": 0.3683583736419678, | |
| "learning_rate": 1.5695516141177142e-05, | |
| "loss": 0.0631, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.5366663674715015, | |
| "grad_norm": 0.7180688977241516, | |
| "learning_rate": 1.546822777244627e-05, | |
| "loss": 0.0658, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.5402567094515751, | |
| "grad_norm": 0.6510112881660461, | |
| "learning_rate": 1.5242295597602225e-05, | |
| "loss": 0.0624, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.543847051431649, | |
| "grad_norm": 0.6626403331756592, | |
| "learning_rate": 1.5017728489859862e-05, | |
| "loss": 0.0596, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.5474373934117225, | |
| "grad_norm": 0.7510163187980652, | |
| "learning_rate": 1.4794535268822673e-05, | |
| "loss": 0.0666, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.551027735391796, | |
| "grad_norm": 0.48777294158935547, | |
| "learning_rate": 1.4572724700136386e-05, | |
| "loss": 0.0623, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.5546180773718696, | |
| "grad_norm": 0.6740663647651672, | |
| "learning_rate": 1.4352305495144736e-05, | |
| "loss": 0.0699, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.5582084193519434, | |
| "grad_norm": 0.513523519039154, | |
| "learning_rate": 1.4133286310547294e-05, | |
| "loss": 0.0686, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.561798761332017, | |
| "grad_norm": 0.689508318901062, | |
| "learning_rate": 1.3915675748059537e-05, | |
| "loss": 0.0643, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.5653891033120906, | |
| "grad_norm": 0.7558987736701965, | |
| "learning_rate": 1.3699482354074989e-05, | |
| "loss": 0.0638, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.5689794452921642, | |
| "grad_norm": 1.4819414615631104, | |
| "learning_rate": 1.3484714619329574e-05, | |
| "loss": 0.0579, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.5725697872722377, | |
| "grad_norm": 0.45672255754470825, | |
| "learning_rate": 1.3271380978568187e-05, | |
| "loss": 0.0597, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.5761601292523113, | |
| "grad_norm": 0.7070518136024475, | |
| "learning_rate": 1.3059489810213371e-05, | |
| "loss": 0.0653, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.5797504712323849, | |
| "grad_norm": 0.4744075536727905, | |
| "learning_rate": 1.2849049436036326e-05, | |
| "loss": 0.0609, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.5833408132124585, | |
| "grad_norm": 0.5028963088989258, | |
| "learning_rate": 1.2640068120830035e-05, | |
| "loss": 0.0614, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.586931155192532, | |
| "grad_norm": 1.222612977027893, | |
| "learning_rate": 1.24325540720847e-05, | |
| "loss": 0.058, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.5905214971726056, | |
| "grad_norm": 0.4024209976196289, | |
| "learning_rate": 1.2226515439665392e-05, | |
| "loss": 0.0599, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.5941118391526792, | |
| "grad_norm": 0.5114520788192749, | |
| "learning_rate": 1.2021960315491975e-05, | |
| "loss": 0.0525, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.5977021811327528, | |
| "grad_norm": 0.6782193779945374, | |
| "learning_rate": 1.1818896733221318e-05, | |
| "loss": 0.0605, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.6012925231128265, | |
| "grad_norm": 0.4370103180408478, | |
| "learning_rate": 1.1617332667931763e-05, | |
| "loss": 0.0569, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.6048828650929001, | |
| "grad_norm": 0.5159808993339539, | |
| "learning_rate": 1.1417276035809926e-05, | |
| "loss": 0.0583, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.6084732070729737, | |
| "grad_norm": 0.45791277289390564, | |
| "learning_rate": 1.1218734693839794e-05, | |
| "loss": 0.0639, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.6120635490530473, | |
| "grad_norm": 0.6834966540336609, | |
| "learning_rate": 1.1021716439494156e-05, | |
| "loss": 0.0626, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.615653891033121, | |
| "grad_norm": 0.4611278176307678, | |
| "learning_rate": 1.0826229010428369e-05, | |
| "loss": 0.056, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.6192442330131946, | |
| "grad_norm": 0.6188788414001465, | |
| "learning_rate": 1.0632280084176444e-05, | |
| "loss": 0.0578, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.6228345749932682, | |
| "grad_norm": 0.5647935271263123, | |
| "learning_rate": 1.0439877277849575e-05, | |
| "loss": 0.0586, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.6264249169733418, | |
| "grad_norm": 0.6752751469612122, | |
| "learning_rate": 1.024902814783692e-05, | |
| "loss": 0.0555, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.6300152589534154, | |
| "grad_norm": 0.49796855449676514, | |
| "learning_rate": 1.0059740189508881e-05, | |
| "loss": 0.0556, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.633605600933489, | |
| "grad_norm": 0.6069309115409851, | |
| "learning_rate": 9.872020836922724e-06, | |
| "loss": 0.0564, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.6371959429135625, | |
| "grad_norm": 0.6443465948104858, | |
| "learning_rate": 9.68587746253059e-06, | |
| "loss": 0.0559, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.640786284893636, | |
| "grad_norm": 0.48786768317222595, | |
| "learning_rate": 9.501317376889985e-06, | |
| "loss": 0.0551, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.6443766268737097, | |
| "grad_norm": 0.6036781072616577, | |
| "learning_rate": 9.318347828376639e-06, | |
| "loss": 0.06, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.6479669688537832, | |
| "grad_norm": 0.7226144075393677, | |
| "learning_rate": 9.136976002899855e-06, | |
| "loss": 0.0616, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.6515573108338568, | |
| "grad_norm": 0.4328902065753937, | |
| "learning_rate": 8.957209023620277e-06, | |
| "loss": 0.0504, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.6551476528139304, | |
| "grad_norm": 0.506410539150238, | |
| "learning_rate": 8.779053950670146e-06, | |
| "loss": 0.059, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.658737994794004, | |
| "grad_norm": 0.6660659909248352, | |
| "learning_rate": 8.602517780876007e-06, | |
| "loss": 0.0528, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.6623283367740778, | |
| "grad_norm": 0.5838719606399536, | |
| "learning_rate": 8.427607447483943e-06, | |
| "loss": 0.0561, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.6659186787541513, | |
| "grad_norm": 0.7501543760299683, | |
| "learning_rate": 8.254329819887252e-06, | |
| "loss": 0.0527, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.669509020734225, | |
| "grad_norm": 0.4832637906074524, | |
| "learning_rate": 8.082691703356688e-06, | |
| "loss": 0.0512, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.6730993627142985, | |
| "grad_norm": 0.5931252241134644, | |
| "learning_rate": 7.912699838773151e-06, | |
| "loss": 0.0513, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.6766897046943723, | |
| "grad_norm": 0.5244051218032837, | |
| "learning_rate": 7.744360902363002e-06, | |
| "loss": 0.0544, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.6802800466744459, | |
| "grad_norm": 0.6513102054595947, | |
| "learning_rate": 7.577681505435813e-06, | |
| "loss": 0.054, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.6838703886545194, | |
| "grad_norm": 0.8317810297012329, | |
| "learning_rate": 7.412668194124728e-06, | |
| "loss": 0.0507, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.687460730634593, | |
| "grad_norm": 0.4875124394893646, | |
| "learning_rate": 7.2493274491294285e-06, | |
| "loss": 0.0488, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.6910510726146666, | |
| "grad_norm": 0.4913179576396942, | |
| "learning_rate": 7.087665685461497e-06, | |
| "loss": 0.0551, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.6946414145947402, | |
| "grad_norm": 0.47164708375930786, | |
| "learning_rate": 6.9276892521925816e-06, | |
| "loss": 0.0548, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.6982317565748137, | |
| "grad_norm": 0.39257460832595825, | |
| "learning_rate": 6.769404432204973e-06, | |
| "loss": 0.0532, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.7018220985548873, | |
| "grad_norm": 0.548692524433136, | |
| "learning_rate": 6.61281744194494e-06, | |
| "loss": 0.0503, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.7054124405349609, | |
| "grad_norm": 0.476531445980072, | |
| "learning_rate": 6.4579344311784475e-06, | |
| "loss": 0.0514, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.7090027825150345, | |
| "grad_norm": 0.47037366032600403, | |
| "learning_rate": 6.304761482749777e-06, | |
| "loss": 0.0497, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.712593124495108, | |
| "grad_norm": 0.7144917845726013, | |
| "learning_rate": 6.153304612342514e-06, | |
| "loss": 0.0529, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.7161834664751816, | |
| "grad_norm": 0.7041458487510681, | |
| "learning_rate": 6.003569768243411e-06, | |
| "loss": 0.0493, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.7197738084552554, | |
| "grad_norm": 0.5702252984046936, | |
| "learning_rate": 5.855562831108624e-06, | |
| "loss": 0.0491, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.723364150435329, | |
| "grad_norm": 0.697307288646698, | |
| "learning_rate": 5.709289613732888e-06, | |
| "loss": 0.0533, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.7269544924154026, | |
| "grad_norm": 0.6015498638153076, | |
| "learning_rate": 5.564755860821147e-06, | |
| "loss": 0.0521, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.7305448343954761, | |
| "grad_norm": 0.6062167882919312, | |
| "learning_rate": 5.421967248763021e-06, | |
| "loss": 0.0547, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.73413517637555, | |
| "grad_norm": 0.45276394486427307, | |
| "learning_rate": 5.2809293854097495e-06, | |
| "loss": 0.0553, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.7377255183556235, | |
| "grad_norm": 0.4024350047111511, | |
| "learning_rate": 5.14164780985405e-06, | |
| "loss": 0.0512, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.741315860335697, | |
| "grad_norm": 0.6370827555656433, | |
| "learning_rate": 5.0041279922125705e-06, | |
| "loss": 0.0562, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.7449062023157706, | |
| "grad_norm": 0.5606709122657776, | |
| "learning_rate": 4.868375333411002e-06, | |
| "loss": 0.0556, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.7484965442958442, | |
| "grad_norm": 0.8585699796676636, | |
| "learning_rate": 4.734395164971978e-06, | |
| "loss": 0.0459, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.7520868862759178, | |
| "grad_norm": 0.4308234453201294, | |
| "learning_rate": 4.6021927488057334e-06, | |
| "loss": 0.0471, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.7556772282559914, | |
| "grad_norm": 0.4660848081111908, | |
| "learning_rate": 4.471773277003427e-06, | |
| "loss": 0.0524, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.759267570236065, | |
| "grad_norm": 0.6825345158576965, | |
| "learning_rate": 4.343141871633188e-06, | |
| "loss": 0.0521, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.7628579122161385, | |
| "grad_norm": 0.6137758493423462, | |
| "learning_rate": 4.216303584538988e-06, | |
| "loss": 0.0539, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.766448254196212, | |
| "grad_norm": 0.7231915593147278, | |
| "learning_rate": 4.0912633971422425e-06, | |
| "loss": 0.0466, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.7700385961762857, | |
| "grad_norm": 0.6705979108810425, | |
| "learning_rate": 3.968026220246174e-06, | |
| "loss": 0.047, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.7736289381563592, | |
| "grad_norm": 0.5974612832069397, | |
| "learning_rate": 3.846596893842891e-06, | |
| "loss": 0.0499, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.777219280136433, | |
| "grad_norm": 0.6848942637443542, | |
| "learning_rate": 3.7269801869233845e-06, | |
| "loss": 0.0545, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.7808096221165066, | |
| "grad_norm": 0.6268109083175659, | |
| "learning_rate": 3.6091807972901624e-06, | |
| "loss": 0.0519, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.7843999640965802, | |
| "grad_norm": 0.8246615529060364, | |
| "learning_rate": 3.49320335137282e-06, | |
| "loss": 0.0495, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.7879903060766538, | |
| "grad_norm": 0.7163103222846985, | |
| "learning_rate": 3.3790524040462566e-06, | |
| "loss": 0.0465, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.7915806480567276, | |
| "grad_norm": 0.5779036283493042, | |
| "learning_rate": 3.266732438451842e-06, | |
| "loss": 0.0493, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.7951709900368011, | |
| "grad_norm": 0.5178433060646057, | |
| "learning_rate": 3.1562478658213656e-06, | |
| "loss": 0.0499, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.7987613320168747, | |
| "grad_norm": 0.7967355847358704, | |
| "learning_rate": 3.0476030253037415e-06, | |
| "loss": 0.0502, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.8023516739969483, | |
| "grad_norm": 0.8158264756202698, | |
| "learning_rate": 2.9408021837945942e-06, | |
| "loss": 0.0481, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.8059420159770219, | |
| "grad_norm": 0.43987634778022766, | |
| "learning_rate": 2.8358495357687364e-06, | |
| "loss": 0.0456, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.8095323579570954, | |
| "grad_norm": 0.45231232047080994, | |
| "learning_rate": 2.7327492031153866e-06, | |
| "loss": 0.0474, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.813122699937169, | |
| "grad_norm": 0.799350917339325, | |
| "learning_rate": 2.631505234976311e-06, | |
| "loss": 0.0489, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.8167130419172426, | |
| "grad_norm": 0.5466026663780212, | |
| "learning_rate": 2.5321216075867626e-06, | |
| "loss": 0.0474, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.8203033838973162, | |
| "grad_norm": 0.7424982190132141, | |
| "learning_rate": 2.4346022241193643e-06, | |
| "loss": 0.0452, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.8238937258773897, | |
| "grad_norm": 0.7979154586791992, | |
| "learning_rate": 2.3389509145308076e-06, | |
| "loss": 0.05, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.8274840678574633, | |
| "grad_norm": 0.6414862275123596, | |
| "learning_rate": 2.245171435411414e-06, | |
| "loss": 0.0487, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.8310744098375369, | |
| "grad_norm": 0.5069670081138611, | |
| "learning_rate": 2.1532674698376e-06, | |
| "loss": 0.0464, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.8346647518176105, | |
| "grad_norm": 0.4745350480079651, | |
| "learning_rate": 2.0632426272272464e-06, | |
| "loss": 0.0467, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.8382550937976843, | |
| "grad_norm": 0.5952518582344055, | |
| "learning_rate": 1.975100443197958e-06, | |
| "loss": 0.0508, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.8418454357777578, | |
| "grad_norm": 0.5413398146629333, | |
| "learning_rate": 1.8888443794281618e-06, | |
| "loss": 0.0426, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.8454357777578314, | |
| "grad_norm": 0.6297146677970886, | |
| "learning_rate": 1.8044778235211723e-06, | |
| "loss": 0.0523, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.849026119737905, | |
| "grad_norm": 0.458870530128479, | |
| "learning_rate": 1.72200408887217e-06, | |
| "loss": 0.0462, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.8526164617179788, | |
| "grad_norm": 0.6490904688835144, | |
| "learning_rate": 1.6414264145380442e-06, | |
| "loss": 0.0484, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.8562068036980524, | |
| "grad_norm": 0.7383233904838562, | |
| "learning_rate": 1.562747965110195e-06, | |
| "loss": 0.0484, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.859797145678126, | |
| "grad_norm": 2.4921016693115234, | |
| "learning_rate": 1.4859718305902326e-06, | |
| "loss": 0.046, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.8633874876581995, | |
| "grad_norm": 1.6146339178085327, | |
| "learning_rate": 1.411101026268652e-06, | |
| "loss": 0.043, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.866977829638273, | |
| "grad_norm": 0.47561097145080566, | |
| "learning_rate": 1.3381384926063833e-06, | |
| "loss": 0.0467, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.8705681716183467, | |
| "grad_norm": 0.5113374590873718, | |
| "learning_rate": 1.2670870951193292e-06, | |
| "loss": 0.0475, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.8741585135984202, | |
| "grad_norm": 0.5401134490966797, | |
| "learning_rate": 1.197949624265776e-06, | |
| "loss": 0.0482, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.8777488555784938, | |
| "grad_norm": 0.4193181097507477, | |
| "learning_rate": 1.1307287953368995e-06, | |
| "loss": 0.0472, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.8813391975585674, | |
| "grad_norm": 0.45812806487083435, | |
| "learning_rate": 1.065427248350015e-06, | |
| "loss": 0.0477, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.884929539538641, | |
| "grad_norm": 0.8749078512191772, | |
| "learning_rate": 1.0020475479449731e-06, | |
| "loss": 0.0507, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.8885198815187145, | |
| "grad_norm": 0.48960697650909424, | |
| "learning_rate": 9.405921832833841e-07, | |
| "loss": 0.046, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.892110223498788, | |
| "grad_norm": 0.7578288316726685, | |
| "learning_rate": 8.810635679509071e-07, | |
| "loss": 0.0471, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.895700565478862, | |
| "grad_norm": 0.6842608451843262, | |
| "learning_rate": 8.23464039862426e-07, | |
| "loss": 0.0445, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.8992909074589355, | |
| "grad_norm": 0.5089036226272583, | |
| "learning_rate": 7.67795861170234e-07, | |
| "loss": 0.0457, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.902881249439009, | |
| "grad_norm": 0.5393949151039124, | |
| "learning_rate": 7.140612181752048e-07, | |
| "loss": 0.0456, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.9064715914190826, | |
| "grad_norm": 0.9976809024810791, | |
| "learning_rate": 6.622622212409058e-07, | |
| "loss": 0.047, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.9100619333991564, | |
| "grad_norm": 0.5556519031524658, | |
| "learning_rate": 6.124009047107471e-07, | |
| "loss": 0.0517, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.91365227537923, | |
| "grad_norm": 0.534712553024292, | |
| "learning_rate": 5.644792268280574e-07, | |
| "loss": 0.0427, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.9172426173593036, | |
| "grad_norm": 0.7053726315498352, | |
| "learning_rate": 5.18499069659184e-07, | |
| "loss": 0.0455, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.9208329593393771, | |
| "grad_norm": 0.5793641209602356, | |
| "learning_rate": 4.744622390195963e-07, | |
| "loss": 0.0513, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.9244233013194507, | |
| "grad_norm": 0.4043155908584595, | |
| "learning_rate": 4.323704644029203e-07, | |
| "loss": 0.0501, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.9280136432995243, | |
| "grad_norm": 0.4776788353919983, | |
| "learning_rate": 3.9222539891307086e-07, | |
| "loss": 0.0415, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.9316039852795979, | |
| "grad_norm": 0.6649408340454102, | |
| "learning_rate": 3.5402861919928697e-07, | |
| "loss": 0.0451, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.9351943272596714, | |
| "grad_norm": 3.3624627590179443, | |
| "learning_rate": 3.1778162539421453e-07, | |
| "loss": 0.0472, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.938784669239745, | |
| "grad_norm": 0.5529268980026245, | |
| "learning_rate": 2.8348584105501453e-07, | |
| "loss": 0.045, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.9423750112198186, | |
| "grad_norm": 0.6905925273895264, | |
| "learning_rate": 2.511426131074246e-07, | |
| "loss": 0.0452, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.9459653531998922, | |
| "grad_norm": 0.6144551038742065, | |
| "learning_rate": 2.2075321179289565e-07, | |
| "loss": 0.0422, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.9495556951799657, | |
| "grad_norm": 1.2887723445892334, | |
| "learning_rate": 1.9231883061866517e-07, | |
| "loss": 0.0441, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.9531460371600395, | |
| "grad_norm": 0.7968602776527405, | |
| "learning_rate": 1.6584058631090582e-07, | |
| "loss": 0.0455, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.9567363791401131, | |
| "grad_norm": 0.7239225506782532, | |
| "learning_rate": 1.4131951877087158e-07, | |
| "loss": 0.0461, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.9603267211201867, | |
| "grad_norm": 0.6258605718612671, | |
| "learning_rate": 1.1875659103404157e-07, | |
| "loss": 0.0449, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.9639170631002603, | |
| "grad_norm": 0.7048450708389282, | |
| "learning_rate": 9.815268923230592e-08, | |
| "loss": 0.0469, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.967507405080334, | |
| "grad_norm": 0.6698242425918579, | |
| "learning_rate": 7.95086225591657e-08, | |
| "loss": 0.0469, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.9710977470604076, | |
| "grad_norm": 0.612483561038971, | |
| "learning_rate": 6.282512323795287e-08, | |
| "loss": 0.0432, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.9746880890404812, | |
| "grad_norm": 1.0906122922897339, | |
| "learning_rate": 4.81028464930755e-08, | |
| "loss": 0.0439, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.9782784310205548, | |
| "grad_norm": 0.5854030847549438, | |
| "learning_rate": 3.534237052426059e-08, | |
| "loss": 0.0461, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.9818687730006284, | |
| "grad_norm": 0.5965482592582703, | |
| "learning_rate": 2.4544196483888837e-08, | |
| "loss": 0.0449, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.985459114980702, | |
| "grad_norm": 1.0227429866790771, | |
| "learning_rate": 1.5708748457271548e-08, | |
| "loss": 0.0476, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.9890494569607755, | |
| "grad_norm": 0.506277859210968, | |
| "learning_rate": 8.836373446019507e-09, | |
| "loss": 0.0477, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.992639798940849, | |
| "grad_norm": 0.4811525344848633, | |
| "learning_rate": 3.927341354420522e-09, | |
| "loss": 0.0468, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.9962301409209227, | |
| "grad_norm": 0.4584663212299347, | |
| "learning_rate": 9.818449787979412e-10, | |
| "loss": 0.0424, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.9998204829009962, | |
| "grad_norm": 0.6924448609352112, | |
| "learning_rate": 0.0, | |
| "loss": 0.0443, | |
| "step": 5570 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5570, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.008182835124896e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |