| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.96, |
| "eval_steps": 500, |
| "global_step": 4800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001, |
| "grad_norm": 13.726217269897461, |
| "learning_rate": 4e-06, |
| "loss": 10.4133, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 4.9693684577941895, |
| "learning_rate": 8.999999999999999e-06, |
| "loss": 9.8384, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 2.879361152648926, |
| "learning_rate": 1.4e-05, |
| "loss": 9.5182, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 3.023117780685425, |
| "learning_rate": 1.9e-05, |
| "loss": 9.3697, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 3.9188971519470215, |
| "learning_rate": 2.3999999999999997e-05, |
| "loss": 9.1674, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 4.881481647491455, |
| "learning_rate": 2.8999999999999997e-05, |
| "loss": 9.0057, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 5.366204261779785, |
| "learning_rate": 3.399999999999999e-05, |
| "loss": 8.8219, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 12.275991439819336, |
| "learning_rate": 3.9e-05, |
| "loss": 8.6271, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 3.798144578933716, |
| "learning_rate": 4.4e-05, |
| "loss": 8.3997, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.8132495880126953, |
| "learning_rate": 4.899999999999999e-05, |
| "loss": 8.211, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 3.747354507446289, |
| "learning_rate": 5.399999999999999e-05, |
| "loss": 8.0857, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 3.045440435409546, |
| "learning_rate": 5.899999999999999e-05, |
| "loss": 7.7861, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 2.5404093265533447, |
| "learning_rate": 6.4e-05, |
| "loss": 7.5872, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 2.668727159500122, |
| "learning_rate": 6.9e-05, |
| "loss": 7.2799, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 2.7760627269744873, |
| "learning_rate": 7.4e-05, |
| "loss": 7.1264, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 2.291050434112549, |
| "learning_rate": 7.899999999999998e-05, |
| "loss": 6.9572, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 2.175490140914917, |
| "learning_rate": 8.4e-05, |
| "loss": 6.7924, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 2.3527913093566895, |
| "learning_rate": 8.9e-05, |
| "loss": 6.7404, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 2.1863811016082764, |
| "learning_rate": 9.4e-05, |
| "loss": 6.4791, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.8337147235870361, |
| "learning_rate": 9.9e-05, |
| "loss": 6.3777, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 2.6010560989379883, |
| "learning_rate": 0.000104, |
| "loss": 6.2828, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 2.021232843399048, |
| "learning_rate": 0.00010899999999999999, |
| "loss": 6.1714, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 2.872624397277832, |
| "learning_rate": 0.00011399999999999999, |
| "loss": 6.055, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 1.730695366859436, |
| "learning_rate": 0.00011899999999999999, |
| "loss": 5.9749, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 1.8848776817321777, |
| "learning_rate": 0.00012399999999999998, |
| "loss": 5.8248, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 1.6250139474868774, |
| "learning_rate": 0.000129, |
| "loss": 5.8104, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 1.776007056236267, |
| "learning_rate": 0.00013399999999999998, |
| "loss": 5.6435, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 1.5446497201919556, |
| "learning_rate": 0.000139, |
| "loss": 5.6116, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 1.7168349027633667, |
| "learning_rate": 0.00014399999999999998, |
| "loss": 5.5175, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.5292563438415527, |
| "learning_rate": 0.000149, |
| "loss": 5.4173, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 1.3585489988327026, |
| "learning_rate": 0.00015399999999999998, |
| "loss": 5.363, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.428066372871399, |
| "learning_rate": 0.000159, |
| "loss": 5.3579, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 1.2797551155090332, |
| "learning_rate": 0.00016399999999999997, |
| "loss": 5.2454, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 1.2886139154434204, |
| "learning_rate": 0.000169, |
| "loss": 5.2456, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 1.220340609550476, |
| "learning_rate": 0.00017399999999999997, |
| "loss": 5.2171, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 1.3801219463348389, |
| "learning_rate": 0.000179, |
| "loss": 5.1411, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 1.2507349252700806, |
| "learning_rate": 0.00018399999999999997, |
| "loss": 5.0654, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 1.1602240800857544, |
| "learning_rate": 0.00018899999999999999, |
| "loss": 4.9534, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 1.1360636949539185, |
| "learning_rate": 0.00019399999999999997, |
| "loss": 5.003, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.3035187721252441, |
| "learning_rate": 0.00019899999999999999, |
| "loss": 5.0598, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 1.2107629776000977, |
| "learning_rate": 0.000204, |
| "loss": 4.862, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 1.2512259483337402, |
| "learning_rate": 0.00020899999999999998, |
| "loss": 4.8316, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 1.1587598323822021, |
| "learning_rate": 0.000214, |
| "loss": 4.8094, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 1.0695734024047852, |
| "learning_rate": 0.00021899999999999998, |
| "loss": 4.7331, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 1.2294334173202515, |
| "learning_rate": 0.000224, |
| "loss": 4.7008, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 0.9978997707366943, |
| "learning_rate": 0.00022899999999999998, |
| "loss": 4.6802, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 0.9799301028251648, |
| "learning_rate": 0.000234, |
| "loss": 4.6729, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.9935582280158997, |
| "learning_rate": 0.00023899999999999998, |
| "loss": 4.6054, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 1.0166147947311401, |
| "learning_rate": 0.000244, |
| "loss": 4.5731, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.9937739372253418, |
| "learning_rate": 0.000249, |
| "loss": 4.4931, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 1.3121594190597534, |
| "learning_rate": 0.000254, |
| "loss": 4.5523, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 0.8952345252037048, |
| "learning_rate": 0.00025899999999999995, |
| "loss": 4.458, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 0.9221830368041992, |
| "learning_rate": 0.00026399999999999997, |
| "loss": 4.47, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 0.9459202289581299, |
| "learning_rate": 0.000269, |
| "loss": 4.3436, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 0.907535970211029, |
| "learning_rate": 0.000274, |
| "loss": 4.399, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.9379016757011414, |
| "learning_rate": 0.000279, |
| "loss": 4.3173, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 0.8357326984405518, |
| "learning_rate": 0.00028399999999999996, |
| "loss": 4.4404, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 0.8483668565750122, |
| "learning_rate": 0.000289, |
| "loss": 4.3564, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 0.8923192024230957, |
| "learning_rate": 0.000294, |
| "loss": 4.2908, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.8518162369728088, |
| "learning_rate": 0.000299, |
| "loss": 4.3578, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 0.7717773914337158, |
| "learning_rate": 0.00029999946385159685, |
| "loss": 4.274, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 0.8026501536369324, |
| "learning_rate": 0.00029999728575527777, |
| "loss": 4.2535, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 0.7490338087081909, |
| "learning_rate": 0.0002999934322260777, |
| "loss": 4.1746, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.7846081256866455, |
| "learning_rate": 0.0002999879033070396, |
| "loss": 4.191, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 0.9395541548728943, |
| "learning_rate": 0.0002999806990599202, |
| "loss": 4.1992, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 0.7602784633636475, |
| "learning_rate": 0.00029997181956518905, |
| "loss": 4.0873, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 0.769402027130127, |
| "learning_rate": 0.000299961264922028, |
| "loss": 4.0827, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 0.7498162984848022, |
| "learning_rate": 0.0002999490352483299, |
| "loss": 4.1329, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 0.7378515005111694, |
| "learning_rate": 0.00029993513068069704, |
| "loss": 4.0957, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.7770788669586182, |
| "learning_rate": 0.00029991955137444014, |
| "loss": 3.9895, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 0.6819906830787659, |
| "learning_rate": 0.0002999022975035762, |
| "loss": 3.9696, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.7527605295181274, |
| "learning_rate": 0.0002998833692608267, |
| "loss": 4.0049, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 0.8272974491119385, |
| "learning_rate": 0.0002998627668576155, |
| "loss": 3.9313, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 0.6840147376060486, |
| "learning_rate": 0.00029984049052406646, |
| "loss": 3.9424, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.6854280233383179, |
| "learning_rate": 0.00029981654050900057, |
| "loss": 3.9876, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 0.6815478205680847, |
| "learning_rate": 0.00029979091707993383, |
| "loss": 3.8654, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 0.7019010186195374, |
| "learning_rate": 0.00029976362052307355, |
| "loss": 3.9083, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 0.7006312608718872, |
| "learning_rate": 0.00029973465114331555, |
| "loss": 3.8683, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.079, |
| "grad_norm": 0.6919441223144531, |
| "learning_rate": 0.0002997040092642407, |
| "loss": 3.8363, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.6442705392837524, |
| "learning_rate": 0.00029967169522811135, |
| "loss": 3.7315, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.081, |
| "grad_norm": 0.6530693769454956, |
| "learning_rate": 0.0002996377093958673, |
| "loss": 3.8249, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 0.7034955024719238, |
| "learning_rate": 0.0002996020521471219, |
| "loss": 3.7612, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.083, |
| "grad_norm": 0.6602813601493835, |
| "learning_rate": 0.000299564723880158, |
| "loss": 3.7912, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 0.6340587139129639, |
| "learning_rate": 0.00029952572501192314, |
| "loss": 3.7946, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 0.6614922285079956, |
| "learning_rate": 0.000299485055978025, |
| "loss": 3.7346, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 0.6904277205467224, |
| "learning_rate": 0.00029944271723272685, |
| "loss": 3.608, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.087, |
| "grad_norm": 1.0566864013671875, |
| "learning_rate": 0.00029939870924894206, |
| "loss": 3.7429, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.6728774905204773, |
| "learning_rate": 0.000299353032518229, |
| "loss": 3.7887, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.089, |
| "grad_norm": 0.6397653818130493, |
| "learning_rate": 0.00029930568755078544, |
| "loss": 3.7278, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.649704098701477, |
| "learning_rate": 0.0002992566748754432, |
| "loss": 3.6609, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.091, |
| "grad_norm": 0.6457221508026123, |
| "learning_rate": 0.0002992059950396618, |
| "loss": 3.6433, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 0.6439397931098938, |
| "learning_rate": 0.00029915364860952256, |
| "loss": 3.5866, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.093, |
| "grad_norm": 0.6577675342559814, |
| "learning_rate": 0.00029909963616972225, |
| "loss": 3.6028, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 0.6369614601135254, |
| "learning_rate": 0.00029904395832356653, |
| "loss": 3.6547, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 0.6148671507835388, |
| "learning_rate": 0.00029898661569296336, |
| "loss": 3.5645, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.6119360327720642, |
| "learning_rate": 0.00029892760891841585, |
| "loss": 3.5271, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.097, |
| "grad_norm": 0.6091258525848389, |
| "learning_rate": 0.0002988669386590151, |
| "loss": 3.5216, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 0.6266127228736877, |
| "learning_rate": 0.00029880460559243314, |
| "loss": 3.5137, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.099, |
| "grad_norm": 0.6600483059883118, |
| "learning_rate": 0.00029874061041491493, |
| "loss": 3.6216, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5879938006401062, |
| "learning_rate": 0.000298674953841271, |
| "loss": 3.4859, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.101, |
| "grad_norm": 0.5879422426223755, |
| "learning_rate": 0.00029860763660486913, |
| "loss": 3.5691, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 0.6330602169036865, |
| "learning_rate": 0.0002985386594576263, |
| "loss": 3.4669, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.103, |
| "grad_norm": 0.6023596525192261, |
| "learning_rate": 0.0002984680231700004, |
| "loss": 3.5629, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.6126276850700378, |
| "learning_rate": 0.00029839572853098133, |
| "loss": 3.4725, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 0.5834094882011414, |
| "learning_rate": 0.00029832177634808255, |
| "loss": 3.4099, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 0.6275287866592407, |
| "learning_rate": 0.0002982461674473317, |
| "loss": 3.4758, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.107, |
| "grad_norm": 0.5847580432891846, |
| "learning_rate": 0.00029816890267326184, |
| "loss": 3.319, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 0.6252226829528809, |
| "learning_rate": 0.0002980899828889013, |
| "loss": 3.4195, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.109, |
| "grad_norm": 0.6148211359977722, |
| "learning_rate": 0.0002980094089757649, |
| "loss": 3.3954, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6043537855148315, |
| "learning_rate": 0.0002979271818338434, |
| "loss": 3.3332, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.111, |
| "grad_norm": 0.6230714321136475, |
| "learning_rate": 0.0002978433023815939, |
| "loss": 3.3496, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.6967390179634094, |
| "learning_rate": 0.00029775777155592907, |
| "loss": 3.3825, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.113, |
| "grad_norm": 0.6039656400680542, |
| "learning_rate": 0.00029767059031220747, |
| "loss": 3.3775, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 0.6448938846588135, |
| "learning_rate": 0.00029758175962422214, |
| "loss": 3.3608, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 0.5955671072006226, |
| "learning_rate": 0.00029749128048419006, |
| "loss": 3.2822, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 1.0059808492660522, |
| "learning_rate": 0.00029739915390274103, |
| "loss": 3.2733, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.117, |
| "grad_norm": 0.589187502861023, |
| "learning_rate": 0.0002973053809089063, |
| "loss": 3.3818, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.5997525453567505, |
| "learning_rate": 0.0002972099625501073, |
| "loss": 3.3073, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.119, |
| "grad_norm": 0.595435380935669, |
| "learning_rate": 0.0002971128998921436, |
| "loss": 3.2607, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.6213010549545288, |
| "learning_rate": 0.0002970141940191814, |
| "loss": 3.2259, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.121, |
| "grad_norm": 0.5790759325027466, |
| "learning_rate": 0.00029691384603374094, |
| "loss": 3.2976, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 0.5658262372016907, |
| "learning_rate": 0.0002968118570566847, |
| "loss": 3.2429, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.123, |
| "grad_norm": 0.5907502174377441, |
| "learning_rate": 0.0002967082282272045, |
| "loss": 3.339, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 0.5808840990066528, |
| "learning_rate": 0.00029660296070280895, |
| "loss": 3.2019, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.5826807022094727, |
| "learning_rate": 0.00029649605565931043, |
| "loss": 3.2223, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 0.5372958779335022, |
| "learning_rate": 0.0002963875142908121, |
| "loss": 3.2082, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.127, |
| "grad_norm": 0.5273796916007996, |
| "learning_rate": 0.0002962773378096944, |
| "loss": 3.2612, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.5920493602752686, |
| "learning_rate": 0.0002961655274466015, |
| "loss": 3.1669, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.129, |
| "grad_norm": 0.575118362903595, |
| "learning_rate": 0.0002960520844504279, |
| "loss": 3.1589, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.5747135877609253, |
| "learning_rate": 0.0002959370100883039, |
| "loss": 3.1427, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.131, |
| "grad_norm": 0.5302339792251587, |
| "learning_rate": 0.000295820305645582, |
| "loss": 3.139, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 0.5856277346611023, |
| "learning_rate": 0.00029570197242582204, |
| "loss": 3.1229, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.133, |
| "grad_norm": 0.5387154817581177, |
| "learning_rate": 0.00029558201175077723, |
| "loss": 3.1793, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.519659161567688, |
| "learning_rate": 0.0002954604249603787, |
| "loss": 3.019, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 0.5343477725982666, |
| "learning_rate": 0.0002953372134127212, |
| "loss": 3.1136, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.5857408046722412, |
| "learning_rate": 0.0002952123784840475, |
| "loss": 3.1447, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.137, |
| "grad_norm": 0.5493313670158386, |
| "learning_rate": 0.000295085921568733, |
| "loss": 3.159, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 0.5469555854797363, |
| "learning_rate": 0.0002949578440792705, |
| "loss": 3.0828, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.139, |
| "grad_norm": 0.5394929647445679, |
| "learning_rate": 0.0002948281474462541, |
| "loss": 3.0478, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.528149425983429, |
| "learning_rate": 0.0002946968331183634, |
| "loss": 3.1151, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.141, |
| "grad_norm": 0.5426072478294373, |
| "learning_rate": 0.0002945639025623473, |
| "loss": 3.1065, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 0.5489263534545898, |
| "learning_rate": 0.00029442935726300734, |
| "loss": 3.0476, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.143, |
| "grad_norm": 0.5520033836364746, |
| "learning_rate": 0.0002942931987231816, |
| "loss": 3.0378, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.5429459810256958, |
| "learning_rate": 0.0002941554284637276, |
| "loss": 3.0479, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 0.5990952253341675, |
| "learning_rate": 0.00029401604802350523, |
| "loss": 3.0617, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 0.5248637199401855, |
| "learning_rate": 0.0002938750589593599, |
| "loss": 3.0854, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.147, |
| "grad_norm": 0.59062260389328, |
| "learning_rate": 0.0002937324628461048, |
| "loss": 3.0252, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 0.51813805103302, |
| "learning_rate": 0.0002935882612765035, |
| "loss": 3.0378, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.149, |
| "grad_norm": 0.5538212656974792, |
| "learning_rate": 0.00029344245586125227, |
| "loss": 2.9998, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.5211405158042908, |
| "learning_rate": 0.0002932950482289618, |
| "loss": 3.0235, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.151, |
| "grad_norm": 0.5439658761024475, |
| "learning_rate": 0.00029314604002613913, |
| "loss": 3.0212, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.5444567799568176, |
| "learning_rate": 0.0002929954329171693, |
| "loss": 3.0763, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.153, |
| "grad_norm": 0.5256481170654297, |
| "learning_rate": 0.0002928432285842969, |
| "loss": 3.0378, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.5187762379646301, |
| "learning_rate": 0.0002926894287276068, |
| "loss": 3.0012, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 0.5280289649963379, |
| "learning_rate": 0.0002925340350650058, |
| "loss": 2.9581, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.5345909595489502, |
| "learning_rate": 0.0002923770493322029, |
| "loss": 2.9386, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.157, |
| "grad_norm": 0.5289936661720276, |
| "learning_rate": 0.00029221847328269034, |
| "loss": 2.9282, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 0.5417344570159912, |
| "learning_rate": 0.00029205830868772364, |
| "loss": 3.0096, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.159, |
| "grad_norm": 0.5449649095535278, |
| "learning_rate": 0.00029189655733630193, |
| "loss": 2.9267, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5082869529724121, |
| "learning_rate": 0.0002917332210351482, |
| "loss": 2.9358, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.161, |
| "grad_norm": 0.5396576523780823, |
| "learning_rate": 0.0002915683016086889, |
| "loss": 3.0027, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 0.5313181281089783, |
| "learning_rate": 0.0002914018008990335, |
| "loss": 2.9455, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.163, |
| "grad_norm": 0.49431952834129333, |
| "learning_rate": 0.0002912337207659541, |
| "loss": 3.0227, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.535514235496521, |
| "learning_rate": 0.0002910640630868645, |
| "loss": 2.8925, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 0.5321778059005737, |
| "learning_rate": 0.00029089282975679946, |
| "loss": 2.898, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.5481770038604736, |
| "learning_rate": 0.00029072002268839316, |
| "loss": 3.0421, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.167, |
| "grad_norm": 0.5205419659614563, |
| "learning_rate": 0.0002905456438118582, |
| "loss": 2.9817, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.519214391708374, |
| "learning_rate": 0.00029036969507496385, |
| "loss": 2.8693, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.169, |
| "grad_norm": 0.5142780542373657, |
| "learning_rate": 0.00029019217844301437, |
| "loss": 2.9589, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.5100776553153992, |
| "learning_rate": 0.0002900130958988269, |
| "loss": 2.9288, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.171, |
| "grad_norm": 0.49259114265441895, |
| "learning_rate": 0.00028983244944270957, |
| "loss": 2.8171, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 0.5288705825805664, |
| "learning_rate": 0.000289650241092439, |
| "loss": 2.8789, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.173, |
| "grad_norm": 0.5391842126846313, |
| "learning_rate": 0.00028946647288323766, |
| "loss": 2.915, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.5194993615150452, |
| "learning_rate": 0.00028928114686775153, |
| "loss": 2.8859, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.503380298614502, |
| "learning_rate": 0.00028909426511602657, |
| "loss": 2.9212, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.5249530076980591, |
| "learning_rate": 0.0002889058297154862, |
| "loss": 2.8832, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.177, |
| "grad_norm": 0.4950558543205261, |
| "learning_rate": 0.0002887158427709075, |
| "loss": 2.8848, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.5167747735977173, |
| "learning_rate": 0.0002885243064043982, |
| "loss": 2.8871, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.179, |
| "grad_norm": 0.49764567613601685, |
| "learning_rate": 0.0002883312227553724, |
| "loss": 2.8416, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.5111367702484131, |
| "learning_rate": 0.00028813659398052707, |
| "loss": 2.8673, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.181, |
| "grad_norm": 0.5087841749191284, |
| "learning_rate": 0.00028794042225381794, |
| "loss": 2.8911, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.5020551085472107, |
| "learning_rate": 0.00028774270976643503, |
| "loss": 2.8075, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.183, |
| "grad_norm": 0.47799405455589294, |
| "learning_rate": 0.00028754345872677837, |
| "loss": 2.8803, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.4816206693649292, |
| "learning_rate": 0.0002873426713604331, |
| "loss": 2.7745, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.5181537866592407, |
| "learning_rate": 0.00028714034991014493, |
| "loss": 2.787, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.49121904373168945, |
| "learning_rate": 0.00028693649663579483, |
| "loss": 2.8193, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.187, |
| "grad_norm": 0.49521416425704956, |
| "learning_rate": 0.0002867311138143737, |
| "loss": 2.767, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.47037652134895325, |
| "learning_rate": 0.00028652420373995744, |
| "loss": 2.8282, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.189, |
| "grad_norm": 0.4956018328666687, |
| "learning_rate": 0.0002863157687236808, |
| "loss": 2.7311, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.521686851978302, |
| "learning_rate": 0.0002861058110937116, |
| "loss": 2.7677, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.191, |
| "grad_norm": 0.5066460371017456, |
| "learning_rate": 0.0002858943331952253, |
| "loss": 2.793, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.4976024031639099, |
| "learning_rate": 0.00028568133739037793, |
| "loss": 2.776, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.193, |
| "grad_norm": 0.4874838590621948, |
| "learning_rate": 0.00028546682605828054, |
| "loss": 2.8554, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.4944135546684265, |
| "learning_rate": 0.000285250801594972, |
| "loss": 2.8121, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.48091182112693787, |
| "learning_rate": 0.0002850332664133925, |
| "loss": 2.7394, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.46700772643089294, |
| "learning_rate": 0.0002848142229433568, |
| "loss": 2.6952, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.197, |
| "grad_norm": 0.5188003182411194, |
| "learning_rate": 0.0002845936736315267, |
| "loss": 2.8399, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.5063126683235168, |
| "learning_rate": 0.00028437162094138366, |
| "loss": 2.7841, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.199, |
| "grad_norm": 0.49878448247909546, |
| "learning_rate": 0.0002841480673532021, |
| "loss": 2.7651, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5015583634376526, |
| "learning_rate": 0.0002839230153640205, |
| "loss": 2.813, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.201, |
| "grad_norm": 0.5300015211105347, |
| "learning_rate": 0.00028369646748761443, |
| "loss": 2.7488, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.4953711926937103, |
| "learning_rate": 0.0002834684262544682, |
| "loss": 2.7762, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.203, |
| "grad_norm": 0.49907276034355164, |
| "learning_rate": 0.0002832388942117462, |
| "loss": 2.7878, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 0.4764520525932312, |
| "learning_rate": 0.0002830078739232653, |
| "loss": 2.8229, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.46354764699935913, |
| "learning_rate": 0.00028277536796946526, |
| "loss": 2.7619, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.463844895362854, |
| "learning_rate": 0.0002825413789473806, |
| "loss": 2.6742, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.207, |
| "grad_norm": 0.5160843729972839, |
| "learning_rate": 0.0002823059094706113, |
| "loss": 2.7154, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.47835180163383484, |
| "learning_rate": 0.00028206896216929363, |
| "loss": 2.733, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.209, |
| "grad_norm": 0.5362768769264221, |
| "learning_rate": 0.0002818305396900708, |
| "loss": 2.7082, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.4910009503364563, |
| "learning_rate": 0.0002815906446960635, |
| "loss": 2.693, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.211, |
| "grad_norm": 0.5005692839622498, |
| "learning_rate": 0.0002813492798668398, |
| "loss": 2.793, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.47488823533058167, |
| "learning_rate": 0.0002811064478983857, |
| "loss": 2.7746, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.213, |
| "grad_norm": 0.4562411606311798, |
| "learning_rate": 0.00028086215150307473, |
| "loss": 2.6761, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 0.47498100996017456, |
| "learning_rate": 0.00028061639340963763, |
| "loss": 2.6934, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 0.4689179062843323, |
| "learning_rate": 0.0002803691763631321, |
| "loss": 2.6655, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.46847668290138245, |
| "learning_rate": 0.0002801205031249118, |
| "loss": 2.7459, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.217, |
| "grad_norm": 0.4786347448825836, |
| "learning_rate": 0.00027987037647259587, |
| "loss": 2.7196, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 0.48441487550735474, |
| "learning_rate": 0.00027961879920003767, |
| "loss": 2.7219, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.219, |
| "grad_norm": 0.5033813714981079, |
| "learning_rate": 0.0002793657741172935, |
| "loss": 2.7095, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.4371601641178131, |
| "learning_rate": 0.0002791113040505915, |
| "loss": 2.6165, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.221, |
| "grad_norm": 0.47073352336883545, |
| "learning_rate": 0.0002788553918422999, |
| "loss": 2.6581, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.5119361281394958, |
| "learning_rate": 0.00027859804035089533, |
| "loss": 2.7874, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.223, |
| "grad_norm": 0.45070821046829224, |
| "learning_rate": 0.0002783392524509307, |
| "loss": 2.6972, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.45409971475601196, |
| "learning_rate": 0.0002780790310330035, |
| "loss": 2.6349, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.46642905473709106, |
| "learning_rate": 0.00027781737900372296, |
| "loss": 2.6704, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.46489498019218445, |
| "learning_rate": 0.0002775542992856781, |
| "loss": 2.7102, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.227, |
| "grad_norm": 0.9031617641448975, |
| "learning_rate": 0.000277289794817405, |
| "loss": 2.6664, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.4736745357513428, |
| "learning_rate": 0.0002770238685533534, |
| "loss": 2.7807, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.229, |
| "grad_norm": 0.4746663272380829, |
| "learning_rate": 0.0002767565234638546, |
| "loss": 2.7733, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.4613768756389618, |
| "learning_rate": 0.0002764877625350876, |
| "loss": 2.6382, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.231, |
| "grad_norm": 0.45333442091941833, |
| "learning_rate": 0.0002762175887690461, |
| "loss": 2.6333, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.48350557684898376, |
| "learning_rate": 0.0002759460051835048, |
| "loss": 2.7099, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.233, |
| "grad_norm": 0.46377626061439514, |
| "learning_rate": 0.0002756730148119855, |
| "loss": 2.673, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.4856124818325043, |
| "learning_rate": 0.00027539862070372393, |
| "loss": 2.7564, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.48508885502815247, |
| "learning_rate": 0.0002751228259236348, |
| "loss": 2.6935, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 0.4603351652622223, |
| "learning_rate": 0.00027484563355227826, |
| "loss": 2.6816, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.237, |
| "grad_norm": 0.4790993928909302, |
| "learning_rate": 0.000274567046685825, |
| "loss": 2.7326, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.4396674335002899, |
| "learning_rate": 0.000274287068436022, |
| "loss": 2.6504, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.239, |
| "grad_norm": 0.4911787211894989, |
| "learning_rate": 0.0002740057019301576, |
| "loss": 2.7139, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.45822376012802124, |
| "learning_rate": 0.0002737229503110267, |
| "loss": 2.6546, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.241, |
| "grad_norm": 0.44974634051322937, |
| "learning_rate": 0.0002734388167368954, |
| "loss": 2.6673, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 0.45818230509757996, |
| "learning_rate": 0.0002731533043814661, |
| "loss": 2.7511, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.243, |
| "grad_norm": 0.45645296573638916, |
| "learning_rate": 0.0002728664164338418, |
| "loss": 2.6447, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.46149763464927673, |
| "learning_rate": 0.0002725781560984904, |
| "loss": 2.6297, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.245, |
| "grad_norm": 0.47193628549575806, |
| "learning_rate": 0.00027228852659520915, |
| "loss": 2.6723, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.47553935647010803, |
| "learning_rate": 0.0002719975311590885, |
| "loss": 2.6032, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.247, |
| "grad_norm": 0.4629911482334137, |
| "learning_rate": 0.0002717051730404762, |
| "loss": 2.6566, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.46325111389160156, |
| "learning_rate": 0.00027141145550494056, |
| "loss": 2.5453, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.249, |
| "grad_norm": 0.46191033720970154, |
| "learning_rate": 0.00027111638183323453, |
| "loss": 2.5978, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.45227476954460144, |
| "learning_rate": 0.00027081995532125857, |
| "loss": 2.6556, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.251, |
| "grad_norm": 0.4406815767288208, |
| "learning_rate": 0.000270522179280024, |
| "loss": 2.6529, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 0.4414892792701721, |
| "learning_rate": 0.0002702230570356163, |
| "loss": 2.6808, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.253, |
| "grad_norm": 0.45952120423316956, |
| "learning_rate": 0.00026992259192915746, |
| "loss": 2.5599, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.45652300119400024, |
| "learning_rate": 0.000269620787316769, |
| "loss": 2.61, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.255, |
| "grad_norm": 0.4613508880138397, |
| "learning_rate": 0.00026931764656953444, |
| "loss": 2.7847, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.4642449915409088, |
| "learning_rate": 0.0002690131730734615, |
| "loss": 2.6241, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.257, |
| "grad_norm": 0.6733850240707397, |
| "learning_rate": 0.0002687073702294445, |
| "loss": 2.605, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 0.4731089174747467, |
| "learning_rate": 0.0002684002414532261, |
| "loss": 2.6458, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.259, |
| "grad_norm": 0.5482076406478882, |
| "learning_rate": 0.0002680917901753594, |
| "loss": 2.5853, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.4559101164340973, |
| "learning_rate": 0.0002677820198411696, |
| "loss": 2.5809, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.261, |
| "grad_norm": 0.480686753988266, |
| "learning_rate": 0.0002674709339107152, |
| "loss": 2.6749, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 0.42668047547340393, |
| "learning_rate": 0.00026715853585874977, |
| "loss": 2.6271, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.263, |
| "grad_norm": 0.4340451657772064, |
| "learning_rate": 0.00026684482917468293, |
| "loss": 2.6019, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.4473606050014496, |
| "learning_rate": 0.00026652981736254125, |
| "loss": 2.5944, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.265, |
| "grad_norm": 0.4584132134914398, |
| "learning_rate": 0.0002662135039409296, |
| "loss": 2.6534, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 0.43533068895339966, |
| "learning_rate": 0.00026589589244299113, |
| "loss": 2.6027, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.267, |
| "grad_norm": 0.456048846244812, |
| "learning_rate": 0.00026557698641636835, |
| "loss": 2.5276, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 0.441222608089447, |
| "learning_rate": 0.0002652567894231634, |
| "loss": 2.5891, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.269, |
| "grad_norm": 0.4482981860637665, |
| "learning_rate": 0.0002649353050398982, |
| "loss": 2.7258, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.4718676805496216, |
| "learning_rate": 0.0002646125368574743, |
| "loss": 2.6594, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.271, |
| "grad_norm": 0.44714292883872986, |
| "learning_rate": 0.00026428848848113315, |
| "loss": 2.5474, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.42453283071517944, |
| "learning_rate": 0.00026396316353041564, |
| "loss": 2.5412, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.273, |
| "grad_norm": 0.42612630128860474, |
| "learning_rate": 0.0002636365656391216, |
| "loss": 2.6426, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 0.45025870203971863, |
| "learning_rate": 0.00026330869845526944, |
| "loss": 2.6238, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.46621114015579224, |
| "learning_rate": 0.0002629795656410551, |
| "loss": 2.6543, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 0.4684096872806549, |
| "learning_rate": 0.00026264917087281134, |
| "loss": 2.5498, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.277, |
| "grad_norm": 0.444062739610672, |
| "learning_rate": 0.0002623175178409667, |
| "loss": 2.5517, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 0.46531593799591064, |
| "learning_rate": 0.00026198461025000417, |
| "loss": 2.591, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.279, |
| "grad_norm": 0.43859636783599854, |
| "learning_rate": 0.0002616504518184199, |
| "loss": 2.6114, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.4471001923084259, |
| "learning_rate": 0.0002613150462786815, |
| "loss": 2.5127, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.281, |
| "grad_norm": 0.4279544949531555, |
| "learning_rate": 0.00026097839737718664, |
| "loss": 2.6325, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 0.45175451040267944, |
| "learning_rate": 0.0002606405088742209, |
| "loss": 2.5789, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.283, |
| "grad_norm": 0.4278489351272583, |
| "learning_rate": 0.00026030138454391597, |
| "loss": 2.4853, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 0.48170074820518494, |
| "learning_rate": 0.00025996102817420745, |
| "loss": 2.5067, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.285, |
| "grad_norm": 0.4388919770717621, |
| "learning_rate": 0.0002596194435667925, |
| "loss": 2.4981, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 0.4510611593723297, |
| "learning_rate": 0.00025927663453708733, |
| "loss": 2.6489, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.287, |
| "grad_norm": 0.43777748942375183, |
| "learning_rate": 0.0002589326049141847, |
| "loss": 2.5824, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.43350744247436523, |
| "learning_rate": 0.0002585873585408111, |
| "loss": 2.6057, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.289, |
| "grad_norm": 0.42946937680244446, |
| "learning_rate": 0.00025824089927328384, |
| "loss": 2.5705, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.4467051029205322, |
| "learning_rate": 0.00025789323098146787, |
| "loss": 2.6092, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.291, |
| "grad_norm": 0.4496210515499115, |
| "learning_rate": 0.0002575443575487328, |
| "loss": 2.4654, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 0.43781545758247375, |
| "learning_rate": 0.0002571942828719092, |
| "loss": 2.4598, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.293, |
| "grad_norm": 0.430301308631897, |
| "learning_rate": 0.0002568430108612454, |
| "loss": 2.5123, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 0.42806944251060486, |
| "learning_rate": 0.00025649054544036356, |
| "loss": 2.5518, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.295, |
| "grad_norm": 0.4417232573032379, |
| "learning_rate": 0.0002561368905462159, |
| "loss": 2.5158, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.4251876473426819, |
| "learning_rate": 0.00025578205012904086, |
| "loss": 2.5811, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.297, |
| "grad_norm": 0.45378217101097107, |
| "learning_rate": 0.0002554260281523188, |
| "loss": 2.4669, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 0.42655450105667114, |
| "learning_rate": 0.00025506882859272796, |
| "loss": 2.4976, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.299, |
| "grad_norm": 0.44121378660202026, |
| "learning_rate": 0.00025471045544009965, |
| "loss": 2.5124, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.4258913993835449, |
| "learning_rate": 0.00025435091269737414, |
| "loss": 2.5127, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.301, |
| "grad_norm": 0.46033087372779846, |
| "learning_rate": 0.0002539902043805556, |
| "loss": 2.4782, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 0.4258130192756653, |
| "learning_rate": 0.00025362833451866753, |
| "loss": 2.4894, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.303, |
| "grad_norm": 0.44221916794776917, |
| "learning_rate": 0.00025326530715370744, |
| "loss": 2.4741, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.42666810750961304, |
| "learning_rate": 0.00025290112634060186, |
| "loss": 2.5112, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.305, |
| "grad_norm": 0.4374025762081146, |
| "learning_rate": 0.0002525357961471613, |
| "loss": 2.5195, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 0.4391957223415375, |
| "learning_rate": 0.00025216932065403425, |
| "loss": 2.4956, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.307, |
| "grad_norm": 0.43732672929763794, |
| "learning_rate": 0.00025180170395466204, |
| "loss": 2.4465, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.45370855927467346, |
| "learning_rate": 0.0002514329501552331, |
| "loss": 2.5087, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.309, |
| "grad_norm": 0.4213542342185974, |
| "learning_rate": 0.00025106306337463686, |
| "loss": 2.4634, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.4372135102748871, |
| "learning_rate": 0.0002506920477444179, |
| "loss": 2.5841, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.311, |
| "grad_norm": 0.4450632631778717, |
| "learning_rate": 0.0002503199074087298, |
| "loss": 2.4903, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.4359963536262512, |
| "learning_rate": 0.00024994664652428877, |
| "loss": 2.4622, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.313, |
| "grad_norm": 0.4272458255290985, |
| "learning_rate": 0.0002495722692603274, |
| "loss": 2.5966, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 0.430254191160202, |
| "learning_rate": 0.00024919677979854776, |
| "loss": 2.4504, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.315, |
| "grad_norm": 0.41149428486824036, |
| "learning_rate": 0.0002488201823330751, |
| "loss": 2.5111, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 0.4507811665534973, |
| "learning_rate": 0.0002484424810704107, |
| "loss": 2.4763, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.317, |
| "grad_norm": 0.43168288469314575, |
| "learning_rate": 0.00024806368022938495, |
| "loss": 2.5124, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 0.44648832082748413, |
| "learning_rate": 0.0002476837840411103, |
| "loss": 2.46, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.319, |
| "grad_norm": 0.4257158041000366, |
| "learning_rate": 0.0002473027967489341, |
| "loss": 2.5119, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.4564734101295471, |
| "learning_rate": 0.0002469207226083908, |
| "loss": 2.5977, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.321, |
| "grad_norm": 0.46141913533210754, |
| "learning_rate": 0.00024653756588715486, |
| "loss": 2.611, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 0.44771572947502136, |
| "learning_rate": 0.0002461533308649929, |
| "loss": 2.4669, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.323, |
| "grad_norm": 0.43828845024108887, |
| "learning_rate": 0.0002457680218337157, |
| "loss": 2.433, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 0.4548298120498657, |
| "learning_rate": 0.0002453816430971307, |
| "loss": 2.4799, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.4636451303958893, |
| "learning_rate": 0.0002449941989709936, |
| "loss": 2.5127, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 0.422651082277298, |
| "learning_rate": 0.0002446056937829603, |
| "loss": 2.539, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.327, |
| "grad_norm": 0.45776715874671936, |
| "learning_rate": 0.00024421613187253823, |
| "loss": 2.5262, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.43375980854034424, |
| "learning_rate": 0.00024382551759103853, |
| "loss": 2.533, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.329, |
| "grad_norm": 0.4225177764892578, |
| "learning_rate": 0.00024343385530152683, |
| "loss": 2.5418, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.42413005232810974, |
| "learning_rate": 0.00024304114937877486, |
| "loss": 2.3906, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.331, |
| "grad_norm": 0.4245309829711914, |
| "learning_rate": 0.00024264740420921132, |
| "loss": 2.4826, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 0.45272067189216614, |
| "learning_rate": 0.00024225262419087323, |
| "loss": 2.4663, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.333, |
| "grad_norm": 0.43966102600097656, |
| "learning_rate": 0.00024185681373335656, |
| "loss": 2.3933, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 0.42622506618499756, |
| "learning_rate": 0.00024145997725776697, |
| "loss": 2.4733, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.335, |
| "grad_norm": 0.44191116094589233, |
| "learning_rate": 0.0002410621191966705, |
| "loss": 2.5164, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.40515631437301636, |
| "learning_rate": 0.00024066324399404416, |
| "loss": 2.5272, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.337, |
| "grad_norm": 0.42186275124549866, |
| "learning_rate": 0.00024026335610522604, |
| "loss": 2.4699, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 0.4451305568218231, |
| "learning_rate": 0.0002398624599968658, |
| "loss": 2.4666, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.339, |
| "grad_norm": 0.4346270263195038, |
| "learning_rate": 0.00023946056014687454, |
| "loss": 2.5605, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.42730483412742615, |
| "learning_rate": 0.00023905766104437504, |
| "loss": 2.4154, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.341, |
| "grad_norm": 0.41405901312828064, |
| "learning_rate": 0.0002386537671896514, |
| "loss": 2.3792, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 0.42070284485816956, |
| "learning_rate": 0.0002382488830940989, |
| "loss": 2.4676, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.343, |
| "grad_norm": 0.4284003973007202, |
| "learning_rate": 0.00023784301328017354, |
| "loss": 2.433, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.4249774217605591, |
| "learning_rate": 0.00023743616228134154, |
| "loss": 2.4119, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.345, |
| "grad_norm": 0.4497978389263153, |
| "learning_rate": 0.00023702833464202882, |
| "loss": 2.4146, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.421589732170105, |
| "learning_rate": 0.00023661953491756996, |
| "loss": 2.4051, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.347, |
| "grad_norm": 0.44604167342185974, |
| "learning_rate": 0.00023620976767415764, |
| "loss": 2.519, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.414231538772583, |
| "learning_rate": 0.00023579903748879145, |
| "loss": 2.4193, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.349, |
| "grad_norm": 0.4184156060218811, |
| "learning_rate": 0.00023538734894922673, |
| "loss": 2.4478, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.43089163303375244, |
| "learning_rate": 0.0002349747066539235, |
| "loss": 2.3942, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.351, |
| "grad_norm": 0.4269482493400574, |
| "learning_rate": 0.00023456111521199494, |
| "loss": 2.5309, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.4043903350830078, |
| "learning_rate": 0.00023414657924315598, |
| "loss": 2.4896, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.353, |
| "grad_norm": 0.4351564347743988, |
| "learning_rate": 0.00023373110337767175, |
| "loss": 2.3659, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 0.41638249158859253, |
| "learning_rate": 0.00023331469225630567, |
| "loss": 2.4415, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.355, |
| "grad_norm": 0.41405797004699707, |
| "learning_rate": 0.00023289735053026785, |
| "loss": 2.4004, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 0.43309521675109863, |
| "learning_rate": 0.00023247908286116287, |
| "loss": 2.4222, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.357, |
| "grad_norm": 0.43367326259613037, |
| "learning_rate": 0.00023205989392093812, |
| "loss": 2.4417, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 0.4391366243362427, |
| "learning_rate": 0.00023163978839183113, |
| "loss": 2.4042, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.359, |
| "grad_norm": 0.4290434718132019, |
| "learning_rate": 0.0002312187709663176, |
| "loss": 2.4001, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.4561871588230133, |
| "learning_rate": 0.0002307968463470589, |
| "loss": 2.4138, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.361, |
| "grad_norm": 0.4181992709636688, |
| "learning_rate": 0.00023037401924684946, |
| "loss": 2.3321, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 0.4237087666988373, |
| "learning_rate": 0.00022995029438856437, |
| "loss": 2.4286, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.363, |
| "grad_norm": 0.4123045802116394, |
| "learning_rate": 0.00022952567650510617, |
| "loss": 2.4236, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.41121742129325867, |
| "learning_rate": 0.00022910017033935258, |
| "loss": 2.4475, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.365, |
| "grad_norm": 0.4109913408756256, |
| "learning_rate": 0.00022867378064410303, |
| "loss": 2.4107, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 0.4502372145652771, |
| "learning_rate": 0.00022824651218202578, |
| "loss": 2.4689, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.367, |
| "grad_norm": 0.4249553084373474, |
| "learning_rate": 0.00022781836972560473, |
| "loss": 2.4354, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.4587641954421997, |
| "learning_rate": 0.0002273893580570861, |
| "loss": 2.4022, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.369, |
| "grad_norm": 0.4180518388748169, |
| "learning_rate": 0.000226959481968425, |
| "loss": 2.4303, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.4448012113571167, |
| "learning_rate": 0.0002265287462612318, |
| "loss": 2.3981, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.371, |
| "grad_norm": 0.4289359450340271, |
| "learning_rate": 0.0002260971557467187, |
| "loss": 2.3435, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 0.42983385920524597, |
| "learning_rate": 0.0002256647152456459, |
| "loss": 2.349, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.373, |
| "grad_norm": 0.4299187660217285, |
| "learning_rate": 0.00022523142958826763, |
| "loss": 2.396, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 0.42625340819358826, |
| "learning_rate": 0.00022479730361427844, |
| "loss": 2.2904, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.4453998804092407, |
| "learning_rate": 0.00022436234217275888, |
| "loss": 2.5129, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.4248093366622925, |
| "learning_rate": 0.00022392655012212163, |
| "loss": 2.3976, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.377, |
| "grad_norm": 0.42273494601249695, |
| "learning_rate": 0.0002234899323300569, |
| "loss": 2.4843, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 0.40302085876464844, |
| "learning_rate": 0.00022305249367347835, |
| "loss": 2.4268, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.379, |
| "grad_norm": 0.4263085126876831, |
| "learning_rate": 0.00022261423903846846, |
| "loss": 2.4438, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.4482550621032715, |
| "learning_rate": 0.00022217517332022386, |
| "loss": 2.4004, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.381, |
| "grad_norm": 0.42222732305526733, |
| "learning_rate": 0.0002217353014230011, |
| "loss": 2.4649, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 0.44026628136634827, |
| "learning_rate": 0.00022129462826006116, |
| "loss": 2.4186, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.383, |
| "grad_norm": 0.4032437801361084, |
| "learning_rate": 0.0002208531587536153, |
| "loss": 2.388, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.42311862111091614, |
| "learning_rate": 0.00022041089783476954, |
| "loss": 2.4084, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.385, |
| "grad_norm": 0.4101983904838562, |
| "learning_rate": 0.00021996785044346983, |
| "loss": 2.4068, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.42865946888923645, |
| "learning_rate": 0.0002195240215284468, |
| "loss": 2.3719, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.387, |
| "grad_norm": 0.4461745023727417, |
| "learning_rate": 0.00021907941604716057, |
| "loss": 2.382, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 0.42378079891204834, |
| "learning_rate": 0.00021863403896574534, |
| "loss": 2.3732, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.389, |
| "grad_norm": 0.4562043845653534, |
| "learning_rate": 0.00021818789525895375, |
| "loss": 2.3665, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.41517820954322815, |
| "learning_rate": 0.0002177409899101016, |
| "loss": 2.4582, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.391, |
| "grad_norm": 0.4133061170578003, |
| "learning_rate": 0.00021729332791101203, |
| "loss": 2.3971, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.4128275513648987, |
| "learning_rate": 0.00021684491426195957, |
| "loss": 2.4198, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.393, |
| "grad_norm": 0.4075275957584381, |
| "learning_rate": 0.0002163957539716149, |
| "loss": 2.3646, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 0.4190007448196411, |
| "learning_rate": 0.0002159458520569881, |
| "loss": 2.458, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.395, |
| "grad_norm": 0.4138437509536743, |
| "learning_rate": 0.0002154952135433732, |
| "loss": 2.3209, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 0.44516539573669434, |
| "learning_rate": 0.00021504384346429187, |
| "loss": 2.4555, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.397, |
| "grad_norm": 0.4246795177459717, |
| "learning_rate": 0.00021459174686143718, |
| "loss": 2.3423, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 0.42210015654563904, |
| "learning_rate": 0.00021413892878461717, |
| "loss": 2.4987, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.399, |
| "grad_norm": 0.43938738107681274, |
| "learning_rate": 0.0002136853942916987, |
| "loss": 2.4339, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.4321936368942261, |
| "learning_rate": 0.00021323114844855087, |
| "loss": 2.4217, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.401, |
| "grad_norm": 0.436798632144928, |
| "learning_rate": 0.00021277619632898817, |
| "loss": 2.2219, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.4363677203655243, |
| "learning_rate": 0.00021232054301471428, |
| "loss": 2.3598, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.403, |
| "grad_norm": 0.43887215852737427, |
| "learning_rate": 0.0002118641935952648, |
| "loss": 2.4391, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.41993457078933716, |
| "learning_rate": 0.0002114071531679509, |
| "loss": 2.428, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.405, |
| "grad_norm": 0.4155243933200836, |
| "learning_rate": 0.00021094942683780194, |
| "loss": 2.4014, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 0.4476686716079712, |
| "learning_rate": 0.00021049101971750875, |
| "loss": 2.4425, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.407, |
| "grad_norm": 0.42122960090637207, |
| "learning_rate": 0.00021003193692736643, |
| "loss": 2.3467, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.40306955575942993, |
| "learning_rate": 0.00020957218359521706, |
| "loss": 2.3764, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.409, |
| "grad_norm": 0.47617068886756897, |
| "learning_rate": 0.00020911176485639263, |
| "loss": 2.3937, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.42115652561187744, |
| "learning_rate": 0.00020865068585365745, |
| "loss": 2.3776, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.411, |
| "grad_norm": 0.4249623417854309, |
| "learning_rate": 0.00020818895173715083, |
| "loss": 2.3161, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 0.40884312987327576, |
| "learning_rate": 0.00020772656766432961, |
| "loss": 2.3615, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.413, |
| "grad_norm": 0.4013335108757019, |
| "learning_rate": 0.0002072635387999104, |
| "loss": 2.3916, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 0.40139567852020264, |
| "learning_rate": 0.00020679987031581206, |
| "loss": 2.3001, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.415, |
| "grad_norm": 0.40394365787506104, |
| "learning_rate": 0.00020633556739109782, |
| "loss": 2.3268, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.4449470341205597, |
| "learning_rate": 0.00020587063521191736, |
| "loss": 2.4028, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.417, |
| "grad_norm": 0.42907705903053284, |
| "learning_rate": 0.0002054050789714491, |
| "loss": 2.3472, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 0.4474325478076935, |
| "learning_rate": 0.00020493890386984198, |
| "loss": 2.331, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.419, |
| "grad_norm": 0.4234548807144165, |
| "learning_rate": 0.0002044721151141576, |
| "loss": 2.307, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.45159173011779785, |
| "learning_rate": 0.00020400471791831172, |
| "loss": 2.4009, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.421, |
| "grad_norm": 0.42569705843925476, |
| "learning_rate": 0.00020353671750301648, |
| "loss": 2.2994, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 0.41479888558387756, |
| "learning_rate": 0.0002030681190957216, |
| "loss": 2.3603, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.423, |
| "grad_norm": 0.4099830389022827, |
| "learning_rate": 0.00020259892793055633, |
| "loss": 2.4141, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.44319307804107666, |
| "learning_rate": 0.00020212914924827092, |
| "loss": 2.3434, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.4080663323402405, |
| "learning_rate": 0.00020165878829617794, |
| "loss": 2.3563, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 0.4360628128051758, |
| "learning_rate": 0.00020118785032809385, |
| "loss": 2.3404, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.427, |
| "grad_norm": 0.4054611027240753, |
| "learning_rate": 0.00020071634060428022, |
| "loss": 2.3795, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 0.4493691623210907, |
| "learning_rate": 0.00020024426439138495, |
| "loss": 2.3898, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.429, |
| "grad_norm": 0.42616891860961914, |
| "learning_rate": 0.00019977162696238358, |
| "loss": 2.4323, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.43381640315055847, |
| "learning_rate": 0.00019929843359652017, |
| "loss": 2.3555, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.431, |
| "grad_norm": 0.39783987402915955, |
| "learning_rate": 0.00019882468957924855, |
| "loss": 2.3587, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.4238894283771515, |
| "learning_rate": 0.00019835040020217316, |
| "loss": 2.3681, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.433, |
| "grad_norm": 0.4072917103767395, |
| "learning_rate": 0.00019787557076298998, |
| "loss": 2.3289, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 0.40059733390808105, |
| "learning_rate": 0.00019740020656542733, |
| "loss": 2.4105, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.435, |
| "grad_norm": 0.44404855370521545, |
| "learning_rate": 0.00019692431291918667, |
| "loss": 2.2413, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 0.41822120547294617, |
| "learning_rate": 0.00019644789513988337, |
| "loss": 2.3406, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.437, |
| "grad_norm": 0.4292788803577423, |
| "learning_rate": 0.00019597095854898697, |
| "loss": 2.3422, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 0.41014593839645386, |
| "learning_rate": 0.0001954935084737623, |
| "loss": 2.3293, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.439, |
| "grad_norm": 0.4195305109024048, |
| "learning_rate": 0.00019501555024720944, |
| "loss": 2.3776, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.39849036931991577, |
| "learning_rate": 0.00019453708920800475, |
| "loss": 2.3109, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.441, |
| "grad_norm": 0.4177517592906952, |
| "learning_rate": 0.0001940581307004404, |
| "loss": 2.4408, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 0.4089096486568451, |
| "learning_rate": 0.00019357868007436552, |
| "loss": 2.3956, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.443, |
| "grad_norm": 0.3905986547470093, |
| "learning_rate": 0.00019309874268512596, |
| "loss": 2.3823, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 0.40985196828842163, |
| "learning_rate": 0.00019261832389350463, |
| "loss": 2.3608, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.445, |
| "grad_norm": 0.40191152691841125, |
| "learning_rate": 0.00019213742906566152, |
| "loss": 2.2658, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 0.4191170632839203, |
| "learning_rate": 0.0001916560635730739, |
| "loss": 2.3718, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.447, |
| "grad_norm": 0.44027984142303467, |
| "learning_rate": 0.00019117423279247628, |
| "loss": 2.3377, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.4240022897720337, |
| "learning_rate": 0.00019069194210580019, |
| "loss": 2.3589, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.449, |
| "grad_norm": 0.40984782576560974, |
| "learning_rate": 0.00019020919690011424, |
| "loss": 2.3659, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.40956535935401917, |
| "learning_rate": 0.000189726002567564, |
| "loss": 2.3578, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.451, |
| "grad_norm": 0.4261001944541931, |
| "learning_rate": 0.0001892423645053116, |
| "loss": 2.3671, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 0.42046308517456055, |
| "learning_rate": 0.00018875828811547557, |
| "loss": 2.3699, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.453, |
| "grad_norm": 0.423777312040329, |
| "learning_rate": 0.00018827377880507035, |
| "loss": 2.3395, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 0.41518712043762207, |
| "learning_rate": 0.00018778884198594615, |
| "loss": 2.2932, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.455, |
| "grad_norm": 0.43722566962242126, |
| "learning_rate": 0.00018730348307472824, |
| "loss": 2.2692, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.4038710594177246, |
| "learning_rate": 0.00018681770749275647, |
| "loss": 2.283, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.457, |
| "grad_norm": 0.42532870173454285, |
| "learning_rate": 0.00018633152066602508, |
| "loss": 2.2813, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.4140380024909973, |
| "learning_rate": 0.00018584492802512152, |
| "loss": 2.308, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.459, |
| "grad_norm": 0.4389931559562683, |
| "learning_rate": 0.00018535793500516626, |
| "loss": 2.3299, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.42715537548065186, |
| "learning_rate": 0.00018487054704575183, |
| "loss": 2.3266, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.461, |
| "grad_norm": 0.42225635051727295, |
| "learning_rate": 0.00018438276959088218, |
| "loss": 2.3171, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.4294082820415497, |
| "learning_rate": 0.00018389460808891176, |
| "loss": 2.3159, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.463, |
| "grad_norm": 0.44747427105903625, |
| "learning_rate": 0.00018340606799248485, |
| "loss": 2.2908, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.4157891571521759, |
| "learning_rate": 0.0001829171547584744, |
| "loss": 2.4158, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.465, |
| "grad_norm": 0.4599103331565857, |
| "learning_rate": 0.00018242787384792136, |
| "loss": 2.384, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.40598711371421814, |
| "learning_rate": 0.00018193823072597338, |
| "loss": 2.3425, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.467, |
| "grad_norm": 0.43444859981536865, |
| "learning_rate": 0.00018144823086182406, |
| "loss": 2.3395, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 0.45927369594573975, |
| "learning_rate": 0.00018095787972865162, |
| "loss": 2.5384, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.469, |
| "grad_norm": 0.45538032054901123, |
| "learning_rate": 0.0001804671828035579, |
| "loss": 2.3153, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.3852723240852356, |
| "learning_rate": 0.00017997614556750723, |
| "loss": 2.3461, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.471, |
| "grad_norm": 0.4270138144493103, |
| "learning_rate": 0.00017948477350526504, |
| "loss": 2.2924, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.41092297434806824, |
| "learning_rate": 0.00017899307210533663, |
| "loss": 2.3212, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.473, |
| "grad_norm": 0.4263060986995697, |
| "learning_rate": 0.00017850104685990614, |
| "loss": 2.3173, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 0.4177812933921814, |
| "learning_rate": 0.0001780087032647748, |
| "loss": 2.3849, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.4262104630470276, |
| "learning_rate": 0.00017751604681929967, |
| "loss": 2.3068, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 0.40943190455436707, |
| "learning_rate": 0.00017702308302633253, |
| "loss": 2.2976, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.477, |
| "grad_norm": 0.40264710783958435, |
| "learning_rate": 0.00017652981739215784, |
| "loss": 2.2258, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.4030868411064148, |
| "learning_rate": 0.0001760362554264319, |
| "loss": 2.3503, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.479, |
| "grad_norm": 0.40495970845222473, |
| "learning_rate": 0.00017554240264212062, |
| "loss": 2.2957, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.413086473941803, |
| "learning_rate": 0.0001750482645554385, |
| "loss": 2.3362, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.481, |
| "grad_norm": 0.42103326320648193, |
| "learning_rate": 0.00017455384668578676, |
| "loss": 2.4136, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 0.4094327688217163, |
| "learning_rate": 0.00017405915455569163, |
| "loss": 2.3414, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.483, |
| "grad_norm": 0.44162988662719727, |
| "learning_rate": 0.00017356419369074288, |
| "loss": 2.2554, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 0.41292014718055725, |
| "learning_rate": 0.00017306896961953193, |
| "loss": 2.2463, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.485, |
| "grad_norm": 0.43508976697921753, |
| "learning_rate": 0.00017257348787359022, |
| "loss": 2.2801, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 0.4286428689956665, |
| "learning_rate": 0.00017207775398732716, |
| "loss": 2.2726, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.487, |
| "grad_norm": 0.4207102060317993, |
| "learning_rate": 0.0001715817734979687, |
| "loss": 2.3964, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.4012376368045807, |
| "learning_rate": 0.00017108555194549524, |
| "loss": 2.3938, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.489, |
| "grad_norm": 0.42363470792770386, |
| "learning_rate": 0.00017058909487257972, |
| "loss": 2.3354, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.4191186726093292, |
| "learning_rate": 0.0001700924078245259, |
| "loss": 2.3304, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.491, |
| "grad_norm": 0.4244017004966736, |
| "learning_rate": 0.00016959549634920623, |
| "loss": 2.2543, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 0.418206125497818, |
| "learning_rate": 0.00016909836599699997, |
| "loss": 2.322, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.493, |
| "grad_norm": 0.4423177242279053, |
| "learning_rate": 0.0001686010223207312, |
| "loss": 2.1921, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 0.4443405270576477, |
| "learning_rate": 0.00016810347087560672, |
| "loss": 2.3374, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.495, |
| "grad_norm": 0.4554826617240906, |
| "learning_rate": 0.0001676057172191542, |
| "loss": 2.3143, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.41316652297973633, |
| "learning_rate": 0.00016710776691115968, |
| "loss": 2.3136, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.497, |
| "grad_norm": 0.4269903302192688, |
| "learning_rate": 0.00016660962551360613, |
| "loss": 2.3707, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 0.4215530753135681, |
| "learning_rate": 0.00016611129859061066, |
| "loss": 2.3857, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.499, |
| "grad_norm": 0.42953386902809143, |
| "learning_rate": 0.00016561279170836267, |
| "loss": 2.2289, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.41060370206832886, |
| "learning_rate": 0.00016511411043506184, |
| "loss": 2.2564, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.501, |
| "grad_norm": 0.425135999917984, |
| "learning_rate": 0.00016461526034085563, |
| "loss": 2.4423, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 0.4176621735095978, |
| "learning_rate": 0.00016411624699777717, |
| "loss": 2.3313, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.503, |
| "grad_norm": 0.4177638590335846, |
| "learning_rate": 0.000163617075979683, |
| "loss": 2.2815, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.42569682002067566, |
| "learning_rate": 0.0001631177528621911, |
| "loss": 2.299, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.505, |
| "grad_norm": 0.41489818692207336, |
| "learning_rate": 0.00016261828322261803, |
| "loss": 2.2341, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 0.4402053654193878, |
| "learning_rate": 0.0001621186726399172, |
| "loss": 2.3399, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.507, |
| "grad_norm": 0.4353001117706299, |
| "learning_rate": 0.0001616189266946162, |
| "loss": 2.2805, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 0.4333752393722534, |
| "learning_rate": 0.00016111905096875468, |
| "loss": 2.2669, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.509, |
| "grad_norm": 0.4180094599723816, |
| "learning_rate": 0.0001606190510458218, |
| "loss": 2.3054, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.4101157784461975, |
| "learning_rate": 0.00016011893251069404, |
| "loss": 2.3033, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.511, |
| "grad_norm": 0.41150620579719543, |
| "learning_rate": 0.0001596187009495728, |
| "loss": 2.3246, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.4234907925128937, |
| "learning_rate": 0.00015911836194992183, |
| "loss": 2.3146, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.513, |
| "grad_norm": 0.4023984968662262, |
| "learning_rate": 0.00015861792110040496, |
| "loss": 2.2686, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 0.41848230361938477, |
| "learning_rate": 0.0001581173839908238, |
| "loss": 2.249, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.515, |
| "grad_norm": 0.3979667127132416, |
| "learning_rate": 0.000157616756212055, |
| "loss": 2.2636, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 0.4226064085960388, |
| "learning_rate": 0.00015711604335598808, |
| "loss": 2.2615, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.517, |
| "grad_norm": 0.4342791736125946, |
| "learning_rate": 0.00015661525101546268, |
| "loss": 2.374, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 0.4076896011829376, |
| "learning_rate": 0.00015611438478420648, |
| "loss": 2.2511, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.519, |
| "grad_norm": 0.42497551441192627, |
| "learning_rate": 0.00015561345025677235, |
| "loss": 2.306, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.40860408544540405, |
| "learning_rate": 0.00015511245302847606, |
| "loss": 2.2657, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.521, |
| "grad_norm": 0.43411552906036377, |
| "learning_rate": 0.00015461139869533383, |
| "loss": 2.3071, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 0.44043874740600586, |
| "learning_rate": 0.00015411029285399946, |
| "loss": 2.316, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.523, |
| "grad_norm": 0.4515673518180847, |
| "learning_rate": 0.0001536091411017024, |
| "loss": 2.2077, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 0.42663195729255676, |
| "learning_rate": 0.0001531079490361847, |
| "loss": 2.2715, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.4274803102016449, |
| "learning_rate": 0.00015260672225563877, |
| "loss": 2.2968, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 0.4248811900615692, |
| "learning_rate": 0.0001521054663586448, |
| "loss": 2.3694, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.527, |
| "grad_norm": 0.4073241651058197, |
| "learning_rate": 0.00015160418694410815, |
| "loss": 2.2967, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.42025795578956604, |
| "learning_rate": 0.00015110288961119693, |
| "loss": 2.2745, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.529, |
| "grad_norm": 0.42329052090644836, |
| "learning_rate": 0.00015060157995927932, |
| "loss": 2.3771, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.38789117336273193, |
| "learning_rate": 0.0001501002635878612, |
| "loss": 2.2525, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.531, |
| "grad_norm": 0.43104180693626404, |
| "learning_rate": 0.00014959894609652334, |
| "loss": 2.29, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 0.42107024788856506, |
| "learning_rate": 0.00014909763308485925, |
| "loss": 2.3289, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.533, |
| "grad_norm": 0.419460654258728, |
| "learning_rate": 0.0001485963301524122, |
| "loss": 2.24, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 0.45686954259872437, |
| "learning_rate": 0.0001480950428986129, |
| "loss": 2.2585, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.535, |
| "grad_norm": 0.4176424741744995, |
| "learning_rate": 0.0001475937769227171, |
| "loss": 2.3315, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.4299408197402954, |
| "learning_rate": 0.00014709253782374274, |
| "loss": 2.2319, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.537, |
| "grad_norm": 0.42534300684928894, |
| "learning_rate": 0.00014659133120040755, |
| "loss": 2.2121, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 0.42528364062309265, |
| "learning_rate": 0.00014609016265106656, |
| "loss": 2.2479, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.539, |
| "grad_norm": 0.4252580404281616, |
| "learning_rate": 0.0001455890377736495, |
| "loss": 2.2287, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.4283309876918793, |
| "learning_rate": 0.00014508796216559832, |
| "loss": 2.2557, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.541, |
| "grad_norm": 0.434817373752594, |
| "learning_rate": 0.00014458694142380454, |
| "loss": 2.2359, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 0.41941994428634644, |
| "learning_rate": 0.00014408598114454705, |
| "loss": 2.2425, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.543, |
| "grad_norm": 0.42644003033638, |
| "learning_rate": 0.00014358508692342926, |
| "loss": 2.3102, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.45429447293281555, |
| "learning_rate": 0.0001430842643553166, |
| "loss": 2.2705, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.545, |
| "grad_norm": 0.38917839527130127, |
| "learning_rate": 0.00014258351903427438, |
| "loss": 2.2375, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 0.42386049032211304, |
| "learning_rate": 0.00014208285655350496, |
| "loss": 2.2354, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.547, |
| "grad_norm": 0.43970856070518494, |
| "learning_rate": 0.00014158228250528546, |
| "loss": 2.3308, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 0.4227691888809204, |
| "learning_rate": 0.00014108180248090506, |
| "loss": 2.3061, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.549, |
| "grad_norm": 0.39985859394073486, |
| "learning_rate": 0.000140581422070603, |
| "loss": 2.2377, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.3900357484817505, |
| "learning_rate": 0.00014008114686350556, |
| "loss": 2.2854, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.551, |
| "grad_norm": 0.4252527356147766, |
| "learning_rate": 0.00013958098244756418, |
| "loss": 2.2305, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.3899967074394226, |
| "learning_rate": 0.0001390809344094926, |
| "loss": 2.3445, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.553, |
| "grad_norm": 0.4172196090221405, |
| "learning_rate": 0.0001385810083347047, |
| "loss": 2.2162, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 0.43587014079093933, |
| "learning_rate": 0.0001380812098072522, |
| "loss": 2.2852, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.555, |
| "grad_norm": 0.41883230209350586, |
| "learning_rate": 0.0001375815444097619, |
| "loss": 2.2875, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 0.41043713688850403, |
| "learning_rate": 0.0001370820177233738, |
| "loss": 2.3192, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.557, |
| "grad_norm": 0.42568516731262207, |
| "learning_rate": 0.00013658263532767835, |
| "loss": 2.2224, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 0.44088488817214966, |
| "learning_rate": 0.00013608340280065446, |
| "loss": 2.2605, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.559, |
| "grad_norm": 0.39374229311943054, |
| "learning_rate": 0.0001355843257186069, |
| "loss": 2.2003, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.398308128118515, |
| "learning_rate": 0.00013508540965610424, |
| "loss": 2.27, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.561, |
| "grad_norm": 0.3940664231777191, |
| "learning_rate": 0.00013458666018591656, |
| "loss": 2.1863, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 0.4169365167617798, |
| "learning_rate": 0.00013408808287895306, |
| "loss": 2.1897, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.563, |
| "grad_norm": 0.4238418638706207, |
| "learning_rate": 0.0001335896833041999, |
| "loss": 2.258, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 0.423495352268219, |
| "learning_rate": 0.00013309146702865805, |
| "loss": 2.2247, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.565, |
| "grad_norm": 0.4261130690574646, |
| "learning_rate": 0.00013259343961728108, |
| "loss": 2.2895, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 0.42107266187667847, |
| "learning_rate": 0.00013209560663291303, |
| "loss": 2.217, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.567, |
| "grad_norm": 0.4057311713695526, |
| "learning_rate": 0.0001315979736362261, |
| "loss": 2.2517, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.41574499011039734, |
| "learning_rate": 0.00013110054618565886, |
| "loss": 2.2166, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.569, |
| "grad_norm": 0.3923725485801697, |
| "learning_rate": 0.0001306033298373539, |
| "loss": 2.2217, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.4277208149433136, |
| "learning_rate": 0.0001301063301450958, |
| "loss": 2.257, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.571, |
| "grad_norm": 0.4455857574939728, |
| "learning_rate": 0.00012960955266024917, |
| "loss": 2.2104, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 0.43064215779304504, |
| "learning_rate": 0.00012911300293169664, |
| "loss": 2.2939, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.573, |
| "grad_norm": 0.4337853789329529, |
| "learning_rate": 0.00012861668650577688, |
| "loss": 2.3123, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 0.41334277391433716, |
| "learning_rate": 0.00012812060892622264, |
| "loss": 2.2551, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.4144851863384247, |
| "learning_rate": 0.00012762477573409868, |
| "loss": 2.241, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.40655460953712463, |
| "learning_rate": 0.00012712919246774017, |
| "loss": 2.1983, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.577, |
| "grad_norm": 0.4161902666091919, |
| "learning_rate": 0.0001266338646626906, |
| "loss": 2.1633, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 0.45048007369041443, |
| "learning_rate": 0.00012613879785163998, |
| "loss": 2.3365, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.579, |
| "grad_norm": 0.40051379799842834, |
| "learning_rate": 0.00012564399756436314, |
| "loss": 2.1469, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.4100329875946045, |
| "learning_rate": 0.00012514946932765794, |
| "loss": 2.1957, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.581, |
| "grad_norm": 0.40546661615371704, |
| "learning_rate": 0.00012465521866528346, |
| "loss": 2.182, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 0.4651910662651062, |
| "learning_rate": 0.00012416125109789827, |
| "loss": 2.2542, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.583, |
| "grad_norm": 0.4111328721046448, |
| "learning_rate": 0.00012366757214299896, |
| "loss": 2.2598, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.4306609630584717, |
| "learning_rate": 0.00012317418731485832, |
| "loss": 2.2524, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.585, |
| "grad_norm": 0.4010145664215088, |
| "learning_rate": 0.00012268110212446386, |
| "loss": 2.2163, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 0.44942528009414673, |
| "learning_rate": 0.00012218832207945609, |
| "loss": 2.3242, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.587, |
| "grad_norm": 0.42698466777801514, |
| "learning_rate": 0.00012169585268406728, |
| "loss": 2.2714, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 0.420461505651474, |
| "learning_rate": 0.00012120369943905976, |
| "loss": 2.2011, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.589, |
| "grad_norm": 0.4152168929576874, |
| "learning_rate": 0.00012071186784166456, |
| "loss": 2.2704, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.39992696046829224, |
| "learning_rate": 0.00012022036338551985, |
| "loss": 2.2062, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.591, |
| "grad_norm": 0.39946138858795166, |
| "learning_rate": 0.00011972919156060985, |
| "loss": 2.2606, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.4226006865501404, |
| "learning_rate": 0.00011923835785320349, |
| "loss": 2.252, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.593, |
| "grad_norm": 0.416586697101593, |
| "learning_rate": 0.00011874786774579264, |
| "loss": 2.1411, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 0.44908973574638367, |
| "learning_rate": 0.00011825772671703155, |
| "loss": 2.2244, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.595, |
| "grad_norm": 0.42042890191078186, |
| "learning_rate": 0.0001177679402416753, |
| "loss": 2.1188, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 0.4261787235736847, |
| "learning_rate": 0.00011727851379051865, |
| "loss": 2.2926, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.597, |
| "grad_norm": 0.4221886992454529, |
| "learning_rate": 0.00011678945283033488, |
| "loss": 2.2446, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 0.41435757279396057, |
| "learning_rate": 0.000116300762823815, |
| "loss": 2.2215, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.599, |
| "grad_norm": 0.4205993115901947, |
| "learning_rate": 0.00011581244922950652, |
| "loss": 2.2375, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.41477659344673157, |
| "learning_rate": 0.00011532451750175245, |
| "loss": 2.2337, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.601, |
| "grad_norm": 0.4197056293487549, |
| "learning_rate": 0.00011483697309063048, |
| "loss": 2.241, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 0.4107452929019928, |
| "learning_rate": 0.00011434982144189203, |
| "loss": 2.2036, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.603, |
| "grad_norm": 0.4387679994106293, |
| "learning_rate": 0.00011386306799690153, |
| "loss": 2.1471, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 0.4124211072921753, |
| "learning_rate": 0.0001133767181925756, |
| "loss": 2.1362, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.605, |
| "grad_norm": 0.4435650408267975, |
| "learning_rate": 0.00011289077746132207, |
| "loss": 2.2456, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 0.4163254201412201, |
| "learning_rate": 0.00011240525123097982, |
| "loss": 2.1252, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.607, |
| "grad_norm": 0.44170472025871277, |
| "learning_rate": 0.00011192014492475771, |
| "loss": 2.2534, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.41352444887161255, |
| "learning_rate": 0.00011143546396117416, |
| "loss": 2.2033, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.609, |
| "grad_norm": 0.44527488946914673, |
| "learning_rate": 0.00011095121375399656, |
| "loss": 2.1998, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.42093437910079956, |
| "learning_rate": 0.00011046739971218091, |
| "loss": 2.261, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.611, |
| "grad_norm": 0.44440940022468567, |
| "learning_rate": 0.00010998402723981153, |
| "loss": 2.182, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 0.4243025779724121, |
| "learning_rate": 0.00010950110173604009, |
| "loss": 2.1064, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.613, |
| "grad_norm": 0.4191250801086426, |
| "learning_rate": 0.00010901862859502616, |
| "loss": 2.2331, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 0.392995148897171, |
| "learning_rate": 0.00010853661320587631, |
| "loss": 2.2349, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.615, |
| "grad_norm": 0.41072773933410645, |
| "learning_rate": 0.00010805506095258419, |
| "loss": 2.2145, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.42511963844299316, |
| "learning_rate": 0.00010757397721397026, |
| "loss": 2.3133, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.617, |
| "grad_norm": 0.4170040488243103, |
| "learning_rate": 0.00010709336736362188, |
| "loss": 2.2486, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 0.44579964876174927, |
| "learning_rate": 0.0001066132367698332, |
| "loss": 2.2715, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.619, |
| "grad_norm": 0.4112447500228882, |
| "learning_rate": 0.00010613359079554517, |
| "loss": 2.205, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.4118829667568207, |
| "learning_rate": 0.00010565443479828558, |
| "loss": 2.2176, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.621, |
| "grad_norm": 0.4276077449321747, |
| "learning_rate": 0.00010517577413010938, |
| "loss": 2.2166, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 0.41999852657318115, |
| "learning_rate": 0.00010469761413753881, |
| "loss": 2.1811, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.623, |
| "grad_norm": 0.43243396282196045, |
| "learning_rate": 0.0001042199601615037, |
| "loss": 2.2746, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.3946484327316284, |
| "learning_rate": 0.00010374281753728167, |
| "loss": 2.1711, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.40924206376075745, |
| "learning_rate": 0.00010326619159443889, |
| "loss": 2.2793, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 0.44488149881362915, |
| "learning_rate": 0.00010279008765677017, |
| "loss": 2.1942, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.627, |
| "grad_norm": 0.43026819825172424, |
| "learning_rate": 0.00010231451104223966, |
| "loss": 2.184, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 0.4194297194480896, |
| "learning_rate": 0.00010183946706292149, |
| "loss": 2.2779, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.629, |
| "grad_norm": 0.40315303206443787, |
| "learning_rate": 0.00010136496102494028, |
| "loss": 2.2575, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.42293116450309753, |
| "learning_rate": 0.00010089099822841223, |
| "loss": 2.1476, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.631, |
| "grad_norm": 0.4373874366283417, |
| "learning_rate": 0.00010041758396738528, |
| "loss": 2.1989, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.4246419072151184, |
| "learning_rate": 9.994472352978073e-05, |
| "loss": 2.303, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.633, |
| "grad_norm": 0.41784340143203735, |
| "learning_rate": 9.94724221973336e-05, |
| "loss": 2.1698, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 0.4105053246021271, |
| "learning_rate": 9.900068524553393e-05, |
| "loss": 2.1971, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.635, |
| "grad_norm": 0.4249609708786011, |
| "learning_rate": 9.852951794356768e-05, |
| "loss": 2.1984, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 0.44941267371177673, |
| "learning_rate": 9.805892555425801e-05, |
| "loss": 2.1712, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.637, |
| "grad_norm": 0.43053391575813293, |
| "learning_rate": 9.758891333400652e-05, |
| "loss": 2.2137, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 0.43545395135879517, |
| "learning_rate": 9.711948653273438e-05, |
| "loss": 2.2478, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.639, |
| "grad_norm": 0.420151025056839, |
| "learning_rate": 9.665065039382376e-05, |
| "loss": 2.2099, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.42247268557548523, |
| "learning_rate": 9.61824101540593e-05, |
| "loss": 2.1647, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.641, |
| "grad_norm": 0.4133879542350769, |
| "learning_rate": 9.571477104356962e-05, |
| "loss": 2.2214, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 0.41226497292518616, |
| "learning_rate": 9.524773828576888e-05, |
| "loss": 2.2337, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.643, |
| "grad_norm": 0.4366952180862427, |
| "learning_rate": 9.47813170972983e-05, |
| "loss": 2.213, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 0.4010978937149048, |
| "learning_rate": 9.43155126879682e-05, |
| "loss": 2.2454, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.645, |
| "grad_norm": 0.4122413992881775, |
| "learning_rate": 9.385033026069957e-05, |
| "loss": 2.2107, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 0.4530417323112488, |
| "learning_rate": 9.338577501146599e-05, |
| "loss": 2.2506, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.647, |
| "grad_norm": 0.44792503118515015, |
| "learning_rate": 9.292185212923554e-05, |
| "loss": 2.267, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.4253360331058502, |
| "learning_rate": 9.245856679591302e-05, |
| "loss": 2.1882, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.649, |
| "grad_norm": 0.42117589712142944, |
| "learning_rate": 9.199592418628206e-05, |
| "loss": 2.2687, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.43629190325737, |
| "learning_rate": 9.153392946794694e-05, |
| "loss": 2.2089, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.651, |
| "grad_norm": 0.419784814119339, |
| "learning_rate": 9.107258780127536e-05, |
| "loss": 2.157, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 0.40995609760284424, |
| "learning_rate": 9.061190433934059e-05, |
| "loss": 2.233, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.653, |
| "grad_norm": 0.43481624126434326, |
| "learning_rate": 9.015188422786391e-05, |
| "loss": 2.1447, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 0.40843117237091064, |
| "learning_rate": 8.9692532605157e-05, |
| "loss": 2.1295, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.655, |
| "grad_norm": 0.42146334052085876, |
| "learning_rate": 8.92338546020648e-05, |
| "loss": 2.1455, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.3924829661846161, |
| "learning_rate": 8.877585534190814e-05, |
| "loss": 2.1831, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.657, |
| "grad_norm": 0.40246203541755676, |
| "learning_rate": 8.831853994042638e-05, |
| "loss": 2.2533, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 0.4149978458881378, |
| "learning_rate": 8.786191350572032e-05, |
| "loss": 2.1951, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.659, |
| "grad_norm": 0.4230225384235382, |
| "learning_rate": 8.740598113819519e-05, |
| "loss": 2.2113, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.41584622859954834, |
| "learning_rate": 8.695074793050369e-05, |
| "loss": 2.2119, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.661, |
| "grad_norm": 0.4212537407875061, |
| "learning_rate": 8.649621896748913e-05, |
| "loss": 2.235, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 0.39631447196006775, |
| "learning_rate": 8.604239932612844e-05, |
| "loss": 2.2289, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.663, |
| "grad_norm": 0.43470436334609985, |
| "learning_rate": 8.558929407547562e-05, |
| "loss": 2.1882, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.4370731711387634, |
| "learning_rate": 8.513690827660527e-05, |
| "loss": 2.1841, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.665, |
| "grad_norm": 0.41377973556518555, |
| "learning_rate": 8.468524698255575e-05, |
| "loss": 2.1727, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 0.3933526575565338, |
| "learning_rate": 8.42343152382728e-05, |
| "loss": 2.1645, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.667, |
| "grad_norm": 0.4057426154613495, |
| "learning_rate": 8.378411808055346e-05, |
| "loss": 2.1732, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 0.45039045810699463, |
| "learning_rate": 8.333466053798954e-05, |
| "loss": 2.2185, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.669, |
| "grad_norm": 0.4107955992221832, |
| "learning_rate": 8.28859476309115e-05, |
| "loss": 2.1829, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.43991073966026306, |
| "learning_rate": 8.243798437133237e-05, |
| "loss": 2.1252, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.671, |
| "grad_norm": 0.4022817611694336, |
| "learning_rate": 8.199077576289188e-05, |
| "loss": 2.1649, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.4041346311569214, |
| "learning_rate": 8.154432680080062e-05, |
| "loss": 2.1329, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.673, |
| "grad_norm": 0.4271688759326935, |
| "learning_rate": 8.109864247178372e-05, |
| "loss": 2.2025, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 0.4121275842189789, |
| "learning_rate": 8.065372775402587e-05, |
| "loss": 2.2487, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 0.42577147483825684, |
| "learning_rate": 8.020958761711541e-05, |
| "loss": 2.2157, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 0.4191531240940094, |
| "learning_rate": 7.976622702198865e-05, |
| "loss": 2.1838, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.677, |
| "grad_norm": 0.4344123601913452, |
| "learning_rate": 7.932365092087456e-05, |
| "loss": 2.2468, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 0.4339258670806885, |
| "learning_rate": 7.888186425723967e-05, |
| "loss": 2.2697, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.679, |
| "grad_norm": 0.4105294942855835, |
| "learning_rate": 7.844087196573273e-05, |
| "loss": 2.1848, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.4143899381160736, |
| "learning_rate": 7.800067897212935e-05, |
| "loss": 2.1466, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.681, |
| "grad_norm": 0.4071793854236603, |
| "learning_rate": 7.756129019327723e-05, |
| "loss": 2.1962, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 0.410900354385376, |
| "learning_rate": 7.712271053704131e-05, |
| "loss": 2.1805, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.683, |
| "grad_norm": 0.39401447772979736, |
| "learning_rate": 7.668494490224872e-05, |
| "loss": 2.1064, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 0.4203227162361145, |
| "learning_rate": 7.624799817863411e-05, |
| "loss": 2.2015, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.685, |
| "grad_norm": 0.4399988353252411, |
| "learning_rate": 7.581187524678525e-05, |
| "loss": 2.1792, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 0.41790443658828735, |
| "learning_rate": 7.53765809780884e-05, |
| "loss": 2.2491, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.687, |
| "grad_norm": 0.40690094232559204, |
| "learning_rate": 7.494212023467359e-05, |
| "loss": 2.2041, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.411528617143631, |
| "learning_rate": 7.450849786936075e-05, |
| "loss": 2.1019, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.689, |
| "grad_norm": 0.443026602268219, |
| "learning_rate": 7.407571872560541e-05, |
| "loss": 2.1692, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.4203071892261505, |
| "learning_rate": 7.364378763744429e-05, |
| "loss": 2.1624, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.691, |
| "grad_norm": 0.40647628903388977, |
| "learning_rate": 7.321270942944182e-05, |
| "loss": 2.2838, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 0.4123462438583374, |
| "learning_rate": 7.278248891663567e-05, |
| "loss": 2.08, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.693, |
| "grad_norm": 0.3882538676261902, |
| "learning_rate": 7.235313090448357e-05, |
| "loss": 2.1054, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 0.44436848163604736, |
| "learning_rate": 7.192464018880904e-05, |
| "loss": 2.1851, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.695, |
| "grad_norm": 0.45201411843299866, |
| "learning_rate": 7.149702155574834e-05, |
| "loss": 2.2164, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.4433143734931946, |
| "learning_rate": 7.10702797816967e-05, |
| "loss": 2.1391, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.697, |
| "grad_norm": 0.4341137409210205, |
| "learning_rate": 7.064441963325487e-05, |
| "loss": 2.1427, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 0.42594924569129944, |
| "learning_rate": 7.021944586717639e-05, |
| "loss": 2.2405, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.699, |
| "grad_norm": 0.4368554651737213, |
| "learning_rate": 6.979536323031396e-05, |
| "loss": 2.2518, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.3928126096725464, |
| "learning_rate": 6.937217645956655e-05, |
| "loss": 2.1556, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.701, |
| "grad_norm": 0.3956170082092285, |
| "learning_rate": 6.894989028182651e-05, |
| "loss": 2.1703, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 0.39387911558151245, |
| "learning_rate": 6.852850941392695e-05, |
| "loss": 2.1718, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.703, |
| "grad_norm": 0.4357449412345886, |
| "learning_rate": 6.810803856258872e-05, |
| "loss": 2.223, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.42475512623786926, |
| "learning_rate": 6.768848242436801e-05, |
| "loss": 2.2292, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.705, |
| "grad_norm": 0.44198477268218994, |
| "learning_rate": 6.7269845685604e-05, |
| "loss": 2.1521, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 0.4247047007083893, |
| "learning_rate": 6.685213302236643e-05, |
| "loss": 2.1793, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.707, |
| "grad_norm": 0.4207407236099243, |
| "learning_rate": 6.643534910040319e-05, |
| "loss": 2.2442, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 0.4190295934677124, |
| "learning_rate": 6.601949857508839e-05, |
| "loss": 2.234, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.709, |
| "grad_norm": 0.44241568446159363, |
| "learning_rate": 6.560458609137043e-05, |
| "loss": 2.1567, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.5026627779006958, |
| "learning_rate": 6.519061628372005e-05, |
| "loss": 2.1802, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.711, |
| "grad_norm": 0.4083501100540161, |
| "learning_rate": 6.477759377607821e-05, |
| "loss": 2.2652, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.40966013073921204, |
| "learning_rate": 6.436552318180513e-05, |
| "loss": 2.1943, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.713, |
| "grad_norm": 0.4204246997833252, |
| "learning_rate": 6.395440910362828e-05, |
| "loss": 2.2324, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 0.4089759290218353, |
| "learning_rate": 6.354425613359101e-05, |
| "loss": 2.2376, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.715, |
| "grad_norm": 0.40603703260421753, |
| "learning_rate": 6.31350688530013e-05, |
| "loss": 2.2388, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 0.4146242141723633, |
| "learning_rate": 6.272685183238074e-05, |
| "loss": 2.1973, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.717, |
| "grad_norm": 0.4212912619113922, |
| "learning_rate": 6.231960963141334e-05, |
| "loss": 2.1458, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 0.41953805088996887, |
| "learning_rate": 6.191334679889455e-05, |
| "loss": 2.176, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.719, |
| "grad_norm": 0.40272924304008484, |
| "learning_rate": 6.150806787268044e-05, |
| "loss": 2.1482, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.4300851821899414, |
| "learning_rate": 6.110377737963731e-05, |
| "loss": 2.204, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.721, |
| "grad_norm": 0.4585989713668823, |
| "learning_rate": 6.0700479835590733e-05, |
| "loss": 2.1059, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 0.3827567994594574, |
| "learning_rate": 6.029817974527527e-05, |
| "loss": 2.11, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.723, |
| "grad_norm": 0.4101276695728302, |
| "learning_rate": 5.9896881602284275e-05, |
| "loss": 2.2383, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 0.42217323184013367, |
| "learning_rate": 5.949658988901959e-05, |
| "loss": 2.2011, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 0.419291615486145, |
| "learning_rate": 5.9097309076641424e-05, |
| "loss": 2.1178, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 0.4263475835323334, |
| "learning_rate": 5.8699043625018415e-05, |
| "loss": 2.1247, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.727, |
| "grad_norm": 0.4190998077392578, |
| "learning_rate": 5.830179798267806e-05, |
| "loss": 2.2097, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.41809847950935364, |
| "learning_rate": 5.79055765867566e-05, |
| "loss": 2.2182, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.729, |
| "grad_norm": 0.43095046281814575, |
| "learning_rate": 5.7510383862949934e-05, |
| "loss": 2.1382, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.4270274341106415, |
| "learning_rate": 5.711622422546374e-05, |
| "loss": 2.1623, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.731, |
| "grad_norm": 0.4485456645488739, |
| "learning_rate": 5.672310207696454e-05, |
| "loss": 2.2079, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 0.4084364175796509, |
| "learning_rate": 5.6331021808530195e-05, |
| "loss": 2.1675, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.733, |
| "grad_norm": 0.4245437681674957, |
| "learning_rate": 5.5939987799601204e-05, |
| "loss": 2.2246, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 0.42533889412879944, |
| "learning_rate": 5.5550004417931477e-05, |
| "loss": 2.1298, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.735, |
| "grad_norm": 0.40783774852752686, |
| "learning_rate": 5.516107601953962e-05, |
| "loss": 2.1108, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.43854960799217224, |
| "learning_rate": 5.4773206948660505e-05, |
| "loss": 2.1013, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.737, |
| "grad_norm": 0.4022478461265564, |
| "learning_rate": 5.4386401537696536e-05, |
| "loss": 2.1055, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 0.4073084592819214, |
| "learning_rate": 5.40006641071692e-05, |
| "loss": 2.1329, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.739, |
| "grad_norm": 0.4003308117389679, |
| "learning_rate": 5.36159989656709e-05, |
| "loss": 2.2016, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.43242812156677246, |
| "learning_rate": 5.3232410409817006e-05, |
| "loss": 2.1525, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.741, |
| "grad_norm": 0.44237396121025085, |
| "learning_rate": 5.2849902724197526e-05, |
| "loss": 2.1776, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 0.424785315990448, |
| "learning_rate": 5.2468480181329384e-05, |
| "loss": 2.1294, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.743, |
| "grad_norm": 0.4005042612552643, |
| "learning_rate": 5.208814704160888e-05, |
| "loss": 2.1107, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.41858258843421936, |
| "learning_rate": 5.1708907553263925e-05, |
| "loss": 2.1498, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.745, |
| "grad_norm": 0.4518043100833893, |
| "learning_rate": 5.13307659523065e-05, |
| "loss": 2.1934, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 0.42560485005378723, |
| "learning_rate": 5.095372646248547e-05, |
| "loss": 2.2194, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.747, |
| "grad_norm": 0.4324474632740021, |
| "learning_rate": 5.057779329523947e-05, |
| "loss": 2.1427, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 0.4265004098415375, |
| "learning_rate": 5.020297064964985e-05, |
| "loss": 2.1932, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.749, |
| "grad_norm": 0.4324226975440979, |
| "learning_rate": 4.982926271239342e-05, |
| "loss": 2.0714, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.4050389230251312, |
| "learning_rate": 4.945667365769621e-05, |
| "loss": 2.1401, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.751, |
| "grad_norm": 0.4235606789588928, |
| "learning_rate": 4.908520764728665e-05, |
| "loss": 2.2192, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.446185439825058, |
| "learning_rate": 4.8714868830348854e-05, |
| "loss": 2.1989, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.753, |
| "grad_norm": 0.39757952094078064, |
| "learning_rate": 4.83456613434765e-05, |
| "loss": 2.1534, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 0.42775917053222656, |
| "learning_rate": 4.7977589310626704e-05, |
| "loss": 2.17, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.755, |
| "grad_norm": 0.4254078269004822, |
| "learning_rate": 4.761065684307383e-05, |
| "loss": 2.1444, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 0.416073739528656, |
| "learning_rate": 4.724486803936337e-05, |
| "loss": 2.1706, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.757, |
| "grad_norm": 0.4071180820465088, |
| "learning_rate": 4.688022698526658e-05, |
| "loss": 2.1173, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 0.41832828521728516, |
| "learning_rate": 4.651673775373463e-05, |
| "loss": 2.2064, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.759, |
| "grad_norm": 0.41309747099876404, |
| "learning_rate": 4.615440440485299e-05, |
| "loss": 2.1729, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.43315672874450684, |
| "learning_rate": 4.579323098579626e-05, |
| "loss": 2.2041, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.761, |
| "grad_norm": 0.43665811419487, |
| "learning_rate": 4.5433221530782945e-05, |
| "loss": 2.1855, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 0.4120488166809082, |
| "learning_rate": 4.5074380061030366e-05, |
| "loss": 2.2696, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.763, |
| "grad_norm": 0.41258561611175537, |
| "learning_rate": 4.471671058470968e-05, |
| "loss": 2.2001, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 0.4156684875488281, |
| "learning_rate": 4.436021709690113e-05, |
| "loss": 2.1246, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.765, |
| "grad_norm": 0.4078438878059387, |
| "learning_rate": 4.400490357954959e-05, |
| "loss": 2.1279, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 0.41062772274017334, |
| "learning_rate": 4.365077400141981e-05, |
| "loss": 2.2157, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.767, |
| "grad_norm": 0.4282727539539337, |
| "learning_rate": 4.329783231805235e-05, |
| "loss": 2.3185, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.4196500778198242, |
| "learning_rate": 4.294608247171917e-05, |
| "loss": 2.0916, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.769, |
| "grad_norm": 0.4065400958061218, |
| "learning_rate": 4.259552839137982e-05, |
| "loss": 2.1515, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.4283042550086975, |
| "learning_rate": 4.2246173992637276e-05, |
| "loss": 2.1246, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.771, |
| "grad_norm": 0.4189644753932953, |
| "learning_rate": 4.1898023177694563e-05, |
| "loss": 2.1942, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 0.4234643876552582, |
| "learning_rate": 4.1551079835310816e-05, |
| "loss": 2.0987, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.773, |
| "grad_norm": 0.4286406934261322, |
| "learning_rate": 4.120534784075802e-05, |
| "loss": 2.109, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 0.42778655886650085, |
| "learning_rate": 4.086083105577779e-05, |
| "loss": 2.1574, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 0.40660133957862854, |
| "learning_rate": 4.051753332853803e-05, |
| "loss": 2.1345, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.41279226541519165, |
| "learning_rate": 4.0175458493590195e-05, |
| "loss": 2.0764, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.777, |
| "grad_norm": 0.40562158823013306, |
| "learning_rate": 3.98346103718262e-05, |
| "loss": 2.1556, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 0.42849069833755493, |
| "learning_rate": 3.9494992770436014e-05, |
| "loss": 2.1131, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.779, |
| "grad_norm": 0.4102763831615448, |
| "learning_rate": 3.915660948286486e-05, |
| "loss": 2.1785, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.44005340337753296, |
| "learning_rate": 3.8819464288771026e-05, |
| "loss": 2.159, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.781, |
| "grad_norm": 0.42726826667785645, |
| "learning_rate": 3.848356095398364e-05, |
| "loss": 2.1305, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 0.4183489680290222, |
| "learning_rate": 3.81489032304606e-05, |
| "loss": 2.1276, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.783, |
| "grad_norm": 0.4101635217666626, |
| "learning_rate": 3.781549485624651e-05, |
| "loss": 2.1621, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.4038468897342682, |
| "learning_rate": 3.7483339555431055e-05, |
| "loss": 2.1201, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.785, |
| "grad_norm": 0.4497060477733612, |
| "learning_rate": 3.715244103810755e-05, |
| "loss": 2.1344, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 0.42213934659957886, |
| "learning_rate": 3.682280300033128e-05, |
| "loss": 2.1957, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.787, |
| "grad_norm": 0.4279301166534424, |
| "learning_rate": 3.649442912407811e-05, |
| "loss": 2.1426, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 0.42436257004737854, |
| "learning_rate": 3.616732307720373e-05, |
| "loss": 2.1294, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.789, |
| "grad_norm": 0.4053291380405426, |
| "learning_rate": 3.584148851340252e-05, |
| "loss": 2.1124, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.38851654529571533, |
| "learning_rate": 3.551692907216652e-05, |
| "loss": 2.1285, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.791, |
| "grad_norm": 0.4097929894924164, |
| "learning_rate": 3.519364837874506e-05, |
| "loss": 2.1942, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.42528805136680603, |
| "learning_rate": 3.48716500441042e-05, |
| "loss": 2.2122, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.793, |
| "grad_norm": 0.4303857386112213, |
| "learning_rate": 3.455093766488641e-05, |
| "loss": 2.1138, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 0.40125489234924316, |
| "learning_rate": 3.423151482337013e-05, |
| "loss": 2.0879, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.795, |
| "grad_norm": 0.41210323572158813, |
| "learning_rate": 3.3913385087430204e-05, |
| "loss": 2.0709, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 0.41345149278640747, |
| "learning_rate": 3.359655201049778e-05, |
| "loss": 2.1647, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.797, |
| "grad_norm": 0.41159018874168396, |
| "learning_rate": 3.3281019131520534e-05, |
| "loss": 2.216, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 0.432202011346817, |
| "learning_rate": 3.2966789974923295e-05, |
| "loss": 2.122, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.799, |
| "grad_norm": 0.42457619309425354, |
| "learning_rate": 3.2653868050568695e-05, |
| "loss": 2.1532, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.4103912115097046, |
| "learning_rate": 3.2342256853717857e-05, |
| "loss": 2.0655, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.801, |
| "grad_norm": 0.4154665470123291, |
| "learning_rate": 3.203195986499138e-05, |
| "loss": 2.1186, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 0.40893417596817017, |
| "learning_rate": 3.1722980550330465e-05, |
| "loss": 2.1407, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.803, |
| "grad_norm": 0.4399654269218445, |
| "learning_rate": 3.141532236095833e-05, |
| "loss": 2.0957, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 0.4194296598434448, |
| "learning_rate": 3.110898873334137e-05, |
| "loss": 2.0873, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.805, |
| "grad_norm": 0.4045575261116028, |
| "learning_rate": 3.080398308915116e-05, |
| "loss": 2.2489, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 0.42948484420776367, |
| "learning_rate": 3.0500308835225794e-05, |
| "loss": 2.1339, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.807, |
| "grad_norm": 0.40769994258880615, |
| "learning_rate": 3.0197969363532294e-05, |
| "loss": 2.123, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.42292699217796326, |
| "learning_rate": 2.98969680511283e-05, |
| "loss": 2.1557, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.809, |
| "grad_norm": 0.4049204885959625, |
| "learning_rate": 2.9597308260124676e-05, |
| "loss": 2.2234, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.4200775623321533, |
| "learning_rate": 2.9298993337647776e-05, |
| "loss": 2.145, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.811, |
| "grad_norm": 0.4226503074169159, |
| "learning_rate": 2.900202661580201e-05, |
| "loss": 2.0963, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 0.4314316213130951, |
| "learning_rate": 2.8706411411632886e-05, |
| "loss": 2.2098, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.813, |
| "grad_norm": 0.44541093707084656, |
| "learning_rate": 2.8412151027089663e-05, |
| "loss": 2.0709, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 0.4003683030605316, |
| "learning_rate": 2.8119248748988682e-05, |
| "loss": 2.1063, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.815, |
| "grad_norm": 0.4231080412864685, |
| "learning_rate": 2.782770784897646e-05, |
| "loss": 2.1509, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.41555121541023254, |
| "learning_rate": 2.7537531583493415e-05, |
| "loss": 2.1913, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.817, |
| "grad_norm": 0.42107847332954407, |
| "learning_rate": 2.7248723193737193e-05, |
| "loss": 2.1571, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 0.43069687485694885, |
| "learning_rate": 2.6961285905626605e-05, |
| "loss": 2.1125, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.819, |
| "grad_norm": 0.4340486526489258, |
| "learning_rate": 2.667522292976569e-05, |
| "loss": 2.1622, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.41151127219200134, |
| "learning_rate": 2.639053746140772e-05, |
| "loss": 2.1475, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.821, |
| "grad_norm": 0.4273358881473541, |
| "learning_rate": 2.6107232680419514e-05, |
| "loss": 2.1178, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 0.42515695095062256, |
| "learning_rate": 2.582531175124592e-05, |
| "loss": 2.0993, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.823, |
| "grad_norm": 0.40709659457206726, |
| "learning_rate": 2.5544777822874585e-05, |
| "loss": 2.1192, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.402413010597229, |
| "learning_rate": 2.526563402880074e-05, |
| "loss": 2.1685, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 0.42556092143058777, |
| "learning_rate": 2.498788348699194e-05, |
| "loss": 2.1758, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 0.41573649644851685, |
| "learning_rate": 2.471152929985366e-05, |
| "loss": 2.117, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.827, |
| "grad_norm": 0.393812894821167, |
| "learning_rate": 2.44365745541944e-05, |
| "loss": 2.133, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 0.39863842725753784, |
| "learning_rate": 2.4163022321191183e-05, |
| "loss": 2.1545, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.829, |
| "grad_norm": 0.40823498368263245, |
| "learning_rate": 2.389087565635529e-05, |
| "loss": 2.098, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.39885497093200684, |
| "learning_rate": 2.362013759949824e-05, |
| "loss": 2.0917, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.831, |
| "grad_norm": 0.4393814504146576, |
| "learning_rate": 2.335081117469777e-05, |
| "loss": 2.208, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.3837684094905853, |
| "learning_rate": 2.3082899390263833e-05, |
| "loss": 2.1412, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.833, |
| "grad_norm": 0.4429077208042145, |
| "learning_rate": 2.28164052387054e-05, |
| "loss": 2.1168, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 0.4012123644351959, |
| "learning_rate": 2.2551331696696797e-05, |
| "loss": 2.1188, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.835, |
| "grad_norm": 0.4135606288909912, |
| "learning_rate": 2.228768172504442e-05, |
| "loss": 2.2259, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 0.441450834274292, |
| "learning_rate": 2.2025458268653727e-05, |
| "loss": 2.2108, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.837, |
| "grad_norm": 0.4205508828163147, |
| "learning_rate": 2.1764664256496433e-05, |
| "loss": 2.1244, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 0.41583573818206787, |
| "learning_rate": 2.150530260157769e-05, |
| "loss": 2.1185, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.839, |
| "grad_norm": 0.425163209438324, |
| "learning_rate": 2.1247376200903534e-05, |
| "loss": 2.1598, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.4113369286060333, |
| "learning_rate": 2.0990887935448537e-05, |
| "loss": 2.0728, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.841, |
| "grad_norm": 0.41494420170783997, |
| "learning_rate": 2.073584067012376e-05, |
| "loss": 2.1892, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 0.4220603108406067, |
| "learning_rate": 2.0482237253744538e-05, |
| "loss": 2.0895, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.843, |
| "grad_norm": 0.43092748522758484, |
| "learning_rate": 2.023008051899884e-05, |
| "loss": 2.1202, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 0.39946693181991577, |
| "learning_rate": 1.9979373282415482e-05, |
| "loss": 2.1241, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.845, |
| "grad_norm": 0.4280877113342285, |
| "learning_rate": 1.9730118344332874e-05, |
| "loss": 2.2293, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 0.407251238822937, |
| "learning_rate": 1.9482318488867458e-05, |
| "loss": 2.1459, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.847, |
| "grad_norm": 0.43046170473098755, |
| "learning_rate": 1.92359764838828e-05, |
| "loss": 2.1587, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.4038347601890564, |
| "learning_rate": 1.899109508095872e-05, |
| "loss": 2.021, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.849, |
| "grad_norm": 0.42142099142074585, |
| "learning_rate": 1.874767701536035e-05, |
| "loss": 2.1318, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.40510687232017517, |
| "learning_rate": 1.850572500600782e-05, |
| "loss": 2.2302, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.851, |
| "grad_norm": 0.4129900336265564, |
| "learning_rate": 1.8265241755445665e-05, |
| "loss": 2.2122, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 0.4296528995037079, |
| "learning_rate": 1.8026229949812826e-05, |
| "loss": 2.235, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.853, |
| "grad_norm": 0.43158674240112305, |
| "learning_rate": 1.77886922588125e-05, |
| "loss": 2.1896, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 0.3998924195766449, |
| "learning_rate": 1.755263133568246e-05, |
| "loss": 2.09, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.855, |
| "grad_norm": 0.41046082973480225, |
| "learning_rate": 1.7318049817165285e-05, |
| "loss": 2.2035, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.39339983463287354, |
| "learning_rate": 1.7084950323478935e-05, |
| "loss": 2.1128, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.857, |
| "grad_norm": 0.41789743304252625, |
| "learning_rate": 1.685333545828761e-05, |
| "loss": 2.1941, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 0.7934504747390747, |
| "learning_rate": 1.6623207808672556e-05, |
| "loss": 2.1591, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.859, |
| "grad_norm": 0.42379069328308105, |
| "learning_rate": 1.6394569945103116e-05, |
| "loss": 2.1738, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.4412902593612671, |
| "learning_rate": 1.6167424421408117e-05, |
| "loss": 2.0838, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.861, |
| "grad_norm": 0.42037296295166016, |
| "learning_rate": 1.594177377474736e-05, |
| "loss": 2.1533, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 0.40737658739089966, |
| "learning_rate": 1.5717620525583264e-05, |
| "loss": 2.2523, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.863, |
| "grad_norm": 0.4244195818901062, |
| "learning_rate": 1.5494967177652485e-05, |
| "loss": 2.132, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.3916720747947693, |
| "learning_rate": 1.527381621793839e-05, |
| "loss": 2.1012, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.865, |
| "grad_norm": 0.4201657772064209, |
| "learning_rate": 1.5054170116642956e-05, |
| "loss": 2.1597, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 0.40828606486320496, |
| "learning_rate": 1.4836031327159203e-05, |
| "loss": 2.1062, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.867, |
| "grad_norm": 0.43217313289642334, |
| "learning_rate": 1.4619402286043875e-05, |
| "loss": 2.0693, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 0.4222164750099182, |
| "learning_rate": 1.4404285412990247e-05, |
| "loss": 2.1159, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.869, |
| "grad_norm": 0.4298483729362488, |
| "learning_rate": 1.419068311080106e-05, |
| "loss": 2.2043, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.4016087055206299, |
| "learning_rate": 1.3978597765361504e-05, |
| "loss": 2.1043, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.871, |
| "grad_norm": 0.42064622044563293, |
| "learning_rate": 1.3768031745612923e-05, |
| "loss": 2.2281, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.4412085711956024, |
| "learning_rate": 1.3558987403526118e-05, |
| "loss": 2.1408, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.873, |
| "grad_norm": 0.42196419835090637, |
| "learning_rate": 1.3351467074075068e-05, |
| "loss": 2.1301, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 0.40128079056739807, |
| "learning_rate": 1.3145473075210921e-05, |
| "loss": 2.1288, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.40371379256248474, |
| "learning_rate": 1.2941007707836148e-05, |
| "loss": 2.1145, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 0.4210174083709717, |
| "learning_rate": 1.2738073255778741e-05, |
| "loss": 2.2467, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.877, |
| "grad_norm": 0.412621408700943, |
| "learning_rate": 1.2536671985766722e-05, |
| "loss": 2.12, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 0.40745627880096436, |
| "learning_rate": 1.2336806147402828e-05, |
| "loss": 2.1071, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.879, |
| "grad_norm": 0.39555612206459045, |
| "learning_rate": 1.2138477973139515e-05, |
| "loss": 2.1897, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.45732036232948303, |
| "learning_rate": 1.1941689678253746e-05, |
| "loss": 2.1497, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.881, |
| "grad_norm": 0.44343504309654236, |
| "learning_rate": 1.1746443460822574e-05, |
| "loss": 2.1451, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 0.4288816452026367, |
| "learning_rate": 1.1552741501698281e-05, |
| "loss": 2.1104, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.883, |
| "grad_norm": 0.41571128368377686, |
| "learning_rate": 1.136058596448428e-05, |
| "loss": 2.1129, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 0.4085163474082947, |
| "learning_rate": 1.1169978995510753e-05, |
| "loss": 2.1633, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.885, |
| "grad_norm": 0.41918131709098816, |
| "learning_rate": 1.0980922723810765e-05, |
| "loss": 2.0841, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 0.42264747619628906, |
| "learning_rate": 1.079341926109652e-05, |
| "loss": 2.083, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.887, |
| "grad_norm": 0.4294689893722534, |
| "learning_rate": 1.0607470701735632e-05, |
| "loss": 2.1732, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.41121116280555725, |
| "learning_rate": 1.0423079122727974e-05, |
| "loss": 2.1585, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.889, |
| "grad_norm": 0.4106404185295105, |
| "learning_rate": 1.0240246583682177e-05, |
| "loss": 2.1152, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.4227830171585083, |
| "learning_rate": 1.0058975126792873e-05, |
| "loss": 2.1444, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.891, |
| "grad_norm": 0.4478638470172882, |
| "learning_rate": 9.879266776817757e-06, |
| "loss": 2.1113, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 0.42579948902130127, |
| "learning_rate": 9.701123541055023e-06, |
| "loss": 2.189, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.893, |
| "grad_norm": 0.4126085937023163, |
| "learning_rate": 9.524547409320881e-06, |
| "loss": 2.1873, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 0.3965235650539398, |
| "learning_rate": 9.349540353927415e-06, |
| "loss": 2.1206, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.895, |
| "grad_norm": 0.44063737988471985, |
| "learning_rate": 9.176104329660472e-06, |
| "loss": 2.0752, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.41276708245277405, |
| "learning_rate": 9.004241273757957e-06, |
| "loss": 2.1658, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.897, |
| "grad_norm": 0.4311666190624237, |
| "learning_rate": 8.833953105887975e-06, |
| "loss": 2.1305, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 0.4143565893173218, |
| "learning_rate": 8.665241728127592e-06, |
| "loss": 2.1187, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.899, |
| "grad_norm": 0.40346577763557434, |
| "learning_rate": 8.498109024941541e-06, |
| "loss": 2.2537, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.3936259150505066, |
| "learning_rate": 8.332556863161093e-06, |
| "loss": 2.162, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.901, |
| "grad_norm": 0.42583733797073364, |
| "learning_rate": 8.168587091963247e-06, |
| "loss": 2.1827, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 0.4225499629974365, |
| "learning_rate": 8.006201542850132e-06, |
| "loss": 2.0864, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.903, |
| "grad_norm": 0.3910048305988312, |
| "learning_rate": 7.84540202962854e-06, |
| "loss": 2.1557, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.42542725801467896, |
| "learning_rate": 7.686190348389542e-06, |
| "loss": 2.0824, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.905, |
| "grad_norm": 0.4061259627342224, |
| "learning_rate": 7.528568277488539e-06, |
| "loss": 2.1533, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 0.4078681766986847, |
| "learning_rate": 7.372537577525395e-06, |
| "loss": 2.2207, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.907, |
| "grad_norm": 0.4450269341468811, |
| "learning_rate": 7.218099991324827e-06, |
| "loss": 2.1339, |
| "step": 4535 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 0.42582967877388, |
| "learning_rate": 7.0652572439166726e-06, |
| "loss": 2.191, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.909, |
| "grad_norm": 0.4486072063446045, |
| "learning_rate": 6.914011042516965e-06, |
| "loss": 2.2439, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.43058037757873535, |
| "learning_rate": 6.7643630765086885e-06, |
| "loss": 2.1713, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.911, |
| "grad_norm": 0.4128859341144562, |
| "learning_rate": 6.616315017422891e-06, |
| "loss": 2.1686, |
| "step": 4555 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.4264604151248932, |
| "learning_rate": 6.4698685189200625e-06, |
| "loss": 2.1281, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.913, |
| "grad_norm": 0.3913510739803314, |
| "learning_rate": 6.325025216771657e-06, |
| "loss": 2.1368, |
| "step": 4565 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 0.40388935804367065, |
| "learning_rate": 6.181786728841847e-06, |
| "loss": 2.2191, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.915, |
| "grad_norm": 0.43146443367004395, |
| "learning_rate": 6.040154655069401e-06, |
| "loss": 2.131, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 0.42876219749450684, |
| "learning_rate": 5.900130577449785e-06, |
| "loss": 2.1655, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.917, |
| "grad_norm": 0.4041782021522522, |
| "learning_rate": 5.761716060017652e-06, |
| "loss": 2.1558, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 0.46385347843170166, |
| "learning_rate": 5.652144368961192e-06, |
| "loss": 2.2228, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.919, |
| "grad_norm": 0.4040263295173645, |
| "learning_rate": 5.516630943835609e-06, |
| "loss": 2.0635, |
| "step": 4595 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.4247514009475708, |
| "learning_rate": 5.382731362494269e-06, |
| "loss": 2.1747, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.921, |
| "grad_norm": 0.43409496545791626, |
| "learning_rate": 5.250447120562695e-06, |
| "loss": 2.136, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 0.42186084389686584, |
| "learning_rate": 5.1197796956234106e-06, |
| "loss": 2.1409, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.923, |
| "grad_norm": 0.4255131483078003, |
| "learning_rate": 4.990730547199467e-06, |
| "loss": 2.1583, |
| "step": 4615 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 0.41459619998931885, |
| "learning_rate": 4.863301116738211e-06, |
| "loss": 2.1897, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 0.4171781539916992, |
| "learning_rate": 4.737492827595107e-06, |
| "loss": 2.161, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 0.39910873770713806, |
| "learning_rate": 4.6133070850178924e-06, |
| "loss": 2.1911, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.927, |
| "grad_norm": 0.43180596828460693, |
| "learning_rate": 4.490745276130814e-06, |
| "loss": 2.1061, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.43445250391960144, |
| "learning_rate": 4.369808769919214e-06, |
| "loss": 2.1358, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.929, |
| "grad_norm": 0.3996482789516449, |
| "learning_rate": 4.2504989172142405e-06, |
| "loss": 2.1734, |
| "step": 4645 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.42857295274734497, |
| "learning_rate": 4.132817050677639e-06, |
| "loss": 2.1879, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.931, |
| "grad_norm": 0.43241751194000244, |
| "learning_rate": 4.016764484786972e-06, |
| "loss": 2.1718, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 0.41567280888557434, |
| "learning_rate": 3.9023425158209385e-06, |
| "loss": 2.1295, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.933, |
| "grad_norm": 0.41955962777137756, |
| "learning_rate": 3.78955242184491e-06, |
| "loss": 2.1306, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 0.4202505946159363, |
| "learning_rate": 3.6783954626964873e-06, |
| "loss": 2.1312, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.935, |
| "grad_norm": 0.43055739998817444, |
| "learning_rate": 3.5688728799716626e-06, |
| "loss": 2.1468, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.41296547651290894, |
| "learning_rate": 3.460985897010832e-06, |
| "loss": 2.1501, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.937, |
| "grad_norm": 0.4199470281600952, |
| "learning_rate": 3.354735718885121e-06, |
| "loss": 2.1343, |
| "step": 4685 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 0.4009940028190613, |
| "learning_rate": 3.2501235323829146e-06, |
| "loss": 2.1858, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.939, |
| "grad_norm": 0.4397661089897156, |
| "learning_rate": 3.147150505996715e-06, |
| "loss": 2.219, |
| "step": 4695 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.4313606321811676, |
| "learning_rate": 3.0458177899099545e-06, |
| "loss": 2.1644, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.941, |
| "grad_norm": 0.40876680612564087, |
| "learning_rate": 2.9461265159841873e-06, |
| "loss": 2.2214, |
| "step": 4705 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 0.44418561458587646, |
| "learning_rate": 2.848077797746501e-06, |
| "loss": 2.1538, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.943, |
| "grad_norm": 0.4055839478969574, |
| "learning_rate": 2.7516727303770248e-06, |
| "loss": 2.2004, |
| "step": 4715 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.4120195209980011, |
| "learning_rate": 2.656912390696708e-06, |
| "loss": 2.1759, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.945, |
| "grad_norm": 0.41687390208244324, |
| "learning_rate": 2.5637978371552794e-06, |
| "loss": 2.1172, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 0.42830410599708557, |
| "learning_rate": 2.4723301098194704e-06, |
| "loss": 2.2135, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.947, |
| "grad_norm": 0.4193302392959595, |
| "learning_rate": 2.3825102303613617e-06, |
| "loss": 2.141, |
| "step": 4735 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 0.44555971026420593, |
| "learning_rate": 2.2943392020469742e-06, |
| "loss": 2.179, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.949, |
| "grad_norm": 0.3976489007472992, |
| "learning_rate": 2.2078180097250266e-06, |
| "loss": 2.1691, |
| "step": 4745 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.4367688000202179, |
| "learning_rate": 2.1229476198160456e-06, |
| "loss": 2.1502, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.951, |
| "grad_norm": 0.40627768635749817, |
| "learning_rate": 2.0397289803014584e-06, |
| "loss": 2.1399, |
| "step": 4755 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.3884209096431732, |
| "learning_rate": 1.9581630207130326e-06, |
| "loss": 1.9814, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.953, |
| "grad_norm": 0.4176620841026306, |
| "learning_rate": 1.878250652122537e-06, |
| "loss": 2.1467, |
| "step": 4765 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 0.41188085079193115, |
| "learning_rate": 1.7999927671315307e-06, |
| "loss": 2.1645, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.955, |
| "grad_norm": 0.41481921076774597, |
| "learning_rate": 1.7233902398613719e-06, |
| "loss": 2.1747, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 0.40912356972694397, |
| "learning_rate": 1.6484439259435267e-06, |
| "loss": 2.1477, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.957, |
| "grad_norm": 0.44400301575660706, |
| "learning_rate": 1.575154662509892e-06, |
| "loss": 2.1439, |
| "step": 4785 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 0.391618013381958, |
| "learning_rate": 1.5035232681835874e-06, |
| "loss": 2.0929, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.959, |
| "grad_norm": 0.4395817220211029, |
| "learning_rate": 1.4335505430696947e-06, |
| "loss": 2.2465, |
| "step": 4795 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.4161112606525421, |
| "learning_rate": 1.3652372687464164e-06, |
| "loss": 2.1842, |
| "step": 4800 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.93792398032896e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|