| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 175, | |
| "global_step": 1398, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001430615164520744, | |
| "grad_norm": 7.826082229614258, | |
| "learning_rate": 0.0, | |
| "loss": 0.898, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001430615164520744, | |
| "eval_loss": 1.065092921257019, | |
| "eval_runtime": 65.026, | |
| "eval_samples_per_second": 6.382, | |
| "eval_steps_per_second": 0.4, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002861230329041488, | |
| "grad_norm": 10.205927848815918, | |
| "learning_rate": 3.7500000000000005e-08, | |
| "loss": 0.9261, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004291845493562232, | |
| "grad_norm": 3.9774727821350098, | |
| "learning_rate": 7.500000000000001e-08, | |
| "loss": 0.9309, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.005722460658082976, | |
| "grad_norm": 5.370663642883301, | |
| "learning_rate": 1.125e-07, | |
| "loss": 0.846, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00715307582260372, | |
| "grad_norm": 6.776569843292236, | |
| "learning_rate": 1.5000000000000002e-07, | |
| "loss": 0.8782, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.008583690987124463, | |
| "grad_norm": 8.526254653930664, | |
| "learning_rate": 1.875e-07, | |
| "loss": 0.9247, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.010014306151645207, | |
| "grad_norm": 2.043957471847534, | |
| "learning_rate": 2.25e-07, | |
| "loss": 0.9349, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.011444921316165951, | |
| "grad_norm": 2.4873178005218506, | |
| "learning_rate": 2.625e-07, | |
| "loss": 0.8981, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.012875536480686695, | |
| "grad_norm": 4.598736763000488, | |
| "learning_rate": 3.0000000000000004e-07, | |
| "loss": 0.8809, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01430615164520744, | |
| "grad_norm": 6.595153331756592, | |
| "learning_rate": 3.375e-07, | |
| "loss": 0.9229, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.015736766809728183, | |
| "grad_norm": 5.382663249969482, | |
| "learning_rate": 3.75e-07, | |
| "loss": 0.9396, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.017167381974248927, | |
| "grad_norm": 10.02416706085205, | |
| "learning_rate": 4.125e-07, | |
| "loss": 0.8546, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01859799713876967, | |
| "grad_norm": 4.947641849517822, | |
| "learning_rate": 4.5e-07, | |
| "loss": 0.9159, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.020028612303290415, | |
| "grad_norm": 3.2930426597595215, | |
| "learning_rate": 4.875e-07, | |
| "loss": 0.9403, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02145922746781116, | |
| "grad_norm": 24.454675674438477, | |
| "learning_rate": 5.25e-07, | |
| "loss": 0.8754, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.022889842632331903, | |
| "grad_norm": 7.453534126281738, | |
| "learning_rate": 5.625e-07, | |
| "loss": 0.9897, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.024320457796852647, | |
| "grad_norm": 8.125139236450195, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 0.9593, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02575107296137339, | |
| "grad_norm": 8.038130760192871, | |
| "learning_rate": 6.375e-07, | |
| "loss": 0.9863, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.027181688125894134, | |
| "grad_norm": 10.386178016662598, | |
| "learning_rate": 6.75e-07, | |
| "loss": 0.9412, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02861230329041488, | |
| "grad_norm": 9.146885871887207, | |
| "learning_rate": 7.125e-07, | |
| "loss": 0.8988, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.030042918454935622, | |
| "grad_norm": 6.290739059448242, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.8968, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.031473533619456366, | |
| "grad_norm": 2.8495869636535645, | |
| "learning_rate": 7.875000000000001e-07, | |
| "loss": 0.9229, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.032904148783977114, | |
| "grad_norm": 4.456954002380371, | |
| "learning_rate": 8.25e-07, | |
| "loss": 0.8605, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.034334763948497854, | |
| "grad_norm": 12.40089225769043, | |
| "learning_rate": 8.625e-07, | |
| "loss": 0.8897, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0357653791130186, | |
| "grad_norm": 3.42988920211792, | |
| "learning_rate": 9e-07, | |
| "loss": 0.8653, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03719599427753934, | |
| "grad_norm": 2.2468039989471436, | |
| "learning_rate": 9.375e-07, | |
| "loss": 0.9229, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03862660944206009, | |
| "grad_norm": 4.040201663970947, | |
| "learning_rate": 9.75e-07, | |
| "loss": 0.8753, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.04005722460658083, | |
| "grad_norm": 4.08870792388916, | |
| "learning_rate": 1.0125e-06, | |
| "loss": 0.9356, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.04148783977110158, | |
| "grad_norm": 5.570353984832764, | |
| "learning_rate": 1.05e-06, | |
| "loss": 0.8388, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.04291845493562232, | |
| "grad_norm": 4.162603378295898, | |
| "learning_rate": 1.0875e-06, | |
| "loss": 0.865, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.044349070100143065, | |
| "grad_norm": 9.821990013122559, | |
| "learning_rate": 1.125e-06, | |
| "loss": 0.9317, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.045779685264663805, | |
| "grad_norm": 17.85947036743164, | |
| "learning_rate": 1.1625e-06, | |
| "loss": 0.9546, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04721030042918455, | |
| "grad_norm": 4.307530879974365, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.7619, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04864091559370529, | |
| "grad_norm": 3.723987579345703, | |
| "learning_rate": 1.2375e-06, | |
| "loss": 0.8835, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05007153075822604, | |
| "grad_norm": 6.962404251098633, | |
| "learning_rate": 1.275e-06, | |
| "loss": 0.7924, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05150214592274678, | |
| "grad_norm": 4.578495025634766, | |
| "learning_rate": 1.3125000000000001e-06, | |
| "loss": 0.8527, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05293276108726753, | |
| "grad_norm": 9.046110153198242, | |
| "learning_rate": 1.35e-06, | |
| "loss": 0.9319, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.05436337625178827, | |
| "grad_norm": 2.2053868770599365, | |
| "learning_rate": 1.3875e-06, | |
| "loss": 0.9608, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.055793991416309016, | |
| "grad_norm": 2.3856260776519775, | |
| "learning_rate": 1.425e-06, | |
| "loss": 0.8641, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.05722460658082976, | |
| "grad_norm": 1.8333237171173096, | |
| "learning_rate": 1.4625e-06, | |
| "loss": 0.9357, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.058655221745350504, | |
| "grad_norm": 2.9304890632629395, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.8986, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.060085836909871244, | |
| "grad_norm": 3.4019198417663574, | |
| "learning_rate": 1.5374999999999999e-06, | |
| "loss": 0.9427, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06151645207439199, | |
| "grad_norm": 7.195025444030762, | |
| "learning_rate": 1.5750000000000002e-06, | |
| "loss": 0.851, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06294706723891273, | |
| "grad_norm": 7.58285665512085, | |
| "learning_rate": 1.6125e-06, | |
| "loss": 0.9239, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06437768240343347, | |
| "grad_norm": 7.752026081085205, | |
| "learning_rate": 1.65e-06, | |
| "loss": 0.863, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.06580829756795423, | |
| "grad_norm": 13.529495239257812, | |
| "learning_rate": 1.6875e-06, | |
| "loss": 0.8844, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06723891273247497, | |
| "grad_norm": 4.444079399108887, | |
| "learning_rate": 1.725e-06, | |
| "loss": 0.9185, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.06866952789699571, | |
| "grad_norm": 8.650182723999023, | |
| "learning_rate": 1.7625e-06, | |
| "loss": 0.8735, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07010014306151645, | |
| "grad_norm": 3.767944097518921, | |
| "learning_rate": 1.8e-06, | |
| "loss": 0.7559, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0715307582260372, | |
| "grad_norm": 7.349740982055664, | |
| "learning_rate": 1.8375000000000002e-06, | |
| "loss": 0.8848, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07296137339055794, | |
| "grad_norm": 6.42757511138916, | |
| "learning_rate": 1.875e-06, | |
| "loss": 0.8778, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.07439198855507868, | |
| "grad_norm": 4.057242393493652, | |
| "learning_rate": 1.9125e-06, | |
| "loss": 0.8433, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07582260371959942, | |
| "grad_norm": 2.327789306640625, | |
| "learning_rate": 1.95e-06, | |
| "loss": 0.8083, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.07725321888412018, | |
| "grad_norm": 8.588128089904785, | |
| "learning_rate": 1.9875e-06, | |
| "loss": 0.8587, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.07868383404864092, | |
| "grad_norm": 9.92045783996582, | |
| "learning_rate": 2.025e-06, | |
| "loss": 0.8613, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08011444921316166, | |
| "grad_norm": 5.001506805419922, | |
| "learning_rate": 2.0625e-06, | |
| "loss": 0.8549, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0815450643776824, | |
| "grad_norm": 4.943772315979004, | |
| "learning_rate": 2.1e-06, | |
| "loss": 0.9253, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.08297567954220315, | |
| "grad_norm": 2.5432281494140625, | |
| "learning_rate": 2.1375000000000003e-06, | |
| "loss": 0.8148, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0844062947067239, | |
| "grad_norm": 3.7364847660064697, | |
| "learning_rate": 2.175e-06, | |
| "loss": 0.8329, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.08583690987124463, | |
| "grad_norm": 2.2858948707580566, | |
| "learning_rate": 2.2125e-06, | |
| "loss": 0.8131, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08726752503576538, | |
| "grad_norm": 3.740797281265259, | |
| "learning_rate": 2.25e-06, | |
| "loss": 0.8974, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.08869814020028613, | |
| "grad_norm": 7.974575042724609, | |
| "learning_rate": 2.2875e-06, | |
| "loss": 0.8773, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09012875536480687, | |
| "grad_norm": 3.5054333209991455, | |
| "learning_rate": 2.325e-06, | |
| "loss": 0.8011, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.09155937052932761, | |
| "grad_norm": 1.7374111413955688, | |
| "learning_rate": 2.3625e-06, | |
| "loss": 0.8732, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09298998569384835, | |
| "grad_norm": 1.5484044551849365, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.816, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0944206008583691, | |
| "grad_norm": 7.499728679656982, | |
| "learning_rate": 2.4375e-06, | |
| "loss": 0.8951, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09585121602288985, | |
| "grad_norm": 2.1170144081115723, | |
| "learning_rate": 2.475e-06, | |
| "loss": 0.8616, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.09728183118741059, | |
| "grad_norm": 4.520656108856201, | |
| "learning_rate": 2.5125e-06, | |
| "loss": 0.823, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.09871244635193133, | |
| "grad_norm": 2.0560104846954346, | |
| "learning_rate": 2.55e-06, | |
| "loss": 0.8335, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.10014306151645208, | |
| "grad_norm": 1.0364820957183838, | |
| "learning_rate": 2.5875000000000002e-06, | |
| "loss": 0.8691, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10157367668097282, | |
| "grad_norm": 13.255958557128906, | |
| "learning_rate": 2.6250000000000003e-06, | |
| "loss": 0.9005, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.10300429184549356, | |
| "grad_norm": 1.2062978744506836, | |
| "learning_rate": 2.6625e-06, | |
| "loss": 0.9268, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1044349070100143, | |
| "grad_norm": 5.754052639007568, | |
| "learning_rate": 2.7e-06, | |
| "loss": 0.8912, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.10586552217453506, | |
| "grad_norm": 6.267002105712891, | |
| "learning_rate": 2.7375e-06, | |
| "loss": 0.8835, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1072961373390558, | |
| "grad_norm": 5.339660167694092, | |
| "learning_rate": 2.775e-06, | |
| "loss": 0.8765, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.10872675250357654, | |
| "grad_norm": 3.0998125076293945, | |
| "learning_rate": 2.8125e-06, | |
| "loss": 0.7866, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11015736766809728, | |
| "grad_norm": 5.969987392425537, | |
| "learning_rate": 2.85e-06, | |
| "loss": 0.8956, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.11158798283261803, | |
| "grad_norm": 3.4417006969451904, | |
| "learning_rate": 2.8875000000000003e-06, | |
| "loss": 0.7929, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11301859799713877, | |
| "grad_norm": 6.582152366638184, | |
| "learning_rate": 2.925e-06, | |
| "loss": 0.9373, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.11444921316165951, | |
| "grad_norm": 1.3354519605636597, | |
| "learning_rate": 2.9625e-06, | |
| "loss": 0.8581, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11587982832618025, | |
| "grad_norm": 13.804448127746582, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8789, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.11731044349070101, | |
| "grad_norm": 3.086815357208252, | |
| "learning_rate": 2.999995738818993e-06, | |
| "loss": 0.8516, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.11874105865522175, | |
| "grad_norm": 14.031466484069824, | |
| "learning_rate": 2.999982955300181e-06, | |
| "loss": 0.9504, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.12017167381974249, | |
| "grad_norm": 7.03550910949707, | |
| "learning_rate": 2.9999616495161956e-06, | |
| "loss": 0.841, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12160228898426323, | |
| "grad_norm": 1.4175535440444946, | |
| "learning_rate": 2.9999318215880865e-06, | |
| "loss": 0.8488, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.12303290414878398, | |
| "grad_norm": 3.6041760444641113, | |
| "learning_rate": 2.9998934716853238e-06, | |
| "loss": 0.865, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.12446351931330472, | |
| "grad_norm": 2.260624408721924, | |
| "learning_rate": 2.9998466000257944e-06, | |
| "loss": 0.9309, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.12589413447782546, | |
| "grad_norm": 29.91061782836914, | |
| "learning_rate": 2.9997912068758043e-06, | |
| "loss": 0.8052, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.12732474964234622, | |
| "grad_norm": 15.271418571472168, | |
| "learning_rate": 2.9997272925500735e-06, | |
| "loss": 0.8355, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.12875536480686695, | |
| "grad_norm": 9.124563217163086, | |
| "learning_rate": 2.9996548574117354e-06, | |
| "loss": 0.847, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1301859799713877, | |
| "grad_norm": 2.8253238201141357, | |
| "learning_rate": 2.9995739018723365e-06, | |
| "loss": 0.85, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.13161659513590845, | |
| "grad_norm": 12.32058048248291, | |
| "learning_rate": 2.999484426391831e-06, | |
| "loss": 0.907, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.13304721030042918, | |
| "grad_norm": 1.6532840728759766, | |
| "learning_rate": 2.999386431478581e-06, | |
| "loss": 0.8617, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.13447782546494993, | |
| "grad_norm": 2.9194514751434326, | |
| "learning_rate": 2.9992799176893515e-06, | |
| "loss": 0.8747, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.13590844062947066, | |
| "grad_norm": 5.123015880584717, | |
| "learning_rate": 2.999164885629309e-06, | |
| "loss": 0.8485, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.13733905579399142, | |
| "grad_norm": 2.5870351791381836, | |
| "learning_rate": 2.9990413359520165e-06, | |
| "loss": 0.8487, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.13876967095851217, | |
| "grad_norm": 99.052490234375, | |
| "learning_rate": 2.998909269359431e-06, | |
| "loss": 0.8617, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1402002861230329, | |
| "grad_norm": 2.046233892440796, | |
| "learning_rate": 2.998768686601898e-06, | |
| "loss": 0.8317, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.14163090128755365, | |
| "grad_norm": 2.7805709838867188, | |
| "learning_rate": 2.99861958847815e-06, | |
| "loss": 0.8411, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1430615164520744, | |
| "grad_norm": 4.889650821685791, | |
| "learning_rate": 2.998461975835298e-06, | |
| "loss": 0.9486, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14449213161659513, | |
| "grad_norm": 4.531475067138672, | |
| "learning_rate": 2.9982958495688307e-06, | |
| "loss": 0.7729, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.1459227467811159, | |
| "grad_norm": 25.037559509277344, | |
| "learning_rate": 2.9981212106226067e-06, | |
| "loss": 0.8791, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1473533619456366, | |
| "grad_norm": 5.148260116577148, | |
| "learning_rate": 2.9979380599888506e-06, | |
| "loss": 0.8771, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.14878397711015737, | |
| "grad_norm": 12.409090042114258, | |
| "learning_rate": 2.997746398708146e-06, | |
| "loss": 0.8488, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15021459227467812, | |
| "grad_norm": 9.207916259765625, | |
| "learning_rate": 2.99754622786943e-06, | |
| "loss": 0.9463, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.15164520743919885, | |
| "grad_norm": 9.328911781311035, | |
| "learning_rate": 2.99733754860999e-06, | |
| "loss": 0.8123, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1530758226037196, | |
| "grad_norm": 3.671304941177368, | |
| "learning_rate": 2.997120362115451e-06, | |
| "loss": 0.7729, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.15450643776824036, | |
| "grad_norm": 3.156205654144287, | |
| "learning_rate": 2.9968946696197754e-06, | |
| "loss": 0.8232, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.15593705293276108, | |
| "grad_norm": 5.058759689331055, | |
| "learning_rate": 2.9966604724052517e-06, | |
| "loss": 0.7645, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.15736766809728184, | |
| "grad_norm": 6.537101745605469, | |
| "learning_rate": 2.9964177718024888e-06, | |
| "loss": 0.8669, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15879828326180256, | |
| "grad_norm": 13.55328369140625, | |
| "learning_rate": 2.9961665691904087e-06, | |
| "loss": 0.7843, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.16022889842632332, | |
| "grad_norm": 1.259379267692566, | |
| "learning_rate": 2.9959068659962367e-06, | |
| "loss": 0.7038, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.16165951359084407, | |
| "grad_norm": 2.055260181427002, | |
| "learning_rate": 2.995638663695497e-06, | |
| "loss": 0.7348, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1630901287553648, | |
| "grad_norm": 3.9688467979431152, | |
| "learning_rate": 2.9953619638120004e-06, | |
| "loss": 0.8377, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16452074391988555, | |
| "grad_norm": 1.2332788705825806, | |
| "learning_rate": 2.9950767679178377e-06, | |
| "loss": 0.8324, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1659513590844063, | |
| "grad_norm": 4.329883098602295, | |
| "learning_rate": 2.994783077633372e-06, | |
| "loss": 0.8407, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.16738197424892703, | |
| "grad_norm": 30.238853454589844, | |
| "learning_rate": 2.994480894627225e-06, | |
| "loss": 0.7921, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.1688125894134478, | |
| "grad_norm": 3.2549190521240234, | |
| "learning_rate": 2.9941702206162733e-06, | |
| "loss": 0.9115, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17024320457796852, | |
| "grad_norm": 2.118321180343628, | |
| "learning_rate": 2.9938510573656333e-06, | |
| "loss": 0.8938, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.17167381974248927, | |
| "grad_norm": 9.38047981262207, | |
| "learning_rate": 2.993523406688656e-06, | |
| "loss": 0.9441, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.17310443490701002, | |
| "grad_norm": 2.0981671810150146, | |
| "learning_rate": 2.9931872704469126e-06, | |
| "loss": 0.8982, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.17453505007153075, | |
| "grad_norm": 2.4683213233947754, | |
| "learning_rate": 2.992842650550186e-06, | |
| "loss": 0.9112, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1759656652360515, | |
| "grad_norm": 6.200721740722656, | |
| "learning_rate": 2.9924895489564602e-06, | |
| "loss": 0.9541, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.17739628040057226, | |
| "grad_norm": 13.966652870178223, | |
| "learning_rate": 2.9921279676719085e-06, | |
| "loss": 0.8528, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.17882689556509299, | |
| "grad_norm": 4.730607032775879, | |
| "learning_rate": 2.9917579087508817e-06, | |
| "loss": 0.7931, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.18025751072961374, | |
| "grad_norm": 14.206419944763184, | |
| "learning_rate": 2.9913793742958968e-06, | |
| "loss": 0.9154, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.18168812589413447, | |
| "grad_norm": 6.954555988311768, | |
| "learning_rate": 2.9909923664576264e-06, | |
| "loss": 0.7906, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.18311874105865522, | |
| "grad_norm": 5.227564811706543, | |
| "learning_rate": 2.9905968874348833e-06, | |
| "loss": 0.8771, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.18454935622317598, | |
| "grad_norm": 9.619230270385742, | |
| "learning_rate": 2.9901929394746126e-06, | |
| "loss": 0.9761, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.1859799713876967, | |
| "grad_norm": 22.448091506958008, | |
| "learning_rate": 2.9897805248718737e-06, | |
| "loss": 0.83, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.18741058655221746, | |
| "grad_norm": 4.735238075256348, | |
| "learning_rate": 2.9893596459698313e-06, | |
| "loss": 0.841, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1888412017167382, | |
| "grad_norm": 3.6265194416046143, | |
| "learning_rate": 2.9889303051597403e-06, | |
| "loss": 0.8511, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19027181688125894, | |
| "grad_norm": 4.24748420715332, | |
| "learning_rate": 2.9884925048809327e-06, | |
| "loss": 0.8496, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1917024320457797, | |
| "grad_norm": 5.5269856452941895, | |
| "learning_rate": 2.9880462476208033e-06, | |
| "loss": 0.8475, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19313304721030042, | |
| "grad_norm": 3.4209656715393066, | |
| "learning_rate": 2.987591535914796e-06, | |
| "loss": 0.77, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.19456366237482117, | |
| "grad_norm": 1.303078055381775, | |
| "learning_rate": 2.9871283723463896e-06, | |
| "loss": 0.8877, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.19599427753934193, | |
| "grad_norm": 4.734808444976807, | |
| "learning_rate": 2.986656759547082e-06, | |
| "loss": 0.8509, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.19742489270386265, | |
| "grad_norm": 22.503400802612305, | |
| "learning_rate": 2.986176700196377e-06, | |
| "loss": 0.859, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.1988555078683834, | |
| "grad_norm": 11.41430950164795, | |
| "learning_rate": 2.9856881970217674e-06, | |
| "loss": 0.8071, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.20028612303290416, | |
| "grad_norm": 1.9186583757400513, | |
| "learning_rate": 2.985191252798721e-06, | |
| "loss": 0.7943, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2017167381974249, | |
| "grad_norm": 3.3689098358154297, | |
| "learning_rate": 2.9846858703506625e-06, | |
| "loss": 0.8457, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.20314735336194564, | |
| "grad_norm": 23.29608726501465, | |
| "learning_rate": 2.984172052548961e-06, | |
| "loss": 0.8721, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.20457796852646637, | |
| "grad_norm": 3.0760669708251953, | |
| "learning_rate": 2.98364980231291e-06, | |
| "loss": 0.9147, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.20600858369098712, | |
| "grad_norm": 6.489161491394043, | |
| "learning_rate": 2.9831191226097138e-06, | |
| "loss": 0.7935, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.20743919885550788, | |
| "grad_norm": 23.182281494140625, | |
| "learning_rate": 2.9825800164544683e-06, | |
| "loss": 0.8989, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2088698140200286, | |
| "grad_norm": 3.310340404510498, | |
| "learning_rate": 2.9820324869101457e-06, | |
| "loss": 0.9176, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21030042918454936, | |
| "grad_norm": 37.046268463134766, | |
| "learning_rate": 2.9814765370875757e-06, | |
| "loss": 0.8695, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2117310443490701, | |
| "grad_norm": 7.289843559265137, | |
| "learning_rate": 2.980912170145429e-06, | |
| "loss": 0.7522, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21316165951359084, | |
| "grad_norm": 2.9157369136810303, | |
| "learning_rate": 2.9803393892901983e-06, | |
| "loss": 0.8782, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.2145922746781116, | |
| "grad_norm": 5.29908561706543, | |
| "learning_rate": 2.9797581977761813e-06, | |
| "loss": 0.9556, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.21602288984263232, | |
| "grad_norm": 15.43282413482666, | |
| "learning_rate": 2.97916859890546e-06, | |
| "loss": 0.794, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.21745350500715308, | |
| "grad_norm": 1.031524419784546, | |
| "learning_rate": 2.9785705960278854e-06, | |
| "loss": 0.7869, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.21888412017167383, | |
| "grad_norm": 1.8489532470703125, | |
| "learning_rate": 2.9779641925410552e-06, | |
| "loss": 0.8462, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.22031473533619456, | |
| "grad_norm": 3.093093156814575, | |
| "learning_rate": 2.9773493918902956e-06, | |
| "loss": 0.8689, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.2217453505007153, | |
| "grad_norm": 12.075631141662598, | |
| "learning_rate": 2.9767261975686436e-06, | |
| "loss": 0.835, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.22317596566523606, | |
| "grad_norm": 5.174819469451904, | |
| "learning_rate": 2.976094613116823e-06, | |
| "loss": 0.7994, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2246065808297568, | |
| "grad_norm": 14.805009841918945, | |
| "learning_rate": 2.975454642123228e-06, | |
| "loss": 0.7749, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.22603719599427755, | |
| "grad_norm": 6.730155944824219, | |
| "learning_rate": 2.9748062882239032e-06, | |
| "loss": 0.7781, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.22746781115879827, | |
| "grad_norm": 1.8753336668014526, | |
| "learning_rate": 2.9741495551025176e-06, | |
| "loss": 0.8107, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.22889842632331903, | |
| "grad_norm": 7.615732192993164, | |
| "learning_rate": 2.9734844464903513e-06, | |
| "loss": 0.8196, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23032904148783978, | |
| "grad_norm": 11.586601257324219, | |
| "learning_rate": 2.9728109661662674e-06, | |
| "loss": 0.7974, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.2317596566523605, | |
| "grad_norm": 12.352217674255371, | |
| "learning_rate": 2.972129117956695e-06, | |
| "loss": 0.7608, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23319027181688126, | |
| "grad_norm": 23.733856201171875, | |
| "learning_rate": 2.971438905735606e-06, | |
| "loss": 0.8376, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.23462088698140202, | |
| "grad_norm": 11.216538429260254, | |
| "learning_rate": 2.9707403334244917e-06, | |
| "loss": 0.8035, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.23605150214592274, | |
| "grad_norm": 14.699457168579102, | |
| "learning_rate": 2.9700334049923436e-06, | |
| "loss": 0.7992, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2374821173104435, | |
| "grad_norm": 11.340972900390625, | |
| "learning_rate": 2.9693181244556285e-06, | |
| "loss": 0.8836, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.23891273247496422, | |
| "grad_norm": 25.200716018676758, | |
| "learning_rate": 2.968594495878266e-06, | |
| "loss": 0.9051, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.24034334763948498, | |
| "grad_norm": 9.116874694824219, | |
| "learning_rate": 2.967862523371605e-06, | |
| "loss": 0.8595, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24177396280400573, | |
| "grad_norm": 8.399476051330566, | |
| "learning_rate": 2.9671222110944032e-06, | |
| "loss": 0.8618, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.24320457796852646, | |
| "grad_norm": 3.272933006286621, | |
| "learning_rate": 2.9663735632527995e-06, | |
| "loss": 0.7056, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2446351931330472, | |
| "grad_norm": 6.614375591278076, | |
| "learning_rate": 2.9656165841002934e-06, | |
| "loss": 0.7985, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.24606580829756797, | |
| "grad_norm": 32.3626594543457, | |
| "learning_rate": 2.964851277937717e-06, | |
| "loss": 0.7313, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2474964234620887, | |
| "grad_norm": 8.703509330749512, | |
| "learning_rate": 2.9640776491132155e-06, | |
| "loss": 0.859, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.24892703862660945, | |
| "grad_norm": 4.464837551116943, | |
| "learning_rate": 2.9632957020222185e-06, | |
| "loss": 0.841, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2503576537911302, | |
| "grad_norm": 3.000603199005127, | |
| "learning_rate": 2.9625054411074166e-06, | |
| "loss": 0.804, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2503576537911302, | |
| "eval_loss": 0.987718939781189, | |
| "eval_runtime": 63.9493, | |
| "eval_samples_per_second": 6.49, | |
| "eval_steps_per_second": 0.407, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.25178826895565093, | |
| "grad_norm": 7.612940788269043, | |
| "learning_rate": 2.9617068708587365e-06, | |
| "loss": 0.7769, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2532188841201717, | |
| "grad_norm": 8.721627235412598, | |
| "learning_rate": 2.9608999958133147e-06, | |
| "loss": 0.8665, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.25464949928469244, | |
| "grad_norm": 16.083751678466797, | |
| "learning_rate": 2.9600848205554717e-06, | |
| "loss": 0.782, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.25608011444921314, | |
| "grad_norm": 6.257425308227539, | |
| "learning_rate": 2.959261349716687e-06, | |
| "loss": 0.9526, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.2575107296137339, | |
| "grad_norm": 1.2820326089859009, | |
| "learning_rate": 2.9584295879755717e-06, | |
| "loss": 0.7956, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.25894134477825465, | |
| "grad_norm": 6.126409530639648, | |
| "learning_rate": 2.957589540057842e-06, | |
| "loss": 0.7572, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.2603719599427754, | |
| "grad_norm": 6.914258003234863, | |
| "learning_rate": 2.9567412107362925e-06, | |
| "loss": 0.8475, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.26180257510729615, | |
| "grad_norm": 10.246359825134277, | |
| "learning_rate": 2.9558846048307703e-06, | |
| "loss": 0.865, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.2632331902718169, | |
| "grad_norm": 7.619375705718994, | |
| "learning_rate": 2.955019727208145e-06, | |
| "loss": 0.8139, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2646638054363376, | |
| "grad_norm": 5.338575839996338, | |
| "learning_rate": 2.9541465827822845e-06, | |
| "loss": 0.8606, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.26609442060085836, | |
| "grad_norm": 7.702805042266846, | |
| "learning_rate": 2.9532651765140233e-06, | |
| "loss": 0.893, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2675250357653791, | |
| "grad_norm": 19.783300399780273, | |
| "learning_rate": 2.952375513411137e-06, | |
| "loss": 0.9462, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.26895565092989987, | |
| "grad_norm": 7.961276054382324, | |
| "learning_rate": 2.951477598528313e-06, | |
| "loss": 0.8445, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2703862660944206, | |
| "grad_norm": 2.4168357849121094, | |
| "learning_rate": 2.9505714369671222e-06, | |
| "loss": 0.7095, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.2718168812589413, | |
| "grad_norm": 3.021878957748413, | |
| "learning_rate": 2.949657033875989e-06, | |
| "loss": 0.8208, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2732474964234621, | |
| "grad_norm": 3.7104063034057617, | |
| "learning_rate": 2.948734394450162e-06, | |
| "loss": 0.8333, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.27467811158798283, | |
| "grad_norm": 8.396327018737793, | |
| "learning_rate": 2.947803523931687e-06, | |
| "loss": 0.8052, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2761087267525036, | |
| "grad_norm": 4.769831657409668, | |
| "learning_rate": 2.9468644276093736e-06, | |
| "loss": 0.7715, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.27753934191702434, | |
| "grad_norm": 5.608636379241943, | |
| "learning_rate": 2.9459171108187688e-06, | |
| "loss": 0.8781, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.27896995708154504, | |
| "grad_norm": 2.4100029468536377, | |
| "learning_rate": 2.9449615789421225e-06, | |
| "loss": 0.8128, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2804005722460658, | |
| "grad_norm": 12.692727088928223, | |
| "learning_rate": 2.943997837408361e-06, | |
| "loss": 0.8316, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.28183118741058655, | |
| "grad_norm": 2.0479393005371094, | |
| "learning_rate": 2.943025891693054e-06, | |
| "loss": 0.7717, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2832618025751073, | |
| "grad_norm": 5.383994102478027, | |
| "learning_rate": 2.9420457473183827e-06, | |
| "loss": 0.8796, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.28469241773962806, | |
| "grad_norm": 45.458194732666016, | |
| "learning_rate": 2.941057409853112e-06, | |
| "loss": 0.9014, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2861230329041488, | |
| "grad_norm": 5.420682907104492, | |
| "learning_rate": 2.9400608849125535e-06, | |
| "loss": 0.8651, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2875536480686695, | |
| "grad_norm": 3.0556061267852783, | |
| "learning_rate": 2.939056178158539e-06, | |
| "loss": 0.7834, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.28898426323319026, | |
| "grad_norm": 7.664134979248047, | |
| "learning_rate": 2.938043295299385e-06, | |
| "loss": 0.7516, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.290414878397711, | |
| "grad_norm": 10.764482498168945, | |
| "learning_rate": 2.937022242089861e-06, | |
| "loss": 0.8377, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2918454935622318, | |
| "grad_norm": 17.695600509643555, | |
| "learning_rate": 2.9359930243311565e-06, | |
| "loss": 0.9017, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2932761087267525, | |
| "grad_norm": 6.34639835357666, | |
| "learning_rate": 2.9349556478708494e-06, | |
| "loss": 0.8308, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2947067238912732, | |
| "grad_norm": 62.43265914916992, | |
| "learning_rate": 2.933910118602872e-06, | |
| "loss": 0.8773, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.296137339055794, | |
| "grad_norm": 5.159948348999023, | |
| "learning_rate": 2.932856442467476e-06, | |
| "loss": 0.6787, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.29756795422031473, | |
| "grad_norm": 3.1082255840301514, | |
| "learning_rate": 2.931794625451202e-06, | |
| "loss": 0.8965, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2989985693848355, | |
| "grad_norm": 2.5208675861358643, | |
| "learning_rate": 2.930724673586842e-06, | |
| "loss": 0.8792, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.30042918454935624, | |
| "grad_norm": 12.822099685668945, | |
| "learning_rate": 2.929646592953408e-06, | |
| "loss": 0.8534, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.30185979971387694, | |
| "grad_norm": 21.156463623046875, | |
| "learning_rate": 2.928560389676095e-06, | |
| "loss": 0.7975, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.3032904148783977, | |
| "grad_norm": 1.6340172290802002, | |
| "learning_rate": 2.9274660699262483e-06, | |
| "loss": 0.7555, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.30472103004291845, | |
| "grad_norm": 4.860538005828857, | |
| "learning_rate": 2.926363639921327e-06, | |
| "loss": 0.8352, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.3061516452074392, | |
| "grad_norm": 1.8420562744140625, | |
| "learning_rate": 2.92525310592487e-06, | |
| "loss": 0.8709, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.30758226037195996, | |
| "grad_norm": 3.420260429382324, | |
| "learning_rate": 2.9241344742464586e-06, | |
| "loss": 0.8462, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3090128755364807, | |
| "grad_norm": 4.125131130218506, | |
| "learning_rate": 2.923007751241683e-06, | |
| "loss": 0.8501, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3104434907010014, | |
| "grad_norm": 4.323355674743652, | |
| "learning_rate": 2.9218729433121034e-06, | |
| "loss": 0.8146, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.31187410586552217, | |
| "grad_norm": 3.767756938934326, | |
| "learning_rate": 2.920730056905216e-06, | |
| "loss": 0.8045, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3133047210300429, | |
| "grad_norm": 104.36705017089844, | |
| "learning_rate": 2.919579098514415e-06, | |
| "loss": 0.7723, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.3147353361945637, | |
| "grad_norm": 5.676706790924072, | |
| "learning_rate": 2.9184200746789575e-06, | |
| "loss": 0.8171, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.31616595135908443, | |
| "grad_norm": 17.918733596801758, | |
| "learning_rate": 2.9172529919839226e-06, | |
| "loss": 0.8766, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.31759656652360513, | |
| "grad_norm": 6.99448823928833, | |
| "learning_rate": 2.9160778570601787e-06, | |
| "loss": 0.7374, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.3190271816881259, | |
| "grad_norm": 2.60298490524292, | |
| "learning_rate": 2.9148946765843418e-06, | |
| "loss": 0.7419, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.32045779685264664, | |
| "grad_norm": 11.042309761047363, | |
| "learning_rate": 2.913703457278741e-06, | |
| "loss": 0.8656, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.3218884120171674, | |
| "grad_norm": 7.073480129241943, | |
| "learning_rate": 2.9125042059113773e-06, | |
| "loss": 0.7972, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.32331902718168815, | |
| "grad_norm": 3.2704951763153076, | |
| "learning_rate": 2.9112969292958874e-06, | |
| "loss": 0.8576, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.32474964234620884, | |
| "grad_norm": 5.423962116241455, | |
| "learning_rate": 2.9100816342915025e-06, | |
| "loss": 0.8155, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.3261802575107296, | |
| "grad_norm": 10.780782699584961, | |
| "learning_rate": 2.908858327803013e-06, | |
| "loss": 0.833, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.32761087267525035, | |
| "grad_norm": 5.944501876831055, | |
| "learning_rate": 2.907627016780725e-06, | |
| "loss": 0.8205, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.3290414878397711, | |
| "grad_norm": 14.172869682312012, | |
| "learning_rate": 2.906387708220425e-06, | |
| "loss": 0.8103, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33047210300429186, | |
| "grad_norm": 5.723515033721924, | |
| "learning_rate": 2.905140409163337e-06, | |
| "loss": 0.8297, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.3319027181688126, | |
| "grad_norm": 10.871804237365723, | |
| "learning_rate": 2.903885126696083e-06, | |
| "loss": 0.8411, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 13.487065315246582, | |
| "learning_rate": 2.902621867950645e-06, | |
| "loss": 0.873, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.33476394849785407, | |
| "grad_norm": 6.601111888885498, | |
| "learning_rate": 2.9013506401043214e-06, | |
| "loss": 0.8536, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3361945636623748, | |
| "grad_norm": 6.6699724197387695, | |
| "learning_rate": 2.900071450379688e-06, | |
| "loss": 0.827, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3376251788268956, | |
| "grad_norm": 3.9626381397247314, | |
| "learning_rate": 2.8987843060445575e-06, | |
| "loss": 0.8954, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.33905579399141633, | |
| "grad_norm": 1.9564266204833984, | |
| "learning_rate": 2.8974892144119353e-06, | |
| "loss": 0.7551, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.34048640915593703, | |
| "grad_norm": 7.052184581756592, | |
| "learning_rate": 2.896186182839982e-06, | |
| "loss": 0.8094, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3419170243204578, | |
| "grad_norm": 28.235042572021484, | |
| "learning_rate": 2.8948752187319696e-06, | |
| "loss": 0.7715, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.34334763948497854, | |
| "grad_norm": 7.05892276763916, | |
| "learning_rate": 2.8935563295362367e-06, | |
| "loss": 0.7823, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3447782546494993, | |
| "grad_norm": 4.274432182312012, | |
| "learning_rate": 2.8922295227461523e-06, | |
| "loss": 0.8163, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.34620886981402005, | |
| "grad_norm": 2.481339693069458, | |
| "learning_rate": 2.8908948059000676e-06, | |
| "loss": 0.812, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.34763948497854075, | |
| "grad_norm": 3.1200881004333496, | |
| "learning_rate": 2.8895521865812758e-06, | |
| "loss": 0.7542, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3490701001430615, | |
| "grad_norm": 9.71296501159668, | |
| "learning_rate": 2.88820167241797e-06, | |
| "loss": 0.8787, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.35050071530758226, | |
| "grad_norm": 1.676202654838562, | |
| "learning_rate": 2.886843271083196e-06, | |
| "loss": 0.7536, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.351931330472103, | |
| "grad_norm": 1.9348456859588623, | |
| "learning_rate": 2.8854769902948127e-06, | |
| "loss": 0.7707, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.35336194563662376, | |
| "grad_norm": 21.045368194580078, | |
| "learning_rate": 2.8841028378154463e-06, | |
| "loss": 0.8119, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.3547925608011445, | |
| "grad_norm": 6.235752582550049, | |
| "learning_rate": 2.8827208214524477e-06, | |
| "loss": 0.7814, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3562231759656652, | |
| "grad_norm": 9.359082221984863, | |
| "learning_rate": 2.881330949057845e-06, | |
| "loss": 0.8157, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.35765379113018597, | |
| "grad_norm": 5.472043037414551, | |
| "learning_rate": 2.8799332285283025e-06, | |
| "loss": 0.8594, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3590844062947067, | |
| "grad_norm": 11.745348930358887, | |
| "learning_rate": 2.8785276678050736e-06, | |
| "loss": 0.8394, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.3605150214592275, | |
| "grad_norm": 16.824607849121094, | |
| "learning_rate": 2.877114274873957e-06, | |
| "loss": 0.7987, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.36194563662374823, | |
| "grad_norm": 30.396041870117188, | |
| "learning_rate": 2.8756930577652493e-06, | |
| "loss": 0.7705, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.36337625178826893, | |
| "grad_norm": 5.095501899719238, | |
| "learning_rate": 2.874264024553702e-06, | |
| "loss": 0.8093, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3648068669527897, | |
| "grad_norm": 5.913100242614746, | |
| "learning_rate": 2.8728271833584744e-06, | |
| "loss": 0.8863, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.36623748211731044, | |
| "grad_norm": 3.5775020122528076, | |
| "learning_rate": 2.871382542343087e-06, | |
| "loss": 0.8394, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3676680972818312, | |
| "grad_norm": 4.6704816818237305, | |
| "learning_rate": 2.869930109715375e-06, | |
| "loss": 0.9023, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.36909871244635195, | |
| "grad_norm": 52.114646911621094, | |
| "learning_rate": 2.868469893727443e-06, | |
| "loss": 0.713, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.37052932761087265, | |
| "grad_norm": 5.689326763153076, | |
| "learning_rate": 2.8670019026756174e-06, | |
| "loss": 0.9299, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.3719599427753934, | |
| "grad_norm": 1.1218035221099854, | |
| "learning_rate": 2.8655261449003993e-06, | |
| "loss": 0.8403, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.37339055793991416, | |
| "grad_norm": 2.4807209968566895, | |
| "learning_rate": 2.864042628786416e-06, | |
| "loss": 0.8961, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.3748211731044349, | |
| "grad_norm": 6.620181560516357, | |
| "learning_rate": 2.8625513627623757e-06, | |
| "loss": 0.839, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.37625178826895567, | |
| "grad_norm": 7.724957466125488, | |
| "learning_rate": 2.8610523553010174e-06, | |
| "loss": 0.8033, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.3776824034334764, | |
| "grad_norm": 3.1110544204711914, | |
| "learning_rate": 2.8595456149190633e-06, | |
| "loss": 0.8175, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3791130185979971, | |
| "grad_norm": 5.656611919403076, | |
| "learning_rate": 2.858031150177173e-06, | |
| "loss": 0.823, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.3805436337625179, | |
| "grad_norm": 5.221110820770264, | |
| "learning_rate": 2.85650896967989e-06, | |
| "loss": 0.8279, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.38197424892703863, | |
| "grad_norm": 3.36710786819458, | |
| "learning_rate": 2.854979082075596e-06, | |
| "loss": 0.7052, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3834048640915594, | |
| "grad_norm": 5.043059349060059, | |
| "learning_rate": 2.8534414960564626e-06, | |
| "loss": 0.815, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.38483547925608014, | |
| "grad_norm": 2.3259692192077637, | |
| "learning_rate": 2.8518962203583996e-06, | |
| "loss": 0.8315, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.38626609442060084, | |
| "grad_norm": 2.116469621658325, | |
| "learning_rate": 2.850343263761005e-06, | |
| "loss": 0.8151, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3876967095851216, | |
| "grad_norm": 5.095742225646973, | |
| "learning_rate": 2.8487826350875188e-06, | |
| "loss": 0.8809, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.38912732474964234, | |
| "grad_norm": 21.042909622192383, | |
| "learning_rate": 2.8472143432047694e-06, | |
| "loss": 0.8604, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.3905579399141631, | |
| "grad_norm": 4.103556156158447, | |
| "learning_rate": 2.8456383970231238e-06, | |
| "loss": 0.8797, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.39198855507868385, | |
| "grad_norm": 8.809136390686035, | |
| "learning_rate": 2.8440548054964382e-06, | |
| "loss": 0.9017, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.39341917024320455, | |
| "grad_norm": 4.425339221954346, | |
| "learning_rate": 2.8424635776220057e-06, | |
| "loss": 0.9289, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3948497854077253, | |
| "grad_norm": 4.326204776763916, | |
| "learning_rate": 2.8408647224405066e-06, | |
| "loss": 0.768, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.39628040057224606, | |
| "grad_norm": 14.46237564086914, | |
| "learning_rate": 2.8392582490359563e-06, | |
| "loss": 0.8116, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3977110157367668, | |
| "grad_norm": 7.003748416900635, | |
| "learning_rate": 2.8376441665356527e-06, | |
| "loss": 0.7712, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.39914163090128757, | |
| "grad_norm": 6.612820625305176, | |
| "learning_rate": 2.8360224841101273e-06, | |
| "loss": 0.874, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4005722460658083, | |
| "grad_norm": 1.8514535427093506, | |
| "learning_rate": 2.8343932109730885e-06, | |
| "loss": 0.8416, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.402002861230329, | |
| "grad_norm": 3.661787271499634, | |
| "learning_rate": 2.8327563563813735e-06, | |
| "loss": 0.8026, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.4034334763948498, | |
| "grad_norm": 4.149445056915283, | |
| "learning_rate": 2.8311119296348947e-06, | |
| "loss": 0.8505, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.40486409155937053, | |
| "grad_norm": 1.8762818574905396, | |
| "learning_rate": 2.829459940076585e-06, | |
| "loss": 0.91, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.4062947067238913, | |
| "grad_norm": 3.605158805847168, | |
| "learning_rate": 2.8278003970923464e-06, | |
| "loss": 0.786, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.40772532188841204, | |
| "grad_norm": 5.466380596160889, | |
| "learning_rate": 2.826133310110996e-06, | |
| "loss": 0.7949, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.40915593705293274, | |
| "grad_norm": 2.403118133544922, | |
| "learning_rate": 2.824458688604214e-06, | |
| "loss": 0.8175, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.4105865522174535, | |
| "grad_norm": 8.62721061706543, | |
| "learning_rate": 2.8227765420864864e-06, | |
| "loss": 0.7938, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.41201716738197425, | |
| "grad_norm": 12.812850952148438, | |
| "learning_rate": 2.821086880115055e-06, | |
| "loss": 0.8682, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.413447782546495, | |
| "grad_norm": 10.280946731567383, | |
| "learning_rate": 2.81938971228986e-06, | |
| "loss": 0.7679, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.41487839771101576, | |
| "grad_norm": 5.222766399383545, | |
| "learning_rate": 2.8176850482534874e-06, | |
| "loss": 0.8453, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.41630901287553645, | |
| "grad_norm": 16.025169372558594, | |
| "learning_rate": 2.8159728976911133e-06, | |
| "loss": 0.7303, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.4177396280400572, | |
| "grad_norm": 2.5485048294067383, | |
| "learning_rate": 2.8142532703304487e-06, | |
| "loss": 0.8233, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.41917024320457796, | |
| "grad_norm": 2.9927330017089844, | |
| "learning_rate": 2.8125261759416854e-06, | |
| "loss": 0.8752, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.4206008583690987, | |
| "grad_norm": 20.316953659057617, | |
| "learning_rate": 2.810791624337438e-06, | |
| "loss": 0.7761, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4220314735336195, | |
| "grad_norm": 5.816092014312744, | |
| "learning_rate": 2.8090496253726924e-06, | |
| "loss": 0.8886, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4234620886981402, | |
| "grad_norm": 2.1833443641662598, | |
| "learning_rate": 2.8073001889447446e-06, | |
| "loss": 0.8559, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.4248927038626609, | |
| "grad_norm": 1.9403437376022339, | |
| "learning_rate": 2.805543324993149e-06, | |
| "loss": 0.7898, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.4263233190271817, | |
| "grad_norm": 18.38999366760254, | |
| "learning_rate": 2.8037790434996593e-06, | |
| "loss": 0.8416, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.42775393419170243, | |
| "grad_norm": 4.05740213394165, | |
| "learning_rate": 2.8020073544881724e-06, | |
| "loss": 0.8204, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.4291845493562232, | |
| "grad_norm": 1.8824354410171509, | |
| "learning_rate": 2.800228268024672e-06, | |
| "loss": 0.78, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.43061516452074394, | |
| "grad_norm": 2.7645819187164307, | |
| "learning_rate": 2.79844179421717e-06, | |
| "loss": 0.8157, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.43204577968526464, | |
| "grad_norm": 3.2076547145843506, | |
| "learning_rate": 2.796647943215651e-06, | |
| "loss": 0.8537, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4334763948497854, | |
| "grad_norm": 3.7037010192871094, | |
| "learning_rate": 2.7948467252120144e-06, | |
| "loss": 0.8262, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.43490701001430615, | |
| "grad_norm": 6.0140557289123535, | |
| "learning_rate": 2.793038150440013e-06, | |
| "loss": 0.9137, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.4363376251788269, | |
| "grad_norm": 3.6040737628936768, | |
| "learning_rate": 2.7912222291752013e-06, | |
| "loss": 0.8043, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.43776824034334766, | |
| "grad_norm": 2.64436674118042, | |
| "learning_rate": 2.7893989717348702e-06, | |
| "loss": 0.8577, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.43919885550786836, | |
| "grad_norm": 3.0492098331451416, | |
| "learning_rate": 2.7875683884779937e-06, | |
| "loss": 0.8455, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.4406294706723891, | |
| "grad_norm": 3.0012905597686768, | |
| "learning_rate": 2.785730489805167e-06, | |
| "loss": 0.787, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.44206008583690987, | |
| "grad_norm": 2.695319652557373, | |
| "learning_rate": 2.783885286158549e-06, | |
| "loss": 0.8001, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.4434907010014306, | |
| "grad_norm": 4.0424909591674805, | |
| "learning_rate": 2.782032788021802e-06, | |
| "loss": 0.78, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4449213161659514, | |
| "grad_norm": 2.0582504272460938, | |
| "learning_rate": 2.7801730059200314e-06, | |
| "loss": 0.8018, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.44635193133047213, | |
| "grad_norm": 1.0271695852279663, | |
| "learning_rate": 2.7783059504197293e-06, | |
| "loss": 0.8059, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.44778254649499283, | |
| "grad_norm": 12.270268440246582, | |
| "learning_rate": 2.7764316321287102e-06, | |
| "loss": 0.7964, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.4492131616595136, | |
| "grad_norm": 4.83074951171875, | |
| "learning_rate": 2.774550061696055e-06, | |
| "loss": 0.8015, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.45064377682403434, | |
| "grad_norm": 4.174887180328369, | |
| "learning_rate": 2.7726612498120442e-06, | |
| "loss": 0.8314, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.4520743919885551, | |
| "grad_norm": 2.5617687702178955, | |
| "learning_rate": 2.7707652072081057e-06, | |
| "loss": 0.7849, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.45350500715307585, | |
| "grad_norm": 23.52600860595703, | |
| "learning_rate": 2.7688619446567456e-06, | |
| "loss": 0.8122, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.45493562231759654, | |
| "grad_norm": 1.7928926944732666, | |
| "learning_rate": 2.7669514729714935e-06, | |
| "loss": 0.882, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.4563662374821173, | |
| "grad_norm": 8.705628395080566, | |
| "learning_rate": 2.765033803006836e-06, | |
| "loss": 0.788, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.45779685264663805, | |
| "grad_norm": 118.05711364746094, | |
| "learning_rate": 2.7631089456581586e-06, | |
| "loss": 0.8104, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4592274678111588, | |
| "grad_norm": 3.2315642833709717, | |
| "learning_rate": 2.7611769118616817e-06, | |
| "loss": 0.8708, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.46065808297567956, | |
| "grad_norm": 3.948796033859253, | |
| "learning_rate": 2.7592377125944e-06, | |
| "loss": 0.7526, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.46208869814020026, | |
| "grad_norm": 4.273873329162598, | |
| "learning_rate": 2.7572913588740195e-06, | |
| "loss": 0.8011, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.463519313304721, | |
| "grad_norm": 2.294113874435425, | |
| "learning_rate": 2.755337861758893e-06, | |
| "loss": 0.795, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.46494992846924177, | |
| "grad_norm": 72.31570434570312, | |
| "learning_rate": 2.7533772323479605e-06, | |
| "loss": 0.8524, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4663805436337625, | |
| "grad_norm": 2.326502799987793, | |
| "learning_rate": 2.7514094817806853e-06, | |
| "loss": 0.7838, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4678111587982833, | |
| "grad_norm": 7.991358280181885, | |
| "learning_rate": 2.7494346212369884e-06, | |
| "loss": 0.7923, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.46924177396280403, | |
| "grad_norm": 5.2567596435546875, | |
| "learning_rate": 2.7474526619371874e-06, | |
| "loss": 0.8094, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.47067238912732473, | |
| "grad_norm": 29.58182144165039, | |
| "learning_rate": 2.7454636151419323e-06, | |
| "loss": 0.8041, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4721030042918455, | |
| "grad_norm": 4.548513412475586, | |
| "learning_rate": 2.7434674921521414e-06, | |
| "loss": 0.8016, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.47353361945636624, | |
| "grad_norm": 16.49585723876953, | |
| "learning_rate": 2.7414643043089362e-06, | |
| "loss": 0.7666, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.474964234620887, | |
| "grad_norm": 4.154926300048828, | |
| "learning_rate": 2.739454062993578e-06, | |
| "loss": 0.7745, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.47639484978540775, | |
| "grad_norm": 6.365798473358154, | |
| "learning_rate": 2.7374367796274023e-06, | |
| "loss": 0.8022, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.47782546494992845, | |
| "grad_norm": 125.90961456298828, | |
| "learning_rate": 2.735412465671756e-06, | |
| "loss": 0.8109, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4792560801144492, | |
| "grad_norm": 32.61653518676758, | |
| "learning_rate": 2.73338113262793e-06, | |
| "loss": 0.8748, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.48068669527896996, | |
| "grad_norm": 3.2467617988586426, | |
| "learning_rate": 2.7313427920370948e-06, | |
| "loss": 0.8134, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4821173104434907, | |
| "grad_norm": 9.577071189880371, | |
| "learning_rate": 2.7292974554802343e-06, | |
| "loss": 0.8149, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.48354792560801146, | |
| "grad_norm": 9.44502067565918, | |
| "learning_rate": 2.7272451345780804e-06, | |
| "loss": 0.825, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.48497854077253216, | |
| "grad_norm": 3.725696325302124, | |
| "learning_rate": 2.725185840991049e-06, | |
| "loss": 0.8543, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.4864091559370529, | |
| "grad_norm": 9.806964874267578, | |
| "learning_rate": 2.723119586419169e-06, | |
| "loss": 0.7656, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.48783977110157367, | |
| "grad_norm": 6.31876802444458, | |
| "learning_rate": 2.721046382602021e-06, | |
| "loss": 0.8145, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.4892703862660944, | |
| "grad_norm": 5.0293073654174805, | |
| "learning_rate": 2.718966241318666e-06, | |
| "loss": 0.8477, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.4907010014306152, | |
| "grad_norm": 1.991077184677124, | |
| "learning_rate": 2.7168791743875835e-06, | |
| "loss": 0.7861, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.49213161659513593, | |
| "grad_norm": 7.8108344078063965, | |
| "learning_rate": 2.7147851936665995e-06, | |
| "loss": 0.8532, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.49356223175965663, | |
| "grad_norm": 2.972151041030884, | |
| "learning_rate": 2.712684311052822e-06, | |
| "loss": 0.8825, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.4949928469241774, | |
| "grad_norm": 3.060875177383423, | |
| "learning_rate": 2.710576538482572e-06, | |
| "loss": 0.8001, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.49642346208869814, | |
| "grad_norm": 10.620682716369629, | |
| "learning_rate": 2.7084618879313177e-06, | |
| "loss": 0.8303, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.4978540772532189, | |
| "grad_norm": 21.889728546142578, | |
| "learning_rate": 2.706340371413603e-06, | |
| "loss": 0.8979, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.49928469241773965, | |
| "grad_norm": 9.274587631225586, | |
| "learning_rate": 2.7042120009829832e-06, | |
| "loss": 0.8525, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5007153075822603, | |
| "grad_norm": 16.314605712890625, | |
| "learning_rate": 2.7020767887319534e-06, | |
| "loss": 0.8911, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5007153075822603, | |
| "eval_loss": 0.9631034731864929, | |
| "eval_runtime": 64.0772, | |
| "eval_samples_per_second": 6.477, | |
| "eval_steps_per_second": 0.406, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5021459227467812, | |
| "grad_norm": 15.189119338989258, | |
| "learning_rate": 2.6999347467918816e-06, | |
| "loss": 0.7916, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5035765379113019, | |
| "grad_norm": 6.363760948181152, | |
| "learning_rate": 2.6977858873329394e-06, | |
| "loss": 0.863, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5050071530758226, | |
| "grad_norm": 18.08306121826172, | |
| "learning_rate": 2.695630222564032e-06, | |
| "loss": 0.8125, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5064377682403434, | |
| "grad_norm": 5.672774791717529, | |
| "learning_rate": 2.6934677647327293e-06, | |
| "loss": 0.8818, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5078683834048641, | |
| "grad_norm": 61.24919509887695, | |
| "learning_rate": 2.6912985261251977e-06, | |
| "loss": 0.8885, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5092989985693849, | |
| "grad_norm": 7.921273708343506, | |
| "learning_rate": 2.689122519066128e-06, | |
| "loss": 0.7384, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5107296137339056, | |
| "grad_norm": 2.321747064590454, | |
| "learning_rate": 2.686939755918667e-06, | |
| "loss": 0.7979, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.5121602288984263, | |
| "grad_norm": 6.9070587158203125, | |
| "learning_rate": 2.684750249084346e-06, | |
| "loss": 0.8531, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5135908440629471, | |
| "grad_norm": 2.6162514686584473, | |
| "learning_rate": 2.6825540110030117e-06, | |
| "loss": 0.8871, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5150214592274678, | |
| "grad_norm": 8.098695755004883, | |
| "learning_rate": 2.6803510541527555e-06, | |
| "loss": 0.8527, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5164520743919886, | |
| "grad_norm": 1.7876381874084473, | |
| "learning_rate": 2.678141391049841e-06, | |
| "loss": 0.8607, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.5178826895565093, | |
| "grad_norm": 83.18020629882812, | |
| "learning_rate": 2.675925034248633e-06, | |
| "loss": 0.8275, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.51931330472103, | |
| "grad_norm": 2.7980153560638428, | |
| "learning_rate": 2.67370199634153e-06, | |
| "loss": 0.8568, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.5207439198855508, | |
| "grad_norm": 2.3697915077209473, | |
| "learning_rate": 2.671472289958886e-06, | |
| "loss": 0.8863, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5221745350500715, | |
| "grad_norm": 8.928977012634277, | |
| "learning_rate": 2.669235927768946e-06, | |
| "loss": 0.714, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.5236051502145923, | |
| "grad_norm": 17.770780563354492, | |
| "learning_rate": 2.6669929224777677e-06, | |
| "loss": 0.7601, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.525035765379113, | |
| "grad_norm": 2.65303635597229, | |
| "learning_rate": 2.664743286829154e-06, | |
| "loss": 0.8077, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5264663805436338, | |
| "grad_norm": 2.1842598915100098, | |
| "learning_rate": 2.6624870336045768e-06, | |
| "loss": 0.791, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5278969957081545, | |
| "grad_norm": 3.5350661277770996, | |
| "learning_rate": 2.660224175623108e-06, | |
| "loss": 0.8359, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5293276108726752, | |
| "grad_norm": 6.636647701263428, | |
| "learning_rate": 2.6579547257413438e-06, | |
| "loss": 0.7339, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.530758226037196, | |
| "grad_norm": 2.953014612197876, | |
| "learning_rate": 2.6556786968533337e-06, | |
| "loss": 0.7684, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5321888412017167, | |
| "grad_norm": 16.38330841064453, | |
| "learning_rate": 2.6533961018905052e-06, | |
| "loss": 0.7963, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5336194563662375, | |
| "grad_norm": 3.730391502380371, | |
| "learning_rate": 2.6511069538215928e-06, | |
| "loss": 0.8331, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5350500715307582, | |
| "grad_norm": 2.098069906234741, | |
| "learning_rate": 2.6488112656525614e-06, | |
| "loss": 0.7582, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5364806866952789, | |
| "grad_norm": 10.553278923034668, | |
| "learning_rate": 2.6465090504265353e-06, | |
| "loss": 0.7405, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5379113018597997, | |
| "grad_norm": 8.935467720031738, | |
| "learning_rate": 2.6442003212237215e-06, | |
| "loss": 0.8012, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5393419170243204, | |
| "grad_norm": 5.658432483673096, | |
| "learning_rate": 2.6418850911613385e-06, | |
| "loss": 0.8527, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5407725321888412, | |
| "grad_norm": 7.131669521331787, | |
| "learning_rate": 2.6395633733935376e-06, | |
| "loss": 0.7484, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.542203147353362, | |
| "grad_norm": 7.413619518280029, | |
| "learning_rate": 2.6372351811113327e-06, | |
| "loss": 0.8055, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.5436337625178826, | |
| "grad_norm": 3.693314790725708, | |
| "learning_rate": 2.634900527542522e-06, | |
| "loss": 0.8518, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5450643776824035, | |
| "grad_norm": 19.805158615112305, | |
| "learning_rate": 2.632559425951613e-06, | |
| "loss": 0.7986, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.5464949928469242, | |
| "grad_norm": 4.035129070281982, | |
| "learning_rate": 2.63021188963975e-06, | |
| "loss": 0.7836, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.547925608011445, | |
| "grad_norm": 5.204458236694336, | |
| "learning_rate": 2.6278579319446364e-06, | |
| "loss": 0.8931, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.5493562231759657, | |
| "grad_norm": 2.124077320098877, | |
| "learning_rate": 2.625497566240458e-06, | |
| "loss": 0.7553, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5507868383404864, | |
| "grad_norm": 23.981964111328125, | |
| "learning_rate": 2.623130805937809e-06, | |
| "loss": 0.8436, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5522174535050072, | |
| "grad_norm": 3.7908241748809814, | |
| "learning_rate": 2.6207576644836144e-06, | |
| "loss": 0.7655, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5536480686695279, | |
| "grad_norm": 2.662917375564575, | |
| "learning_rate": 2.6183781553610553e-06, | |
| "loss": 0.8928, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5550786838340487, | |
| "grad_norm": 12.019503593444824, | |
| "learning_rate": 2.615992292089489e-06, | |
| "loss": 0.7619, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5565092989985694, | |
| "grad_norm": 2.186976194381714, | |
| "learning_rate": 2.613600088224378e-06, | |
| "loss": 0.8131, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5579399141630901, | |
| "grad_norm": 4.182912349700928, | |
| "learning_rate": 2.6112015573572054e-06, | |
| "loss": 0.7677, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5593705293276109, | |
| "grad_norm": 4.425599575042725, | |
| "learning_rate": 2.6087967131154046e-06, | |
| "loss": 0.7237, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5608011444921316, | |
| "grad_norm": 3.038487672805786, | |
| "learning_rate": 2.6063855691622773e-06, | |
| "loss": 0.8731, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5622317596566524, | |
| "grad_norm": 8.466862678527832, | |
| "learning_rate": 2.6039681391969175e-06, | |
| "loss": 0.851, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5636623748211731, | |
| "grad_norm": 1.744046688079834, | |
| "learning_rate": 2.6015444369541346e-06, | |
| "loss": 0.7861, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5650929899856938, | |
| "grad_norm": 4.3912835121154785, | |
| "learning_rate": 2.5991144762043736e-06, | |
| "loss": 0.7755, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5665236051502146, | |
| "grad_norm": 2.832746744155884, | |
| "learning_rate": 2.5966782707536385e-06, | |
| "loss": 0.8042, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5679542203147353, | |
| "grad_norm": 12.723127365112305, | |
| "learning_rate": 2.5942358344434123e-06, | |
| "loss": 0.8115, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.5693848354792561, | |
| "grad_norm": 12.688072204589844, | |
| "learning_rate": 2.5917871811505786e-06, | |
| "loss": 0.7963, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5708154506437768, | |
| "grad_norm": 2.819028377532959, | |
| "learning_rate": 2.589332324787345e-06, | |
| "loss": 0.7876, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.5722460658082976, | |
| "grad_norm": 6.72185754776001, | |
| "learning_rate": 2.58687127930116e-06, | |
| "loss": 0.7474, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5736766809728183, | |
| "grad_norm": 5.983644008636475, | |
| "learning_rate": 2.5844040586746383e-06, | |
| "loss": 0.7863, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.575107296137339, | |
| "grad_norm": 6.598376274108887, | |
| "learning_rate": 2.581930676925478e-06, | |
| "loss": 0.8686, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5765379113018598, | |
| "grad_norm": 15.069884300231934, | |
| "learning_rate": 2.579451148106382e-06, | |
| "loss": 0.8143, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.5779685264663805, | |
| "grad_norm": 6.5639119148254395, | |
| "learning_rate": 2.576965486304978e-06, | |
| "loss": 0.712, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5793991416309013, | |
| "grad_norm": 3.1110270023345947, | |
| "learning_rate": 2.5744737056437407e-06, | |
| "loss": 0.8277, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.580829756795422, | |
| "grad_norm": 3.178307294845581, | |
| "learning_rate": 2.571975820279906e-06, | |
| "loss": 0.7377, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5822603719599427, | |
| "grad_norm": 1.4912009239196777, | |
| "learning_rate": 2.5694718444053977e-06, | |
| "loss": 0.8098, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.5836909871244635, | |
| "grad_norm": 1.6244900226593018, | |
| "learning_rate": 2.5669617922467407e-06, | |
| "loss": 0.8304, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5851216022889842, | |
| "grad_norm": 5.3474016189575195, | |
| "learning_rate": 2.5644456780649842e-06, | |
| "loss": 0.8797, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.586552217453505, | |
| "grad_norm": 6.614544868469238, | |
| "learning_rate": 2.561923516155619e-06, | |
| "loss": 0.7439, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5879828326180258, | |
| "grad_norm": 8.531089782714844, | |
| "learning_rate": 2.5593953208484957e-06, | |
| "loss": 0.7857, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.5894134477825465, | |
| "grad_norm": 3.9704976081848145, | |
| "learning_rate": 2.556861106507745e-06, | |
| "loss": 0.7818, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5908440629470673, | |
| "grad_norm": 19.362394332885742, | |
| "learning_rate": 2.554320887531696e-06, | |
| "loss": 0.7372, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.592274678111588, | |
| "grad_norm": 4.459641933441162, | |
| "learning_rate": 2.551774678352791e-06, | |
| "loss": 0.7558, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5937052932761088, | |
| "grad_norm": 3.2259392738342285, | |
| "learning_rate": 2.549222493437509e-06, | |
| "loss": 0.8202, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5951359084406295, | |
| "grad_norm": 8.601910591125488, | |
| "learning_rate": 2.5466643472862773e-06, | |
| "loss": 0.8521, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5965665236051502, | |
| "grad_norm": 36.73543167114258, | |
| "learning_rate": 2.544100254433396e-06, | |
| "loss": 0.884, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.597997138769671, | |
| "grad_norm": 4.288121223449707, | |
| "learning_rate": 2.541530229446949e-06, | |
| "loss": 0.8053, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5994277539341917, | |
| "grad_norm": 1.6672669649124146, | |
| "learning_rate": 2.538954286928726e-06, | |
| "loss": 0.7844, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6008583690987125, | |
| "grad_norm": 6.948642253875732, | |
| "learning_rate": 2.5363724415141366e-06, | |
| "loss": 0.8092, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6022889842632332, | |
| "grad_norm": 5.498805999755859, | |
| "learning_rate": 2.5337847078721275e-06, | |
| "loss": 0.8096, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6037195994277539, | |
| "grad_norm": 2.4374125003814697, | |
| "learning_rate": 2.531191100705102e-06, | |
| "loss": 0.8779, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6051502145922747, | |
| "grad_norm": 7.563881874084473, | |
| "learning_rate": 2.5285916347488315e-06, | |
| "loss": 0.8159, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.6065808297567954, | |
| "grad_norm": 1.8715702295303345, | |
| "learning_rate": 2.525986324772377e-06, | |
| "loss": 0.7818, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6080114449213162, | |
| "grad_norm": 6.312496185302734, | |
| "learning_rate": 2.5233751855780012e-06, | |
| "loss": 0.7421, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6094420600858369, | |
| "grad_norm": 9.23725414276123, | |
| "learning_rate": 2.5207582320010873e-06, | |
| "loss": 0.8207, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6108726752503576, | |
| "grad_norm": 8.034300804138184, | |
| "learning_rate": 2.518135478910051e-06, | |
| "loss": 0.8379, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6123032904148784, | |
| "grad_norm": 18.190195083618164, | |
| "learning_rate": 2.5155069412062605e-06, | |
| "loss": 0.8071, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6137339055793991, | |
| "grad_norm": 1.6293121576309204, | |
| "learning_rate": 2.51287263382395e-06, | |
| "loss": 0.8994, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6151645207439199, | |
| "grad_norm": 12.373995780944824, | |
| "learning_rate": 2.5102325717301316e-06, | |
| "loss": 0.7766, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6165951359084406, | |
| "grad_norm": 3.5726394653320312, | |
| "learning_rate": 2.507586769924517e-06, | |
| "loss": 0.8163, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.6180257510729614, | |
| "grad_norm": 21.729354858398438, | |
| "learning_rate": 2.5049352434394263e-06, | |
| "loss": 0.8227, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6194563662374821, | |
| "grad_norm": 9.771985054016113, | |
| "learning_rate": 2.502278007339705e-06, | |
| "loss": 0.7762, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6208869814020028, | |
| "grad_norm": 4.523687362670898, | |
| "learning_rate": 2.4996150767226375e-06, | |
| "loss": 0.7464, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6223175965665236, | |
| "grad_norm": 5.442951202392578, | |
| "learning_rate": 2.496946466717865e-06, | |
| "loss": 0.7712, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.6237482117310443, | |
| "grad_norm": 4.3297600746154785, | |
| "learning_rate": 2.494272192487293e-06, | |
| "loss": 0.7618, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6251788268955651, | |
| "grad_norm": 9.589028358459473, | |
| "learning_rate": 2.4915922692250107e-06, | |
| "loss": 0.8449, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6266094420600858, | |
| "grad_norm": 1.7547855377197266, | |
| "learning_rate": 2.4889067121572023e-06, | |
| "loss": 0.8368, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6280400572246065, | |
| "grad_norm": 32.727203369140625, | |
| "learning_rate": 2.486215536542061e-06, | |
| "loss": 0.7986, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.6294706723891274, | |
| "grad_norm": 25.33466911315918, | |
| "learning_rate": 2.4835187576697013e-06, | |
| "loss": 0.8372, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.630901287553648, | |
| "grad_norm": 4.323023319244385, | |
| "learning_rate": 2.480816390862075e-06, | |
| "loss": 0.7125, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.6323319027181689, | |
| "grad_norm": 1.7013431787490845, | |
| "learning_rate": 2.4781084514728797e-06, | |
| "loss": 0.8322, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6337625178826896, | |
| "grad_norm": 3.388103485107422, | |
| "learning_rate": 2.475394954887476e-06, | |
| "loss": 0.8479, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.6351931330472103, | |
| "grad_norm": 1.854880452156067, | |
| "learning_rate": 2.4726759165227963e-06, | |
| "loss": 0.8113, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6366237482117311, | |
| "grad_norm": 17.75172996520996, | |
| "learning_rate": 2.469951351827262e-06, | |
| "loss": 0.913, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.6380543633762518, | |
| "grad_norm": 57.98564910888672, | |
| "learning_rate": 2.467221276280689e-06, | |
| "loss": 0.8532, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6394849785407726, | |
| "grad_norm": 16.528905868530273, | |
| "learning_rate": 2.4644857053942066e-06, | |
| "loss": 0.7039, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.6409155937052933, | |
| "grad_norm": 2.7571375370025635, | |
| "learning_rate": 2.4617446547101648e-06, | |
| "loss": 0.7315, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.642346208869814, | |
| "grad_norm": 1.71315336227417, | |
| "learning_rate": 2.4589981398020472e-06, | |
| "loss": 0.8122, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.6437768240343348, | |
| "grad_norm": 10.909632682800293, | |
| "learning_rate": 2.456246176274384e-06, | |
| "loss": 0.8142, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6452074391988555, | |
| "grad_norm": 2.0243256092071533, | |
| "learning_rate": 2.4534887797626616e-06, | |
| "loss": 0.7944, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.6466380543633763, | |
| "grad_norm": 3.194434404373169, | |
| "learning_rate": 2.4507259659332335e-06, | |
| "loss": 0.7259, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.648068669527897, | |
| "grad_norm": 6.121618270874023, | |
| "learning_rate": 2.447957750483233e-06, | |
| "loss": 0.7809, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.6494992846924177, | |
| "grad_norm": 5.905685901641846, | |
| "learning_rate": 2.4451841491404837e-06, | |
| "loss": 0.7678, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6509298998569385, | |
| "grad_norm": 2.6199986934661865, | |
| "learning_rate": 2.4424051776634074e-06, | |
| "loss": 0.858, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.6523605150214592, | |
| "grad_norm": 3.9428181648254395, | |
| "learning_rate": 2.4396208518409392e-06, | |
| "loss": 0.8447, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.65379113018598, | |
| "grad_norm": 10.59566593170166, | |
| "learning_rate": 2.4368311874924335e-06, | |
| "loss": 0.7262, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.6552217453505007, | |
| "grad_norm": 2.320530652999878, | |
| "learning_rate": 2.434036200467577e-06, | |
| "loss": 0.7948, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6566523605150214, | |
| "grad_norm": 1.7014998197555542, | |
| "learning_rate": 2.431235906646297e-06, | |
| "loss": 0.795, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.6580829756795422, | |
| "grad_norm": 3.835496187210083, | |
| "learning_rate": 2.4284303219386723e-06, | |
| "loss": 0.791, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6595135908440629, | |
| "grad_norm": 2.4570517539978027, | |
| "learning_rate": 2.4256194622848413e-06, | |
| "loss": 0.7939, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.6609442060085837, | |
| "grad_norm": 2.4925966262817383, | |
| "learning_rate": 2.4228033436549135e-06, | |
| "loss": 0.7902, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6623748211731044, | |
| "grad_norm": 4.428621292114258, | |
| "learning_rate": 2.4199819820488774e-06, | |
| "loss": 0.7936, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.6638054363376252, | |
| "grad_norm": 2.6506454944610596, | |
| "learning_rate": 2.417155393496509e-06, | |
| "loss": 0.7503, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6652360515021459, | |
| "grad_norm": 5.980336666107178, | |
| "learning_rate": 2.4143235940572825e-06, | |
| "loss": 0.7956, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 5.393733978271484, | |
| "learning_rate": 2.4114865998202785e-06, | |
| "loss": 0.8161, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6680972818311874, | |
| "grad_norm": 7.725034713745117, | |
| "learning_rate": 2.4086444269040905e-06, | |
| "loss": 0.835, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.6695278969957081, | |
| "grad_norm": 10.333250999450684, | |
| "learning_rate": 2.4057970914567367e-06, | |
| "loss": 0.8684, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.670958512160229, | |
| "grad_norm": 7.202269554138184, | |
| "learning_rate": 2.4029446096555665e-06, | |
| "loss": 0.7689, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6723891273247496, | |
| "grad_norm": 3.6261048316955566, | |
| "learning_rate": 2.4000869977071677e-06, | |
| "loss": 0.846, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6738197424892703, | |
| "grad_norm": 6.523050785064697, | |
| "learning_rate": 2.3972242718472758e-06, | |
| "loss": 0.854, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.6752503576537912, | |
| "grad_norm": 6.202769756317139, | |
| "learning_rate": 2.3943564483406825e-06, | |
| "loss": 0.7847, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6766809728183119, | |
| "grad_norm": 2.8411245346069336, | |
| "learning_rate": 2.391483543481141e-06, | |
| "loss": 0.7264, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.6781115879828327, | |
| "grad_norm": 4.087616920471191, | |
| "learning_rate": 2.388605573591273e-06, | |
| "loss": 0.832, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6795422031473534, | |
| "grad_norm": 3.4109015464782715, | |
| "learning_rate": 2.385722555022482e-06, | |
| "loss": 0.7944, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.6809728183118741, | |
| "grad_norm": 2.6394965648651123, | |
| "learning_rate": 2.382834504154852e-06, | |
| "loss": 0.7663, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6824034334763949, | |
| "grad_norm": 1.812250018119812, | |
| "learning_rate": 2.3799414373970595e-06, | |
| "loss": 0.7917, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.6838340486409156, | |
| "grad_norm": 3.3612866401672363, | |
| "learning_rate": 2.3770433711862792e-06, | |
| "loss": 0.8315, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.6852646638054364, | |
| "grad_norm": 1.5638633966445923, | |
| "learning_rate": 2.3741403219880914e-06, | |
| "loss": 0.8377, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.6866952789699571, | |
| "grad_norm": 2.297668695449829, | |
| "learning_rate": 2.3712323062963865e-06, | |
| "loss": 0.7572, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6881258941344778, | |
| "grad_norm": 6.158240795135498, | |
| "learning_rate": 2.3683193406332724e-06, | |
| "loss": 0.8389, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.6895565092989986, | |
| "grad_norm": 2.0638744831085205, | |
| "learning_rate": 2.3654014415489823e-06, | |
| "loss": 0.7253, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.6909871244635193, | |
| "grad_norm": 2.175001382827759, | |
| "learning_rate": 2.362478625621777e-06, | |
| "loss": 0.8104, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.6924177396280401, | |
| "grad_norm": 1.4559204578399658, | |
| "learning_rate": 2.3595509094578526e-06, | |
| "loss": 0.7884, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6938483547925608, | |
| "grad_norm": 5.243185520172119, | |
| "learning_rate": 2.3566183096912486e-06, | |
| "loss": 0.7642, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.6952789699570815, | |
| "grad_norm": 2.665585994720459, | |
| "learning_rate": 2.353680842983749e-06, | |
| "loss": 0.7022, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.6967095851216023, | |
| "grad_norm": 2.043215274810791, | |
| "learning_rate": 2.35073852602479e-06, | |
| "loss": 0.8458, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.698140200286123, | |
| "grad_norm": 22.086647033691406, | |
| "learning_rate": 2.347791375531365e-06, | |
| "loss": 0.7665, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.6995708154506438, | |
| "grad_norm": 7.558642387390137, | |
| "learning_rate": 2.34483940824793e-06, | |
| "loss": 0.844, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7010014306151645, | |
| "grad_norm": 7.840277671813965, | |
| "learning_rate": 2.341882640946308e-06, | |
| "loss": 0.8423, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7024320457796852, | |
| "grad_norm": 5.021843433380127, | |
| "learning_rate": 2.3389210904255924e-06, | |
| "loss": 0.8149, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.703862660944206, | |
| "grad_norm": 3.7846174240112305, | |
| "learning_rate": 2.3359547735120533e-06, | |
| "loss": 0.8246, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7052932761087267, | |
| "grad_norm": 2.6051504611968994, | |
| "learning_rate": 2.332983707059043e-06, | |
| "loss": 0.7554, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.7067238912732475, | |
| "grad_norm": 3.5369930267333984, | |
| "learning_rate": 2.3300079079468966e-06, | |
| "loss": 0.8198, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7081545064377682, | |
| "grad_norm": 3.8711929321289062, | |
| "learning_rate": 2.3270273930828395e-06, | |
| "loss": 0.8471, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.709585121602289, | |
| "grad_norm": 7.332760334014893, | |
| "learning_rate": 2.3240421794008887e-06, | |
| "loss": 0.8014, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7110157367668097, | |
| "grad_norm": 10.128408432006836, | |
| "learning_rate": 2.32105228386176e-06, | |
| "loss": 0.8255, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7124463519313304, | |
| "grad_norm": 12.858423233032227, | |
| "learning_rate": 2.318057723452766e-06, | |
| "loss": 0.7532, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7138769670958512, | |
| "grad_norm": 14.100822448730469, | |
| "learning_rate": 2.3150585151877275e-06, | |
| "loss": 0.8493, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7153075822603719, | |
| "grad_norm": 6.1690802574157715, | |
| "learning_rate": 2.312054676106869e-06, | |
| "loss": 0.8536, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7167381974248928, | |
| "grad_norm": 4.373723983764648, | |
| "learning_rate": 2.3090462232767273e-06, | |
| "loss": 0.6945, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.7181688125894135, | |
| "grad_norm": 3.3550474643707275, | |
| "learning_rate": 2.306033173790051e-06, | |
| "loss": 0.8152, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7195994277539342, | |
| "grad_norm": 3.153048515319824, | |
| "learning_rate": 2.303015544765706e-06, | |
| "loss": 0.7717, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.721030042918455, | |
| "grad_norm": 1.9680156707763672, | |
| "learning_rate": 2.2999933533485773e-06, | |
| "loss": 0.8112, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7224606580829757, | |
| "grad_norm": 1.8092211484909058, | |
| "learning_rate": 2.296966616709471e-06, | |
| "loss": 0.7915, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.7238912732474965, | |
| "grad_norm": 2.4597418308258057, | |
| "learning_rate": 2.2939353520450174e-06, | |
| "loss": 0.8475, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7253218884120172, | |
| "grad_norm": 2.957054853439331, | |
| "learning_rate": 2.2908995765775724e-06, | |
| "loss": 0.7414, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.7267525035765379, | |
| "grad_norm": 6.677426338195801, | |
| "learning_rate": 2.287859307555122e-06, | |
| "loss": 0.8409, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7281831187410587, | |
| "grad_norm": 1.2464028596878052, | |
| "learning_rate": 2.284814562251181e-06, | |
| "loss": 0.743, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.7296137339055794, | |
| "grad_norm": 2.3922863006591797, | |
| "learning_rate": 2.2817653579646976e-06, | |
| "loss": 0.8122, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7310443490701002, | |
| "grad_norm": 2.2561073303222656, | |
| "learning_rate": 2.2787117120199536e-06, | |
| "loss": 0.8087, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.7324749642346209, | |
| "grad_norm": 2.277667284011841, | |
| "learning_rate": 2.275653641766466e-06, | |
| "loss": 0.7543, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7339055793991416, | |
| "grad_norm": 4.844744682312012, | |
| "learning_rate": 2.2725911645788896e-06, | |
| "loss": 0.7403, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.7353361945636624, | |
| "grad_norm": 2.275442123413086, | |
| "learning_rate": 2.269524297856918e-06, | |
| "loss": 0.8568, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7367668097281831, | |
| "grad_norm": 1.839421272277832, | |
| "learning_rate": 2.266453059025182e-06, | |
| "loss": 0.8456, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.7381974248927039, | |
| "grad_norm": 5.043838977813721, | |
| "learning_rate": 2.2633774655331557e-06, | |
| "loss": 0.8047, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7396280400572246, | |
| "grad_norm": 28.97209930419922, | |
| "learning_rate": 2.2602975348550526e-06, | |
| "loss": 0.7526, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.7410586552217453, | |
| "grad_norm": 1.293421983718872, | |
| "learning_rate": 2.2572132844897287e-06, | |
| "loss": 0.7508, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7424892703862661, | |
| "grad_norm": 3.2048988342285156, | |
| "learning_rate": 2.2541247319605834e-06, | |
| "loss": 0.8266, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.7439198855507868, | |
| "grad_norm": 2.890925884246826, | |
| "learning_rate": 2.251031894815458e-06, | |
| "loss": 0.8708, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7453505007153076, | |
| "grad_norm": 11.851993560791016, | |
| "learning_rate": 2.2479347906265375e-06, | |
| "loss": 0.8088, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.7467811158798283, | |
| "grad_norm": 9.119662284851074, | |
| "learning_rate": 2.2448334369902512e-06, | |
| "loss": 0.7403, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.748211731044349, | |
| "grad_norm": 2.4148502349853516, | |
| "learning_rate": 2.2417278515271717e-06, | |
| "loss": 0.8282, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.7496423462088698, | |
| "grad_norm": 4.801558494567871, | |
| "learning_rate": 2.2386180518819133e-06, | |
| "loss": 0.8236, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7510729613733905, | |
| "grad_norm": 6.738923072814941, | |
| "learning_rate": 2.2355040557230362e-06, | |
| "loss": 0.8058, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.7510729613733905, | |
| "eval_loss": 0.9472324252128601, | |
| "eval_runtime": 64.2532, | |
| "eval_samples_per_second": 6.459, | |
| "eval_steps_per_second": 0.405, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.7525035765379113, | |
| "grad_norm": 2.7826318740844727, | |
| "learning_rate": 2.232385880742942e-06, | |
| "loss": 0.8036, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.753934191702432, | |
| "grad_norm": 2.154601573944092, | |
| "learning_rate": 2.229263544657774e-06, | |
| "loss": 0.7827, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.7553648068669528, | |
| "grad_norm": 5.298775672912598, | |
| "learning_rate": 2.226137065207318e-06, | |
| "loss": 0.8632, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7567954220314735, | |
| "grad_norm": 4.424346446990967, | |
| "learning_rate": 2.223006460154901e-06, | |
| "loss": 0.84, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.7582260371959942, | |
| "grad_norm": 1.773184061050415, | |
| "learning_rate": 2.219871747287289e-06, | |
| "loss": 0.7129, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.759656652360515, | |
| "grad_norm": 2.4234611988067627, | |
| "learning_rate": 2.216732944414588e-06, | |
| "loss": 0.8844, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.7610872675250357, | |
| "grad_norm": 8.778568267822266, | |
| "learning_rate": 2.2135900693701396e-06, | |
| "loss": 0.7412, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7625178826895566, | |
| "grad_norm": 2.856816530227661, | |
| "learning_rate": 2.210443140010424e-06, | |
| "loss": 0.8266, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.7639484978540773, | |
| "grad_norm": 1.9950830936431885, | |
| "learning_rate": 2.2072921742149547e-06, | |
| "loss": 0.7138, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.765379113018598, | |
| "grad_norm": 4.11127233505249, | |
| "learning_rate": 2.2041371898861797e-06, | |
| "loss": 0.7274, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.7668097281831188, | |
| "grad_norm": 6.386568069458008, | |
| "learning_rate": 2.2009782049493786e-06, | |
| "loss": 0.7266, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7682403433476395, | |
| "grad_norm": 52.481101989746094, | |
| "learning_rate": 2.197815237352559e-06, | |
| "loss": 0.7578, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.7696709585121603, | |
| "grad_norm": 1.5614707469940186, | |
| "learning_rate": 2.1946483050663577e-06, | |
| "loss": 0.7825, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.771101573676681, | |
| "grad_norm": 1.0652791261672974, | |
| "learning_rate": 2.191477426083938e-06, | |
| "loss": 0.7794, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.7725321888412017, | |
| "grad_norm": 3.390814781188965, | |
| "learning_rate": 2.188302618420884e-06, | |
| "loss": 0.7919, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7739628040057225, | |
| "grad_norm": 1.7528146505355835, | |
| "learning_rate": 2.1851239001151045e-06, | |
| "loss": 0.8441, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.7753934191702432, | |
| "grad_norm": 2.2587413787841797, | |
| "learning_rate": 2.181941289226724e-06, | |
| "loss": 0.7683, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.776824034334764, | |
| "grad_norm": 1.5743305683135986, | |
| "learning_rate": 2.178754803837983e-06, | |
| "loss": 0.7909, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.7782546494992847, | |
| "grad_norm": 1.3611418008804321, | |
| "learning_rate": 2.1755644620531374e-06, | |
| "loss": 0.7889, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7796852646638054, | |
| "grad_norm": 4.035607814788818, | |
| "learning_rate": 2.172370281998352e-06, | |
| "loss": 0.8698, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.7811158798283262, | |
| "grad_norm": 2.1121273040771484, | |
| "learning_rate": 2.169172281821599e-06, | |
| "loss": 0.8374, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.7825464949928469, | |
| "grad_norm": 2.133861541748047, | |
| "learning_rate": 2.1659704796925556e-06, | |
| "loss": 0.7605, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.7839771101573677, | |
| "grad_norm": 3.578118085861206, | |
| "learning_rate": 2.1627648938024992e-06, | |
| "loss": 0.7709, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7854077253218884, | |
| "grad_norm": 2.089550018310547, | |
| "learning_rate": 2.1595555423642063e-06, | |
| "loss": 0.8255, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.7868383404864091, | |
| "grad_norm": 11.813118934631348, | |
| "learning_rate": 2.1563424436118457e-06, | |
| "loss": 0.7723, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7882689556509299, | |
| "grad_norm": 3.343435764312744, | |
| "learning_rate": 2.153125615800879e-06, | |
| "loss": 0.7733, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.7896995708154506, | |
| "grad_norm": 2.129638433456421, | |
| "learning_rate": 2.149905077207953e-06, | |
| "loss": 0.8172, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7911301859799714, | |
| "grad_norm": 4.0213518142700195, | |
| "learning_rate": 2.146680846130798e-06, | |
| "loss": 0.7916, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.7925608011444921, | |
| "grad_norm": 2.1677591800689697, | |
| "learning_rate": 2.1434529408881236e-06, | |
| "loss": 0.7638, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7939914163090128, | |
| "grad_norm": 16.7386417388916, | |
| "learning_rate": 2.1402213798195154e-06, | |
| "loss": 0.8264, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.7954220314735336, | |
| "grad_norm": 10.93736457824707, | |
| "learning_rate": 2.136986181285328e-06, | |
| "loss": 0.7442, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.7968526466380543, | |
| "grad_norm": 1.9464385509490967, | |
| "learning_rate": 2.133747363666584e-06, | |
| "loss": 0.7404, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.7982832618025751, | |
| "grad_norm": 4.377130031585693, | |
| "learning_rate": 2.130504945364867e-06, | |
| "loss": 0.8033, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.7997138769670958, | |
| "grad_norm": 1.937048077583313, | |
| "learning_rate": 2.127258944802219e-06, | |
| "loss": 0.6928, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8011444921316166, | |
| "grad_norm": 14.530089378356934, | |
| "learning_rate": 2.124009380421035e-06, | |
| "loss": 0.7674, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8025751072961373, | |
| "grad_norm": 2.9048678874969482, | |
| "learning_rate": 2.1207562706839576e-06, | |
| "loss": 0.8203, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.804005722460658, | |
| "grad_norm": 1.4509227275848389, | |
| "learning_rate": 2.117499634073772e-06, | |
| "loss": 0.8966, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8054363376251789, | |
| "grad_norm": 3.235529661178589, | |
| "learning_rate": 2.114239489093303e-06, | |
| "loss": 0.873, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8068669527896996, | |
| "grad_norm": 2.0383121967315674, | |
| "learning_rate": 2.110975854265307e-06, | |
| "loss": 0.7683, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8082975679542204, | |
| "grad_norm": 7.3494086265563965, | |
| "learning_rate": 2.10770874813237e-06, | |
| "loss": 0.8416, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.8097281831187411, | |
| "grad_norm": 5.675449848175049, | |
| "learning_rate": 2.104438189256799e-06, | |
| "loss": 0.7911, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8111587982832618, | |
| "grad_norm": 4.777390480041504, | |
| "learning_rate": 2.1011641962205187e-06, | |
| "loss": 0.8528, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8125894134477826, | |
| "grad_norm": 2.8145751953125, | |
| "learning_rate": 2.0978867876249645e-06, | |
| "loss": 0.7943, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8140200286123033, | |
| "grad_norm": 5.517285346984863, | |
| "learning_rate": 2.0946059820909782e-06, | |
| "loss": 0.8388, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.8154506437768241, | |
| "grad_norm": 1.663217306137085, | |
| "learning_rate": 2.0913217982587015e-06, | |
| "loss": 0.8075, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8168812589413448, | |
| "grad_norm": 2.412997245788574, | |
| "learning_rate": 2.088034254787471e-06, | |
| "loss": 0.8201, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.8183118741058655, | |
| "grad_norm": 2.8591926097869873, | |
| "learning_rate": 2.0847433703557086e-06, | |
| "loss": 0.7948, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8197424892703863, | |
| "grad_norm": 1.8389852046966553, | |
| "learning_rate": 2.0814491636608215e-06, | |
| "loss": 0.8375, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.821173104434907, | |
| "grad_norm": 1.8744090795516968, | |
| "learning_rate": 2.0781516534190904e-06, | |
| "loss": 0.8258, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8226037195994278, | |
| "grad_norm": 1.6970771551132202, | |
| "learning_rate": 2.0748508583655664e-06, | |
| "loss": 0.7844, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.8240343347639485, | |
| "grad_norm": 2.463869333267212, | |
| "learning_rate": 2.0715467972539623e-06, | |
| "loss": 0.7811, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8254649499284692, | |
| "grad_norm": 5.908227443695068, | |
| "learning_rate": 2.068239488856549e-06, | |
| "loss": 0.7585, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.82689556509299, | |
| "grad_norm": 2.3920488357543945, | |
| "learning_rate": 2.0649289519640455e-06, | |
| "loss": 0.7492, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8283261802575107, | |
| "grad_norm": 3.978250503540039, | |
| "learning_rate": 2.0616152053855146e-06, | |
| "loss": 0.7396, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.8297567954220315, | |
| "grad_norm": 7.113371849060059, | |
| "learning_rate": 2.0582982679482547e-06, | |
| "loss": 0.8467, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8311874105865522, | |
| "grad_norm": 2.1145403385162354, | |
| "learning_rate": 2.0549781584976937e-06, | |
| "loss": 0.8825, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.8326180257510729, | |
| "grad_norm": 1.185595989227295, | |
| "learning_rate": 2.0516548958972816e-06, | |
| "loss": 0.769, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8340486409155937, | |
| "grad_norm": 2.6032955646514893, | |
| "learning_rate": 2.0483284990283833e-06, | |
| "loss": 0.791, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.8354792560801144, | |
| "grad_norm": 2.5867621898651123, | |
| "learning_rate": 2.0449989867901698e-06, | |
| "loss": 0.8191, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8369098712446352, | |
| "grad_norm": 5.271536827087402, | |
| "learning_rate": 2.041666378099515e-06, | |
| "loss": 0.777, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.8383404864091559, | |
| "grad_norm": 1.7477766275405884, | |
| "learning_rate": 2.0383306918908827e-06, | |
| "loss": 0.7011, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8397711015736766, | |
| "grad_norm": 3.4318323135375977, | |
| "learning_rate": 2.0349919471162245e-06, | |
| "loss": 0.867, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.8412017167381974, | |
| "grad_norm": 8.079400062561035, | |
| "learning_rate": 2.031650162744867e-06, | |
| "loss": 0.8089, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8426323319027181, | |
| "grad_norm": 2.9076359272003174, | |
| "learning_rate": 2.028305357763408e-06, | |
| "loss": 0.8009, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.844062947067239, | |
| "grad_norm": 1.9895862340927124, | |
| "learning_rate": 2.024957551175607e-06, | |
| "loss": 0.9391, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8454935622317596, | |
| "grad_norm": 1.4979381561279297, | |
| "learning_rate": 2.0216067620022773e-06, | |
| "loss": 0.6863, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.8469241773962805, | |
| "grad_norm": 1.5766220092773438, | |
| "learning_rate": 2.0182530092811776e-06, | |
| "loss": 0.8043, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8483547925608012, | |
| "grad_norm": 62.486412048339844, | |
| "learning_rate": 2.0148963120669043e-06, | |
| "loss": 0.7341, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.8497854077253219, | |
| "grad_norm": 6.319479942321777, | |
| "learning_rate": 2.0115366894307833e-06, | |
| "loss": 0.8319, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8512160228898427, | |
| "grad_norm": 4.788107395172119, | |
| "learning_rate": 2.0081741604607617e-06, | |
| "loss": 0.8415, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.8526466380543634, | |
| "grad_norm": 5.281350135803223, | |
| "learning_rate": 2.004808744261299e-06, | |
| "loss": 0.8006, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.8540772532188842, | |
| "grad_norm": 8.550089836120605, | |
| "learning_rate": 2.001440459953258e-06, | |
| "loss": 0.8473, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.8555078683834049, | |
| "grad_norm": 2.373152732849121, | |
| "learning_rate": 1.998069326673798e-06, | |
| "loss": 0.7599, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8569384835479256, | |
| "grad_norm": 1.6767165660858154, | |
| "learning_rate": 1.994695363576265e-06, | |
| "loss": 0.7986, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.8583690987124464, | |
| "grad_norm": 2.729363441467285, | |
| "learning_rate": 1.991318589830081e-06, | |
| "loss": 0.8142, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8597997138769671, | |
| "grad_norm": 9.5516357421875, | |
| "learning_rate": 1.9879390246206394e-06, | |
| "loss": 0.7423, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.8612303290414879, | |
| "grad_norm": 1.492181420326233, | |
| "learning_rate": 1.9845566871491923e-06, | |
| "loss": 0.8123, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.8626609442060086, | |
| "grad_norm": 1.3610990047454834, | |
| "learning_rate": 1.9811715966327413e-06, | |
| "loss": 0.7944, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.8640915593705293, | |
| "grad_norm": 2.7227566242218018, | |
| "learning_rate": 1.9777837723039323e-06, | |
| "loss": 0.8195, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8655221745350501, | |
| "grad_norm": 6.30021858215332, | |
| "learning_rate": 1.9743932334109423e-06, | |
| "loss": 0.774, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.8669527896995708, | |
| "grad_norm": 2.1827492713928223, | |
| "learning_rate": 1.97099999921737e-06, | |
| "loss": 0.7981, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8683834048640916, | |
| "grad_norm": 1.8844138383865356, | |
| "learning_rate": 1.96760408900213e-06, | |
| "loss": 0.7882, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.8698140200286123, | |
| "grad_norm": 3.7884268760681152, | |
| "learning_rate": 1.9642055220593394e-06, | |
| "loss": 0.7905, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.871244635193133, | |
| "grad_norm": 1.2026420831680298, | |
| "learning_rate": 1.9608043176982095e-06, | |
| "loss": 0.8302, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.8726752503576538, | |
| "grad_norm": 3.9259285926818848, | |
| "learning_rate": 1.957400495242938e-06, | |
| "loss": 0.775, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8741058655221745, | |
| "grad_norm": 2.2979843616485596, | |
| "learning_rate": 1.9539940740325953e-06, | |
| "loss": 0.8282, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.8755364806866953, | |
| "grad_norm": 25.16666603088379, | |
| "learning_rate": 1.950585073421018e-06, | |
| "loss": 0.7903, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.876967095851216, | |
| "grad_norm": 2.016211748123169, | |
| "learning_rate": 1.947173512776699e-06, | |
| "loss": 0.7878, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.8783977110157367, | |
| "grad_norm": 3.2067463397979736, | |
| "learning_rate": 1.9437594114826734e-06, | |
| "loss": 0.7854, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.8798283261802575, | |
| "grad_norm": 4.444864273071289, | |
| "learning_rate": 1.940342788936413e-06, | |
| "loss": 0.844, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.8812589413447782, | |
| "grad_norm": 3.628343105316162, | |
| "learning_rate": 1.9369236645497137e-06, | |
| "loss": 0.7698, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.882689556509299, | |
| "grad_norm": 1.8619632720947266, | |
| "learning_rate": 1.933502057748587e-06, | |
| "loss": 0.7731, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.8841201716738197, | |
| "grad_norm": 4.017360210418701, | |
| "learning_rate": 1.9300779879731462e-06, | |
| "loss": 0.8335, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.8855507868383404, | |
| "grad_norm": 4.365695953369141, | |
| "learning_rate": 1.9266514746775006e-06, | |
| "loss": 0.7448, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.8869814020028612, | |
| "grad_norm": 3.6699016094207764, | |
| "learning_rate": 1.9232225373296406e-06, | |
| "loss": 0.8343, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8884120171673819, | |
| "grad_norm": 0.9214816093444824, | |
| "learning_rate": 1.9197911954113295e-06, | |
| "loss": 0.7744, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.8898426323319027, | |
| "grad_norm": 9.310022354125977, | |
| "learning_rate": 1.916357468417994e-06, | |
| "loss": 0.8854, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.8912732474964234, | |
| "grad_norm": 1.421976923942566, | |
| "learning_rate": 1.9129213758586094e-06, | |
| "loss": 0.8246, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.8927038626609443, | |
| "grad_norm": 1.6473592519760132, | |
| "learning_rate": 1.909482937255592e-06, | |
| "loss": 0.8423, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.894134477825465, | |
| "grad_norm": 3.704306125640869, | |
| "learning_rate": 1.9060421721446884e-06, | |
| "loss": 0.8118, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.8955650929899857, | |
| "grad_norm": 22.0517635345459, | |
| "learning_rate": 1.9025991000748615e-06, | |
| "loss": 0.8045, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.8969957081545065, | |
| "grad_norm": 3.9099020957946777, | |
| "learning_rate": 1.8991537406081833e-06, | |
| "loss": 0.8319, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.8984263233190272, | |
| "grad_norm": 1.8165937662124634, | |
| "learning_rate": 1.8957061133197202e-06, | |
| "loss": 0.7867, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.899856938483548, | |
| "grad_norm": 1.5057600736618042, | |
| "learning_rate": 1.8922562377974244e-06, | |
| "loss": 0.8217, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.9012875536480687, | |
| "grad_norm": 3.3929216861724854, | |
| "learning_rate": 1.8888041336420212e-06, | |
| "loss": 0.7126, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9027181688125894, | |
| "grad_norm": 1.0596497058868408, | |
| "learning_rate": 1.8853498204668986e-06, | |
| "loss": 0.7926, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9041487839771102, | |
| "grad_norm": 5.535174369812012, | |
| "learning_rate": 1.881893317897994e-06, | |
| "loss": 0.749, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9055793991416309, | |
| "grad_norm": 5.7785964012146, | |
| "learning_rate": 1.8784346455736855e-06, | |
| "loss": 0.8318, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.9070100143061517, | |
| "grad_norm": 1.2321951389312744, | |
| "learning_rate": 1.8749738231446784e-06, | |
| "loss": 0.8232, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9084406294706724, | |
| "grad_norm": 3.309943199157715, | |
| "learning_rate": 1.8715108702738928e-06, | |
| "loss": 0.8027, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.9098712446351931, | |
| "grad_norm": 2.805023193359375, | |
| "learning_rate": 1.8680458066363548e-06, | |
| "loss": 0.7425, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9113018597997139, | |
| "grad_norm": 1.852483868598938, | |
| "learning_rate": 1.8645786519190823e-06, | |
| "loss": 0.7809, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.9127324749642346, | |
| "grad_norm": 1.6780593395233154, | |
| "learning_rate": 1.8611094258209734e-06, | |
| "loss": 0.7843, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9141630901287554, | |
| "grad_norm": 1.102247953414917, | |
| "learning_rate": 1.857638148052695e-06, | |
| "loss": 0.7515, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.9155937052932761, | |
| "grad_norm": 9.121733665466309, | |
| "learning_rate": 1.8541648383365718e-06, | |
| "loss": 0.7945, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9170243204577968, | |
| "grad_norm": 19.972715377807617, | |
| "learning_rate": 1.8506895164064718e-06, | |
| "loss": 0.8476, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.9184549356223176, | |
| "grad_norm": 3.2186429500579834, | |
| "learning_rate": 1.8472122020076958e-06, | |
| "loss": 0.6715, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9198855507868383, | |
| "grad_norm": 6.097784042358398, | |
| "learning_rate": 1.8437329148968656e-06, | |
| "loss": 0.7966, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.9213161659513591, | |
| "grad_norm": 2.0366463661193848, | |
| "learning_rate": 1.8402516748418104e-06, | |
| "loss": 0.8192, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9227467811158798, | |
| "grad_norm": 2.5847008228302, | |
| "learning_rate": 1.8367685016214566e-06, | |
| "loss": 0.7565, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.9241773962804005, | |
| "grad_norm": 9.477577209472656, | |
| "learning_rate": 1.8332834150257114e-06, | |
| "loss": 0.8442, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9256080114449213, | |
| "grad_norm": 7.726278781890869, | |
| "learning_rate": 1.8297964348553555e-06, | |
| "loss": 0.6881, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.927038626609442, | |
| "grad_norm": 3.332657814025879, | |
| "learning_rate": 1.8263075809219276e-06, | |
| "loss": 0.8475, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.9284692417739628, | |
| "grad_norm": 3.3939545154571533, | |
| "learning_rate": 1.8228168730476105e-06, | |
| "loss": 0.7308, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.9298998569384835, | |
| "grad_norm": 1.3222719430923462, | |
| "learning_rate": 1.8193243310651228e-06, | |
| "loss": 0.7714, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9313304721030042, | |
| "grad_norm": 5.846932888031006, | |
| "learning_rate": 1.8158299748176019e-06, | |
| "loss": 0.7393, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.932761087267525, | |
| "grad_norm": 1.6963729858398438, | |
| "learning_rate": 1.812333824158494e-06, | |
| "loss": 0.756, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9341917024320457, | |
| "grad_norm": 1.2512105703353882, | |
| "learning_rate": 1.8088358989514405e-06, | |
| "loss": 0.8292, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.9356223175965666, | |
| "grad_norm": 4.08266544342041, | |
| "learning_rate": 1.805336219070164e-06, | |
| "loss": 0.7543, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.9370529327610873, | |
| "grad_norm": 2.852705955505371, | |
| "learning_rate": 1.8018348043983574e-06, | |
| "loss": 0.7735, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.9384835479256081, | |
| "grad_norm": 1.9104331731796265, | |
| "learning_rate": 1.79833167482957e-06, | |
| "loss": 0.7555, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.9399141630901288, | |
| "grad_norm": 2.230699300765991, | |
| "learning_rate": 1.7948268502670936e-06, | |
| "loss": 0.8005, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.9413447782546495, | |
| "grad_norm": 1.6662317514419556, | |
| "learning_rate": 1.7913203506238506e-06, | |
| "loss": 0.922, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9427753934191703, | |
| "grad_norm": 6.263296604156494, | |
| "learning_rate": 1.787812195822281e-06, | |
| "loss": 0.8096, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.944206008583691, | |
| "grad_norm": 3.0145373344421387, | |
| "learning_rate": 1.7843024057942278e-06, | |
| "loss": 0.7369, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9456366237482118, | |
| "grad_norm": 2.3436765670776367, | |
| "learning_rate": 1.7807910004808256e-06, | |
| "loss": 0.761, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.9470672389127325, | |
| "grad_norm": 1.2780580520629883, | |
| "learning_rate": 1.7772779998323859e-06, | |
| "loss": 0.8346, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.9484978540772532, | |
| "grad_norm": 3.3852529525756836, | |
| "learning_rate": 1.7737634238082838e-06, | |
| "loss": 0.7956, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.949928469241774, | |
| "grad_norm": 4.87917947769165, | |
| "learning_rate": 1.7702472923768456e-06, | |
| "loss": 0.8228, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9513590844062947, | |
| "grad_norm": 18.506868362426758, | |
| "learning_rate": 1.766729625515235e-06, | |
| "loss": 0.7943, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.9527896995708155, | |
| "grad_norm": 4.777498245239258, | |
| "learning_rate": 1.7632104432093383e-06, | |
| "loss": 0.7994, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9542203147353362, | |
| "grad_norm": 1.219874382019043, | |
| "learning_rate": 1.7596897654536527e-06, | |
| "loss": 0.8897, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.9556509298998569, | |
| "grad_norm": 1.1841962337493896, | |
| "learning_rate": 1.7561676122511722e-06, | |
| "loss": 0.8273, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9570815450643777, | |
| "grad_norm": 3.4952194690704346, | |
| "learning_rate": 1.7526440036132735e-06, | |
| "loss": 0.766, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.9585121602288984, | |
| "grad_norm": 1.1049143075942993, | |
| "learning_rate": 1.749118959559601e-06, | |
| "loss": 0.7345, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9599427753934192, | |
| "grad_norm": 1.2833698987960815, | |
| "learning_rate": 1.745592500117957e-06, | |
| "loss": 0.806, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.9613733905579399, | |
| "grad_norm": 4.3774518966674805, | |
| "learning_rate": 1.742064645324183e-06, | |
| "loss": 0.7199, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.9628040057224606, | |
| "grad_norm": 4.67322301864624, | |
| "learning_rate": 1.7385354152220507e-06, | |
| "loss": 0.8035, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.9642346208869814, | |
| "grad_norm": 5.434276580810547, | |
| "learning_rate": 1.7350048298631435e-06, | |
| "loss": 0.8651, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9656652360515021, | |
| "grad_norm": 2.621474027633667, | |
| "learning_rate": 1.731472909306746e-06, | |
| "loss": 0.772, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.9670958512160229, | |
| "grad_norm": 2.7498602867126465, | |
| "learning_rate": 1.7279396736197291e-06, | |
| "loss": 0.7756, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9685264663805436, | |
| "grad_norm": 3.2077572345733643, | |
| "learning_rate": 1.7244051428764343e-06, | |
| "loss": 0.7203, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.9699570815450643, | |
| "grad_norm": 3.252988338470459, | |
| "learning_rate": 1.7208693371585628e-06, | |
| "loss": 0.8783, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.9713876967095851, | |
| "grad_norm": 2.9252920150756836, | |
| "learning_rate": 1.7173322765550588e-06, | |
| "loss": 0.7418, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.9728183118741058, | |
| "grad_norm": 3.183591842651367, | |
| "learning_rate": 1.7137939811619956e-06, | |
| "loss": 0.7614, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9742489270386266, | |
| "grad_norm": 3.029395341873169, | |
| "learning_rate": 1.7102544710824628e-06, | |
| "loss": 0.8751, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.9756795422031473, | |
| "grad_norm": 5.665907382965088, | |
| "learning_rate": 1.7067137664264521e-06, | |
| "loss": 0.8122, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.977110157367668, | |
| "grad_norm": 10.361516952514648, | |
| "learning_rate": 1.7031718873107404e-06, | |
| "loss": 0.8093, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.9785407725321889, | |
| "grad_norm": 10.015640258789062, | |
| "learning_rate": 1.699628853858779e-06, | |
| "loss": 0.8042, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.9799713876967096, | |
| "grad_norm": 2.4526281356811523, | |
| "learning_rate": 1.6960846862005769e-06, | |
| "loss": 0.6861, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.9814020028612304, | |
| "grad_norm": 4.162567138671875, | |
| "learning_rate": 1.692539404472587e-06, | |
| "loss": 0.7906, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.9828326180257511, | |
| "grad_norm": 1.5269864797592163, | |
| "learning_rate": 1.6889930288175922e-06, | |
| "loss": 0.8598, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.9842632331902719, | |
| "grad_norm": 4.929915428161621, | |
| "learning_rate": 1.6854455793845915e-06, | |
| "loss": 0.785, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.9856938483547926, | |
| "grad_norm": 3.590336322784424, | |
| "learning_rate": 1.6818970763286826e-06, | |
| "loss": 0.774, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.9871244635193133, | |
| "grad_norm": 8.861334800720215, | |
| "learning_rate": 1.6783475398109513e-06, | |
| "loss": 0.7606, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9885550786838341, | |
| "grad_norm": 1.1489014625549316, | |
| "learning_rate": 1.6747969899983546e-06, | |
| "loss": 0.8077, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.9899856938483548, | |
| "grad_norm": 2.970811367034912, | |
| "learning_rate": 1.6712454470636052e-06, | |
| "loss": 0.6827, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.9914163090128756, | |
| "grad_norm": 2.4784224033355713, | |
| "learning_rate": 1.6676929311850608e-06, | |
| "loss": 0.7306, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.9928469241773963, | |
| "grad_norm": 1.8776549100875854, | |
| "learning_rate": 1.6641394625466055e-06, | |
| "loss": 0.7379, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.994277539341917, | |
| "grad_norm": 1.7985637187957764, | |
| "learning_rate": 1.6605850613375356e-06, | |
| "loss": 0.7949, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.9957081545064378, | |
| "grad_norm": 3.027981996536255, | |
| "learning_rate": 1.6570297477524488e-06, | |
| "loss": 0.8686, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.9971387696709585, | |
| "grad_norm": 1.519041657447815, | |
| "learning_rate": 1.6534735419911228e-06, | |
| "loss": 0.7968, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.9985693848354793, | |
| "grad_norm": 3.942765712738037, | |
| "learning_rate": 1.6499164642584074e-06, | |
| "loss": 0.7562, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1095448732376099, | |
| "learning_rate": 1.6463585347641054e-06, | |
| "loss": 0.8442, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.0014306151645207, | |
| "grad_norm": 1.9735312461853027, | |
| "learning_rate": 1.6427997737228582e-06, | |
| "loss": 0.7842, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0014306151645207, | |
| "eval_loss": 0.9359034895896912, | |
| "eval_runtime": 64.0219, | |
| "eval_samples_per_second": 6.482, | |
| "eval_steps_per_second": 0.406, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0028612303290414, | |
| "grad_norm": 4.721752643585205, | |
| "learning_rate": 1.6392402013540328e-06, | |
| "loss": 0.8099, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.0042918454935623, | |
| "grad_norm": 2.144127130508423, | |
| "learning_rate": 1.635679837881606e-06, | |
| "loss": 0.8072, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.005722460658083, | |
| "grad_norm": 1.4669064283370972, | |
| "learning_rate": 1.6321187035340477e-06, | |
| "loss": 0.7411, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.0071530758226037, | |
| "grad_norm": 3.2362279891967773, | |
| "learning_rate": 1.6285568185442092e-06, | |
| "loss": 0.7697, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0085836909871244, | |
| "grad_norm": 3.9374539852142334, | |
| "learning_rate": 1.6249942031492063e-06, | |
| "loss": 0.8036, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.0100143061516451, | |
| "grad_norm": 4.1698126792907715, | |
| "learning_rate": 1.6214308775903035e-06, | |
| "loss": 0.8324, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.011444921316166, | |
| "grad_norm": 2.475919246673584, | |
| "learning_rate": 1.6178668621128018e-06, | |
| "loss": 0.7851, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.0128755364806867, | |
| "grad_norm": 9.091358184814453, | |
| "learning_rate": 1.6143021769659212e-06, | |
| "loss": 0.7688, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0143061516452074, | |
| "grad_norm": 1.0087482929229736, | |
| "learning_rate": 1.6107368424026866e-06, | |
| "loss": 0.8104, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.0157367668097281, | |
| "grad_norm": 4.268504619598389, | |
| "learning_rate": 1.6071708786798126e-06, | |
| "loss": 0.8231, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0171673819742488, | |
| "grad_norm": 3.690303087234497, | |
| "learning_rate": 1.6036043060575882e-06, | |
| "loss": 0.7511, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.0185979971387698, | |
| "grad_norm": 3.737053871154785, | |
| "learning_rate": 1.6000371447997617e-06, | |
| "loss": 0.8103, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.0200286123032904, | |
| "grad_norm": 2.4901950359344482, | |
| "learning_rate": 1.596469415173427e-06, | |
| "loss": 0.8233, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.0214592274678111, | |
| "grad_norm": 108.90562438964844, | |
| "learning_rate": 1.5929011374489059e-06, | |
| "loss": 0.7623, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.0228898426323318, | |
| "grad_norm": 3.225177049636841, | |
| "learning_rate": 1.5893323318996348e-06, | |
| "loss": 0.8646, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.0243204577968525, | |
| "grad_norm": 7.861708164215088, | |
| "learning_rate": 1.5857630188020494e-06, | |
| "loss": 0.8483, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0257510729613735, | |
| "grad_norm": 2.513399600982666, | |
| "learning_rate": 1.5821932184354677e-06, | |
| "loss": 0.8675, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.0271816881258942, | |
| "grad_norm": 3.3864715099334717, | |
| "learning_rate": 1.5786229510819777e-06, | |
| "loss": 0.8231, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.0286123032904149, | |
| "grad_norm": 8.62854290008545, | |
| "learning_rate": 1.5750522370263203e-06, | |
| "loss": 0.7884, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.0300429184549356, | |
| "grad_norm": 4.026301383972168, | |
| "learning_rate": 1.5714810965557728e-06, | |
| "loss": 0.7832, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0314735336194563, | |
| "grad_norm": 5.8504438400268555, | |
| "learning_rate": 1.5679095499600376e-06, | |
| "loss": 0.8102, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.0329041487839772, | |
| "grad_norm": 3.6803553104400635, | |
| "learning_rate": 1.5643376175311233e-06, | |
| "loss": 0.7454, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.0343347639484979, | |
| "grad_norm": 5.682314395904541, | |
| "learning_rate": 1.5607653195632304e-06, | |
| "loss": 0.7855, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.0357653791130186, | |
| "grad_norm": 8.800222396850586, | |
| "learning_rate": 1.5571926763526365e-06, | |
| "loss": 0.7561, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0371959942775393, | |
| "grad_norm": 2.693606376647949, | |
| "learning_rate": 1.5536197081975814e-06, | |
| "loss": 0.8077, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.0386266094420602, | |
| "grad_norm": 7.366818428039551, | |
| "learning_rate": 1.5500464353981495e-06, | |
| "loss": 0.758, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.040057224606581, | |
| "grad_norm": 2.4745495319366455, | |
| "learning_rate": 1.5464728782561578e-06, | |
| "loss": 0.8134, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.0414878397711016, | |
| "grad_norm": 4.274849891662598, | |
| "learning_rate": 1.542899057075038e-06, | |
| "loss": 0.7351, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.0429184549356223, | |
| "grad_norm": 2.3312735557556152, | |
| "learning_rate": 1.5393249921597215e-06, | |
| "loss": 0.7486, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.044349070100143, | |
| "grad_norm": 2.961493492126465, | |
| "learning_rate": 1.5357507038165258e-06, | |
| "loss": 0.8082, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0457796852646637, | |
| "grad_norm": 3.3071088790893555, | |
| "learning_rate": 1.5321762123530366e-06, | |
| "loss": 0.8408, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.0472103004291846, | |
| "grad_norm": 3.7048757076263428, | |
| "learning_rate": 1.5286015380779939e-06, | |
| "loss": 0.6624, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.0486409155937053, | |
| "grad_norm": 2.3817408084869385, | |
| "learning_rate": 1.525026701301177e-06, | |
| "loss": 0.7843, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.050071530758226, | |
| "grad_norm": 1.2996212244033813, | |
| "learning_rate": 1.5214517223332873e-06, | |
| "loss": 0.6905, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.0515021459227467, | |
| "grad_norm": 2.558300018310547, | |
| "learning_rate": 1.5178766214858356e-06, | |
| "loss": 0.7479, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.0529327610872676, | |
| "grad_norm": 3.06276273727417, | |
| "learning_rate": 1.5143014190710241e-06, | |
| "loss": 0.826, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.0543633762517883, | |
| "grad_norm": 9.476898193359375, | |
| "learning_rate": 1.5107261354016317e-06, | |
| "loss": 0.8496, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.055793991416309, | |
| "grad_norm": 1.7778562307357788, | |
| "learning_rate": 1.5071507907909004e-06, | |
| "loss": 0.7557, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.0572246065808297, | |
| "grad_norm": 1.8848568201065063, | |
| "learning_rate": 1.503575405552417e-06, | |
| "loss": 0.8162, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.0586552217453504, | |
| "grad_norm": 0.8061392307281494, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.7872, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0600858369098713, | |
| "grad_norm": 5.786372661590576, | |
| "learning_rate": 1.496424594447583e-06, | |
| "loss": 0.8272, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.061516452074392, | |
| "grad_norm": 1.484350323677063, | |
| "learning_rate": 1.4928492092091e-06, | |
| "loss": 0.7515, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.0629470672389127, | |
| "grad_norm": 3.867645502090454, | |
| "learning_rate": 1.4892738645983686e-06, | |
| "loss": 0.8213, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.0643776824034334, | |
| "grad_norm": 2.6978371143341064, | |
| "learning_rate": 1.4856985809289764e-06, | |
| "loss": 0.7573, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.0658082975679541, | |
| "grad_norm": 5.597418785095215, | |
| "learning_rate": 1.4821233785141647e-06, | |
| "loss": 0.7814, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.067238912732475, | |
| "grad_norm": 2.4046719074249268, | |
| "learning_rate": 1.4785482776667128e-06, | |
| "loss": 0.8052, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.0686695278969958, | |
| "grad_norm": 2.482250452041626, | |
| "learning_rate": 1.4749732986988233e-06, | |
| "loss": 0.7652, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.0701001430615165, | |
| "grad_norm": 5.594193935394287, | |
| "learning_rate": 1.4713984619220064e-06, | |
| "loss": 0.6645, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.0715307582260372, | |
| "grad_norm": 3.4292051792144775, | |
| "learning_rate": 1.4678237876469637e-06, | |
| "loss": 0.7883, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.0729613733905579, | |
| "grad_norm": 0.7807212471961975, | |
| "learning_rate": 1.4642492961834743e-06, | |
| "loss": 0.78, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0743919885550788, | |
| "grad_norm": 5.383660316467285, | |
| "learning_rate": 1.4606750078402786e-06, | |
| "loss": 0.7539, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.0758226037195995, | |
| "grad_norm": 4.694250583648682, | |
| "learning_rate": 1.4571009429249621e-06, | |
| "loss": 0.7208, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.0772532188841202, | |
| "grad_norm": 1.829797387123108, | |
| "learning_rate": 1.4535271217438427e-06, | |
| "loss": 0.763, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.0786838340486409, | |
| "grad_norm": 20.187421798706055, | |
| "learning_rate": 1.4499535646018508e-06, | |
| "loss": 0.7726, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.0801144492131616, | |
| "grad_norm": 3.4587745666503906, | |
| "learning_rate": 1.446380291802419e-06, | |
| "loss": 0.7618, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.0815450643776825, | |
| "grad_norm": 2.4537343978881836, | |
| "learning_rate": 1.4428073236473637e-06, | |
| "loss": 0.8274, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.0829756795422032, | |
| "grad_norm": 4.690003395080566, | |
| "learning_rate": 1.4392346804367697e-06, | |
| "loss": 0.7229, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.084406294706724, | |
| "grad_norm": 2.620816946029663, | |
| "learning_rate": 1.4356623824688768e-06, | |
| "loss": 0.7523, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.0858369098712446, | |
| "grad_norm": 2.812201499938965, | |
| "learning_rate": 1.4320904500399625e-06, | |
| "loss": 0.7251, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.0872675250357653, | |
| "grad_norm": 1.717846393585205, | |
| "learning_rate": 1.4285189034442273e-06, | |
| "loss": 0.81, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0886981402002862, | |
| "grad_norm": 3.324570655822754, | |
| "learning_rate": 1.4249477629736802e-06, | |
| "loss": 0.7907, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.090128755364807, | |
| "grad_norm": 3.35800838470459, | |
| "learning_rate": 1.4213770489180224e-06, | |
| "loss": 0.7245, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.0915593705293276, | |
| "grad_norm": 3.3062188625335693, | |
| "learning_rate": 1.4178067815645326e-06, | |
| "loss": 0.7933, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.0929899856938483, | |
| "grad_norm": 16.04672622680664, | |
| "learning_rate": 1.414236981197951e-06, | |
| "loss": 0.7359, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.094420600858369, | |
| "grad_norm": 1.6228106021881104, | |
| "learning_rate": 1.4106676681003653e-06, | |
| "loss": 0.806, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.09585121602289, | |
| "grad_norm": 5.070892333984375, | |
| "learning_rate": 1.4070988625510942e-06, | |
| "loss": 0.784, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.0972818311874106, | |
| "grad_norm": 9.049479484558105, | |
| "learning_rate": 1.403530584826573e-06, | |
| "loss": 0.7501, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.0987124463519313, | |
| "grad_norm": 2.303457260131836, | |
| "learning_rate": 1.3999628552002386e-06, | |
| "loss": 0.7539, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.100143061516452, | |
| "grad_norm": 4.238282680511475, | |
| "learning_rate": 1.3963956939424123e-06, | |
| "loss": 0.7909, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.1015736766809727, | |
| "grad_norm": 5.631208419799805, | |
| "learning_rate": 1.3928291213201877e-06, | |
| "loss": 0.8202, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1030042918454936, | |
| "grad_norm": 1.7331924438476562, | |
| "learning_rate": 1.3892631575973137e-06, | |
| "loss": 0.849, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.1044349070100143, | |
| "grad_norm": 4.782192707061768, | |
| "learning_rate": 1.3856978230340789e-06, | |
| "loss": 0.819, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.105865522174535, | |
| "grad_norm": 2.9614789485931396, | |
| "learning_rate": 1.3821331378871983e-06, | |
| "loss": 0.8061, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.1072961373390557, | |
| "grad_norm": 3.1825926303863525, | |
| "learning_rate": 1.3785691224096972e-06, | |
| "loss": 0.8027, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.1087267525035764, | |
| "grad_norm": 1.6604760885238647, | |
| "learning_rate": 1.3750057968507944e-06, | |
| "loss": 0.7238, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.1101573676680974, | |
| "grad_norm": 1.8294752836227417, | |
| "learning_rate": 1.3714431814557916e-06, | |
| "loss": 0.8283, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.111587982832618, | |
| "grad_norm": 6.401926517486572, | |
| "learning_rate": 1.3678812964659528e-06, | |
| "loss": 0.7288, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.1130185979971388, | |
| "grad_norm": 3.224818468093872, | |
| "learning_rate": 1.3643201621183948e-06, | |
| "loss": 0.8541, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1144492131616595, | |
| "grad_norm": 1.71248459815979, | |
| "learning_rate": 1.3607597986459677e-06, | |
| "loss": 0.7835, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.1158798283261802, | |
| "grad_norm": 3.652742624282837, | |
| "learning_rate": 1.3572002262771425e-06, | |
| "loss": 0.8003, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.117310443490701, | |
| "grad_norm": 0.81279057264328, | |
| "learning_rate": 1.3536414652358953e-06, | |
| "loss": 0.7865, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.1187410586552218, | |
| "grad_norm": 6.66923713684082, | |
| "learning_rate": 1.3500835357415933e-06, | |
| "loss": 0.8885, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.1201716738197425, | |
| "grad_norm": 3.3524577617645264, | |
| "learning_rate": 1.3465264580088777e-06, | |
| "loss": 0.7786, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.1216022889842632, | |
| "grad_norm": 4.309314727783203, | |
| "learning_rate": 1.342970252247552e-06, | |
| "loss": 0.784, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.123032904148784, | |
| "grad_norm": 10.110477447509766, | |
| "learning_rate": 1.3394149386624647e-06, | |
| "loss": 0.7979, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.1244635193133048, | |
| "grad_norm": 1.8501849174499512, | |
| "learning_rate": 1.3358605374533952e-06, | |
| "loss": 0.8531, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.1258941344778255, | |
| "grad_norm": 2.0311262607574463, | |
| "learning_rate": 1.3323070688149395e-06, | |
| "loss": 0.7445, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.1273247496423462, | |
| "grad_norm": 1.5606796741485596, | |
| "learning_rate": 1.3287545529363951e-06, | |
| "loss": 0.7768, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.128755364806867, | |
| "grad_norm": 4.589614391326904, | |
| "learning_rate": 1.3252030100016462e-06, | |
| "loss": 0.7829, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.1301859799713876, | |
| "grad_norm": 1.5389312505722046, | |
| "learning_rate": 1.321652460189049e-06, | |
| "loss": 0.787, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1316165951359085, | |
| "grad_norm": 2.4592175483703613, | |
| "learning_rate": 1.318102923671318e-06, | |
| "loss": 0.8379, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.1330472103004292, | |
| "grad_norm": 1.0238618850708008, | |
| "learning_rate": 1.314554420615409e-06, | |
| "loss": 0.7934, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.13447782546495, | |
| "grad_norm": 3.073195695877075, | |
| "learning_rate": 1.3110069711824081e-06, | |
| "loss": 0.8114, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.1359084406294706, | |
| "grad_norm": 1.4695512056350708, | |
| "learning_rate": 1.3074605955274136e-06, | |
| "loss": 0.7787, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.1373390557939915, | |
| "grad_norm": 2.683389663696289, | |
| "learning_rate": 1.3039153137994239e-06, | |
| "loss": 0.7827, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.1387696709585122, | |
| "grad_norm": 1.7253704071044922, | |
| "learning_rate": 1.3003711461412214e-06, | |
| "loss": 0.798, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.140200286123033, | |
| "grad_norm": 16.745397567749023, | |
| "learning_rate": 1.2968281126892603e-06, | |
| "loss": 0.7709, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.1416309012875536, | |
| "grad_norm": 2.683840751647949, | |
| "learning_rate": 1.2932862335735486e-06, | |
| "loss": 0.7775, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.1430615164520743, | |
| "grad_norm": 7.146876811981201, | |
| "learning_rate": 1.2897455289175373e-06, | |
| "loss": 0.8856, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.144492131616595, | |
| "grad_norm": 1.972984790802002, | |
| "learning_rate": 1.2862060188380051e-06, | |
| "loss": 0.7153, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.145922746781116, | |
| "grad_norm": 2.476194143295288, | |
| "learning_rate": 1.2826677234449419e-06, | |
| "loss": 0.8171, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.1473533619456366, | |
| "grad_norm": 2.416992425918579, | |
| "learning_rate": 1.2791306628414377e-06, | |
| "loss": 0.814, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.1487839771101573, | |
| "grad_norm": 10.751389503479004, | |
| "learning_rate": 1.275594857123566e-06, | |
| "loss": 0.7874, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.150214592274678, | |
| "grad_norm": 1.4024333953857422, | |
| "learning_rate": 1.2720603263802716e-06, | |
| "loss": 0.8824, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.151645207439199, | |
| "grad_norm": 1.4597464799880981, | |
| "learning_rate": 1.2685270906932546e-06, | |
| "loss": 0.7573, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.1530758226037197, | |
| "grad_norm": 2.488672971725464, | |
| "learning_rate": 1.2649951701368566e-06, | |
| "loss": 0.717, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.1545064377682404, | |
| "grad_norm": 10.042638778686523, | |
| "learning_rate": 1.2614645847779498e-06, | |
| "loss": 0.7655, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.155937052932761, | |
| "grad_norm": 5.5453901290893555, | |
| "learning_rate": 1.2579353546758169e-06, | |
| "loss": 0.707, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.1573676680972818, | |
| "grad_norm": 9.400655746459961, | |
| "learning_rate": 1.2544074998820431e-06, | |
| "loss": 0.8075, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.1587982832618025, | |
| "grad_norm": 1.1171351671218872, | |
| "learning_rate": 1.2508810404403991e-06, | |
| "loss": 0.7257, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1602288984263234, | |
| "grad_norm": 1.9322105646133423, | |
| "learning_rate": 1.2473559963867266e-06, | |
| "loss": 0.6525, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.161659513590844, | |
| "grad_norm": 2.5018885135650635, | |
| "learning_rate": 1.2438323877488274e-06, | |
| "loss": 0.6813, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.1630901287553648, | |
| "grad_norm": 4.477802276611328, | |
| "learning_rate": 1.2403102345463473e-06, | |
| "loss": 0.7791, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.1645207439198855, | |
| "grad_norm": 1.7652959823608398, | |
| "learning_rate": 1.2367895567906618e-06, | |
| "loss": 0.7778, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.1659513590844064, | |
| "grad_norm": 1.8609610795974731, | |
| "learning_rate": 1.233270374484765e-06, | |
| "loss": 0.7831, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.167381974248927, | |
| "grad_norm": 5.632737636566162, | |
| "learning_rate": 1.2297527076231542e-06, | |
| "loss": 0.7406, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.1688125894134478, | |
| "grad_norm": 4.156643867492676, | |
| "learning_rate": 1.2262365761917163e-06, | |
| "loss": 0.8467, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.1702432045779685, | |
| "grad_norm": 6.219330310821533, | |
| "learning_rate": 1.2227220001676142e-06, | |
| "loss": 0.8302, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.1716738197424892, | |
| "grad_norm": 3.3409154415130615, | |
| "learning_rate": 1.2192089995191743e-06, | |
| "loss": 0.8674, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.17310443490701, | |
| "grad_norm": 3.474548101425171, | |
| "learning_rate": 1.2156975942057719e-06, | |
| "loss": 0.8351, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.1745350500715308, | |
| "grad_norm": 3.2273216247558594, | |
| "learning_rate": 1.212187804177719e-06, | |
| "loss": 0.857, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.1759656652360515, | |
| "grad_norm": 1.604404091835022, | |
| "learning_rate": 1.2086796493761495e-06, | |
| "loss": 0.8938, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.1773962804005722, | |
| "grad_norm": 1.4558448791503906, | |
| "learning_rate": 1.2051731497329063e-06, | |
| "loss": 0.7917, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.178826895565093, | |
| "grad_norm": 2.538985013961792, | |
| "learning_rate": 1.2016683251704303e-06, | |
| "loss": 0.7406, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.1802575107296138, | |
| "grad_norm": 1.2528947591781616, | |
| "learning_rate": 1.1981651956016425e-06, | |
| "loss": 0.8545, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.1816881258941345, | |
| "grad_norm": 1.4131247997283936, | |
| "learning_rate": 1.194663780929836e-06, | |
| "loss": 0.7394, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.1831187410586552, | |
| "grad_norm": 16.873014450073242, | |
| "learning_rate": 1.1911641010485598e-06, | |
| "loss": 0.8212, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.184549356223176, | |
| "grad_norm": 6.866806507110596, | |
| "learning_rate": 1.187666175841506e-06, | |
| "loss": 0.9203, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.1859799713876966, | |
| "grad_norm": 1.7047280073165894, | |
| "learning_rate": 1.184170025182398e-06, | |
| "loss": 0.7769, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.1874105865522175, | |
| "grad_norm": 5.180852890014648, | |
| "learning_rate": 1.1806756689348775e-06, | |
| "loss": 0.791, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.1888412017167382, | |
| "grad_norm": 3.131958484649658, | |
| "learning_rate": 1.1771831269523896e-06, | |
| "loss": 0.7949, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.190271816881259, | |
| "grad_norm": 1.1318491697311401, | |
| "learning_rate": 1.1736924190780725e-06, | |
| "loss": 0.7955, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.1917024320457796, | |
| "grad_norm": 6.675893306732178, | |
| "learning_rate": 1.1702035651446442e-06, | |
| "loss": 0.7918, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.1931330472103003, | |
| "grad_norm": 8.012784004211426, | |
| "learning_rate": 1.1667165849742884e-06, | |
| "loss": 0.7151, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.1945636623748213, | |
| "grad_norm": 1.9865070581436157, | |
| "learning_rate": 1.1632314983785435e-06, | |
| "loss": 0.8307, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.195994277539342, | |
| "grad_norm": 6.1861677169799805, | |
| "learning_rate": 1.1597483251581895e-06, | |
| "loss": 0.7981, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.1974248927038627, | |
| "grad_norm": 2.7006752490997314, | |
| "learning_rate": 1.1562670851031345e-06, | |
| "loss": 0.8067, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.1988555078683834, | |
| "grad_norm": 1.065775752067566, | |
| "learning_rate": 1.1527877979923043e-06, | |
| "loss": 0.759, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.200286123032904, | |
| "grad_norm": 1.9265739917755127, | |
| "learning_rate": 1.1493104835935287e-06, | |
| "loss": 0.7376, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.201716738197425, | |
| "grad_norm": 1.4268121719360352, | |
| "learning_rate": 1.1458351616634283e-06, | |
| "loss": 0.7874, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2031473533619457, | |
| "grad_norm": 2.659268856048584, | |
| "learning_rate": 1.1423618519473052e-06, | |
| "loss": 0.8201, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.2045779685264664, | |
| "grad_norm": 3.1713037490844727, | |
| "learning_rate": 1.1388905741790269e-06, | |
| "loss": 0.8612, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.206008583690987, | |
| "grad_norm": 10.63504695892334, | |
| "learning_rate": 1.1354213480809178e-06, | |
| "loss": 0.7408, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.207439198855508, | |
| "grad_norm": 5.266157627105713, | |
| "learning_rate": 1.1319541933636455e-06, | |
| "loss": 0.8414, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.2088698140200287, | |
| "grad_norm": 2.5737879276275635, | |
| "learning_rate": 1.1284891297261075e-06, | |
| "loss": 0.8581, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.2103004291845494, | |
| "grad_norm": 4.128069877624512, | |
| "learning_rate": 1.1250261768553221e-06, | |
| "loss": 0.8162, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.21173104434907, | |
| "grad_norm": 2.4845378398895264, | |
| "learning_rate": 1.1215653544263147e-06, | |
| "loss": 0.7017, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.2131616595135908, | |
| "grad_norm": 2.9242730140686035, | |
| "learning_rate": 1.118106682102006e-06, | |
| "loss": 0.8214, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.2145922746781115, | |
| "grad_norm": 3.1195361614227295, | |
| "learning_rate": 1.1146501795331017e-06, | |
| "loss": 0.8892, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.2160228898426324, | |
| "grad_norm": 1.8963371515274048, | |
| "learning_rate": 1.111195866357979e-06, | |
| "loss": 0.7455, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.217453505007153, | |
| "grad_norm": 4.30813455581665, | |
| "learning_rate": 1.107743762202576e-06, | |
| "loss": 0.7363, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.2188841201716738, | |
| "grad_norm": 1.2631362676620483, | |
| "learning_rate": 1.10429388668028e-06, | |
| "loss": 0.7979, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.2203147353361945, | |
| "grad_norm": 1.1063506603240967, | |
| "learning_rate": 1.1008462593918172e-06, | |
| "loss": 0.8217, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.2217453505007154, | |
| "grad_norm": 5.987213611602783, | |
| "learning_rate": 1.0974008999251385e-06, | |
| "loss": 0.7839, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.2231759656652361, | |
| "grad_norm": 1.2211673259735107, | |
| "learning_rate": 1.0939578278553117e-06, | |
| "loss": 0.7484, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.2246065808297568, | |
| "grad_norm": 29.422378540039062, | |
| "learning_rate": 1.0905170627444082e-06, | |
| "loss": 0.7305, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.2260371959942775, | |
| "grad_norm": 0.9755321741104126, | |
| "learning_rate": 1.0870786241413909e-06, | |
| "loss": 0.728, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.2274678111587982, | |
| "grad_norm": 2.794478178024292, | |
| "learning_rate": 1.083642531582006e-06, | |
| "loss": 0.763, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.228898426323319, | |
| "grad_norm": 9.834367752075195, | |
| "learning_rate": 1.0802088045886703e-06, | |
| "loss": 0.7693, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.2303290414878398, | |
| "grad_norm": 1.6742088794708252, | |
| "learning_rate": 1.0767774626703599e-06, | |
| "loss": 0.7502, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2317596566523605, | |
| "grad_norm": 1.3184466361999512, | |
| "learning_rate": 1.0733485253224997e-06, | |
| "loss": 0.7145, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.2331902718168812, | |
| "grad_norm": 2.6459200382232666, | |
| "learning_rate": 1.069922012026854e-06, | |
| "loss": 0.7881, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.234620886981402, | |
| "grad_norm": 2.640869379043579, | |
| "learning_rate": 1.0664979422514134e-06, | |
| "loss": 0.7546, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.2360515021459229, | |
| "grad_norm": 3.070185899734497, | |
| "learning_rate": 1.0630763354502864e-06, | |
| "loss": 0.7508, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.2374821173104436, | |
| "grad_norm": 1.690168857574463, | |
| "learning_rate": 1.0596572110635875e-06, | |
| "loss": 0.8324, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.2389127324749643, | |
| "grad_norm": 2.343522071838379, | |
| "learning_rate": 1.056240588517327e-06, | |
| "loss": 0.8546, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.240343347639485, | |
| "grad_norm": 2.629617691040039, | |
| "learning_rate": 1.0528264872233018e-06, | |
| "loss": 0.8052, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.2417739628040056, | |
| "grad_norm": 5.790884971618652, | |
| "learning_rate": 1.049414926578982e-06, | |
| "loss": 0.8059, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.2432045779685263, | |
| "grad_norm": 3.549689292907715, | |
| "learning_rate": 1.0460059259674048e-06, | |
| "loss": 0.6624, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.2446351931330473, | |
| "grad_norm": 6.8801493644714355, | |
| "learning_rate": 1.0425995047570625e-06, | |
| "loss": 0.751, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.246065808297568, | |
| "grad_norm": 3.7252180576324463, | |
| "learning_rate": 1.0391956823017906e-06, | |
| "loss": 0.6847, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.2474964234620887, | |
| "grad_norm": 3.222304582595825, | |
| "learning_rate": 1.0357944779406609e-06, | |
| "loss": 0.8095, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.2489270386266094, | |
| "grad_norm": 1.8582981824874878, | |
| "learning_rate": 1.0323959109978703e-06, | |
| "loss": 0.7937, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.2503576537911303, | |
| "grad_norm": 2.6876213550567627, | |
| "learning_rate": 1.0290000007826299e-06, | |
| "loss": 0.7574, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.251788268955651, | |
| "grad_norm": 1.8542571067810059, | |
| "learning_rate": 1.0256067665890578e-06, | |
| "loss": 0.7267, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.251788268955651, | |
| "eval_loss": 0.9304266571998596, | |
| "eval_runtime": 66.8532, | |
| "eval_samples_per_second": 6.208, | |
| "eval_steps_per_second": 0.389, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.2532188841201717, | |
| "grad_norm": 3.313300609588623, | |
| "learning_rate": 1.0222162276960676e-06, | |
| "loss": 0.8148, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.2546494992846924, | |
| "grad_norm": 2.9693450927734375, | |
| "learning_rate": 1.0188284033672586e-06, | |
| "loss": 0.737, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.256080114449213, | |
| "grad_norm": 1.4272849559783936, | |
| "learning_rate": 1.015443312850808e-06, | |
| "loss": 0.9017, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.2575107296137338, | |
| "grad_norm": 1.6904128789901733, | |
| "learning_rate": 1.0120609753793609e-06, | |
| "loss": 0.75, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.2589413447782547, | |
| "grad_norm": 4.684359550476074, | |
| "learning_rate": 1.0086814101699191e-06, | |
| "loss": 0.711, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2603719599427754, | |
| "grad_norm": 1.3708410263061523, | |
| "learning_rate": 1.0053046364237354e-06, | |
| "loss": 0.8005, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.261802575107296, | |
| "grad_norm": 1.4521434307098389, | |
| "learning_rate": 1.0019306733262022e-06, | |
| "loss": 0.818, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.263233190271817, | |
| "grad_norm": 0.9280107617378235, | |
| "learning_rate": 9.985595400467423e-07, | |
| "loss": 0.7696, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.2646638054363377, | |
| "grad_norm": 2.259516477584839, | |
| "learning_rate": 9.951912557387014e-07, | |
| "loss": 0.8095, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.2660944206008584, | |
| "grad_norm": 1.7881183624267578, | |
| "learning_rate": 9.918258395392388e-07, | |
| "loss": 0.837, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.2675250357653791, | |
| "grad_norm": 3.3883907794952393, | |
| "learning_rate": 9.88463310569217e-07, | |
| "loss": 0.8968, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.2689556509298998, | |
| "grad_norm": 1.247185230255127, | |
| "learning_rate": 9.851036879330958e-07, | |
| "loss": 0.7996, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.2703862660944205, | |
| "grad_norm": 2.4265060424804688, | |
| "learning_rate": 9.817469907188227e-07, | |
| "loss": 0.6631, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.2718168812589412, | |
| "grad_norm": 4.242371082305908, | |
| "learning_rate": 9.783932379977228e-07, | |
| "loss": 0.7746, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.2732474964234621, | |
| "grad_norm": 4.2158660888671875, | |
| "learning_rate": 9.75042448824393e-07, | |
| "loss": 0.7862, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.2746781115879828, | |
| "grad_norm": 2.9039363861083984, | |
| "learning_rate": 9.716946422365922e-07, | |
| "loss": 0.7609, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.2761087267525035, | |
| "grad_norm": 4.17219877243042, | |
| "learning_rate": 9.683498372551335e-07, | |
| "loss": 0.7278, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.2775393419170245, | |
| "grad_norm": 3.1430556774139404, | |
| "learning_rate": 9.650080528837762e-07, | |
| "loss": 0.8266, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.2789699570815452, | |
| "grad_norm": 8.886442184448242, | |
| "learning_rate": 9.616693081091172e-07, | |
| "loss": 0.7685, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.2804005722460658, | |
| "grad_norm": 1.9755185842514038, | |
| "learning_rate": 9.58333621900485e-07, | |
| "loss": 0.7883, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.2818311874105865, | |
| "grad_norm": 2.893641710281372, | |
| "learning_rate": 9.550010132098303e-07, | |
| "loss": 0.7261, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.2832618025751072, | |
| "grad_norm": 1.6755917072296143, | |
| "learning_rate": 9.51671500971617e-07, | |
| "loss": 0.8368, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.284692417739628, | |
| "grad_norm": 3.195072889328003, | |
| "learning_rate": 9.483451041027182e-07, | |
| "loss": 0.855, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.2861230329041489, | |
| "grad_norm": 1.5989915132522583, | |
| "learning_rate": 9.450218415023063e-07, | |
| "loss": 0.8193, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.2875536480686696, | |
| "grad_norm": 4.059481620788574, | |
| "learning_rate": 9.417017320517456e-07, | |
| "loss": 0.7388, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.2889842632331903, | |
| "grad_norm": 4.821532249450684, | |
| "learning_rate": 9.383847946144855e-07, | |
| "loss": 0.7063, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.290414878397711, | |
| "grad_norm": 16.60176658630371, | |
| "learning_rate": 9.350710480359549e-07, | |
| "loss": 0.7916, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.2918454935622319, | |
| "grad_norm": 5.774556636810303, | |
| "learning_rate": 9.317605111434513e-07, | |
| "loss": 0.8476, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.2932761087267526, | |
| "grad_norm": 1.998368263244629, | |
| "learning_rate": 9.284532027460378e-07, | |
| "loss": 0.7909, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.2947067238912733, | |
| "grad_norm": 2.349731206893921, | |
| "learning_rate": 9.251491416344341e-07, | |
| "loss": 0.8264, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.296137339055794, | |
| "grad_norm": 2.667130947113037, | |
| "learning_rate": 9.2184834658091e-07, | |
| "loss": 0.6402, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.2975679542203147, | |
| "grad_norm": 1.8666576147079468, | |
| "learning_rate": 9.185508363391787e-07, | |
| "loss": 0.8442, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.2989985693848354, | |
| "grad_norm": 2.20011043548584, | |
| "learning_rate": 9.152566296442919e-07, | |
| "loss": 0.8345, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3004291845493563, | |
| "grad_norm": 1.1894949674606323, | |
| "learning_rate": 9.119657452125299e-07, | |
| "loss": 0.8069, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.301859799713877, | |
| "grad_norm": 2.52988862991333, | |
| "learning_rate": 9.086782017412988e-07, | |
| "loss": 0.7534, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3032904148783977, | |
| "grad_norm": 3.5195047855377197, | |
| "learning_rate": 9.053940179090225e-07, | |
| "loss": 0.7125, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.3047210300429184, | |
| "grad_norm": 3.777909994125366, | |
| "learning_rate": 9.021132123750361e-07, | |
| "loss": 0.7886, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3061516452074393, | |
| "grad_norm": 3.459988832473755, | |
| "learning_rate": 8.988358037794821e-07, | |
| "loss": 0.8223, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.30758226037196, | |
| "grad_norm": 1.278838038444519, | |
| "learning_rate": 8.955618107432014e-07, | |
| "loss": 0.8042, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.3090128755364807, | |
| "grad_norm": 2.881751775741577, | |
| "learning_rate": 8.922912518676302e-07, | |
| "loss": 0.8053, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.3104434907010014, | |
| "grad_norm": 3.7574193477630615, | |
| "learning_rate": 8.890241457346934e-07, | |
| "loss": 0.7679, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.311874105865522, | |
| "grad_norm": 6.835153102874756, | |
| "learning_rate": 8.857605109066977e-07, | |
| "loss": 0.757, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.3133047210300428, | |
| "grad_norm": 2.608959913253784, | |
| "learning_rate": 8.825003659262284e-07, | |
| "loss": 0.7314, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.3147353361945637, | |
| "grad_norm": 2.777501344680786, | |
| "learning_rate": 8.792437293160431e-07, | |
| "loss": 0.7734, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.3161659513590844, | |
| "grad_norm": 1.376322627067566, | |
| "learning_rate": 8.759906195789654e-07, | |
| "loss": 0.8299, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.3175965665236051, | |
| "grad_norm": 5.208398342132568, | |
| "learning_rate": 8.727410551977812e-07, | |
| "loss": 0.6947, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.3190271816881258, | |
| "grad_norm": 1.6894828081130981, | |
| "learning_rate": 8.694950546351335e-07, | |
| "loss": 0.7012, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.3204577968526467, | |
| "grad_norm": 4.928938388824463, | |
| "learning_rate": 8.662526363334164e-07, | |
| "loss": 0.818, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.3218884120171674, | |
| "grad_norm": 1.4015679359436035, | |
| "learning_rate": 8.630138187146725e-07, | |
| "loss": 0.7557, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.3233190271816881, | |
| "grad_norm": 1.5027586221694946, | |
| "learning_rate": 8.597786201804853e-07, | |
| "loss": 0.8091, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.3247496423462088, | |
| "grad_norm": 1.433759331703186, | |
| "learning_rate": 8.56547059111877e-07, | |
| "loss": 0.7719, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.3261802575107295, | |
| "grad_norm": 1.4195560216903687, | |
| "learning_rate": 8.533191538692026e-07, | |
| "loss": 0.7916, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.3276108726752502, | |
| "grad_norm": 2.4685306549072266, | |
| "learning_rate": 8.500949227920477e-07, | |
| "loss": 0.7753, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.3290414878397712, | |
| "grad_norm": 1.3594095706939697, | |
| "learning_rate": 8.468743841991219e-07, | |
| "loss": 0.7694, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.3304721030042919, | |
| "grad_norm": 2.4916977882385254, | |
| "learning_rate": 8.436575563881544e-07, | |
| "loss": 0.7889, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.3319027181688126, | |
| "grad_norm": 5.942515850067139, | |
| "learning_rate": 8.404444576357943e-07, | |
| "loss": 0.7976, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.2734025716781616, | |
| "learning_rate": 8.372351061975014e-07, | |
| "loss": 0.8291, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.3347639484978542, | |
| "grad_norm": 4.3545732498168945, | |
| "learning_rate": 8.340295203074449e-07, | |
| "loss": 0.8092, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.3361945636623749, | |
| "grad_norm": 2.437654733657837, | |
| "learning_rate": 8.308277181784017e-07, | |
| "loss": 0.7858, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.3376251788268956, | |
| "grad_norm": 2.960955858230591, | |
| "learning_rate": 8.27629718001649e-07, | |
| "loss": 0.8502, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.3390557939914163, | |
| "grad_norm": 3.9844677448272705, | |
| "learning_rate": 8.244355379468631e-07, | |
| "loss": 0.7174, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.340486409155937, | |
| "grad_norm": 3.1742899417877197, | |
| "learning_rate": 8.212451961620176e-07, | |
| "loss": 0.7704, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.3419170243204577, | |
| "grad_norm": 2.129551410675049, | |
| "learning_rate": 8.180587107732766e-07, | |
| "loss": 0.7319, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.3433476394849786, | |
| "grad_norm": 1.7495276927947998, | |
| "learning_rate": 8.148760998848951e-07, | |
| "loss": 0.7423, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.3447782546494993, | |
| "grad_norm": 2.0302577018737793, | |
| "learning_rate": 8.116973815791154e-07, | |
| "loss": 0.7748, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.34620886981402, | |
| "grad_norm": 1.8777068853378296, | |
| "learning_rate": 8.085225739160623e-07, | |
| "loss": 0.7707, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.3476394849785407, | |
| "grad_norm": 2.8703246116638184, | |
| "learning_rate": 8.053516949336425e-07, | |
| "loss": 0.7156, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.3490701001430616, | |
| "grad_norm": 2.731548309326172, | |
| "learning_rate": 8.021847626474412e-07, | |
| "loss": 0.8371, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.3505007153075823, | |
| "grad_norm": 4.414968490600586, | |
| "learning_rate": 7.990217950506219e-07, | |
| "loss": 0.7124, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.351931330472103, | |
| "grad_norm": 1.4502582550048828, | |
| "learning_rate": 7.958628101138203e-07, | |
| "loss": 0.7313, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.3533619456366237, | |
| "grad_norm": 3.5596978664398193, | |
| "learning_rate": 7.927078257850451e-07, | |
| "loss": 0.7698, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.3547925608011444, | |
| "grad_norm": 1.3692398071289062, | |
| "learning_rate": 7.895568599895763e-07, | |
| "loss": 0.7405, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.356223175965665, | |
| "grad_norm": 2.794085741043091, | |
| "learning_rate": 7.864099306298608e-07, | |
| "loss": 0.775, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.357653791130186, | |
| "grad_norm": 5.740682601928711, | |
| "learning_rate": 7.832670555854122e-07, | |
| "loss": 0.8187, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.3590844062947067, | |
| "grad_norm": 3.9949023723602295, | |
| "learning_rate": 7.801282527127108e-07, | |
| "loss": 0.797, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3605150214592274, | |
| "grad_norm": 1.2518641948699951, | |
| "learning_rate": 7.769935398450992e-07, | |
| "loss": 0.7613, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.3619456366237483, | |
| "grad_norm": 1.4318602085113525, | |
| "learning_rate": 7.738629347926818e-07, | |
| "loss": 0.7331, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.363376251788269, | |
| "grad_norm": 2.8898508548736572, | |
| "learning_rate": 7.707364553422264e-07, | |
| "loss": 0.7671, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.3648068669527897, | |
| "grad_norm": 4.7733473777771, | |
| "learning_rate": 7.676141192570586e-07, | |
| "loss": 0.8436, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.3662374821173104, | |
| "grad_norm": 2.112035036087036, | |
| "learning_rate": 7.644959442769636e-07, | |
| "loss": 0.7985, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.3676680972818311, | |
| "grad_norm": 4.145442485809326, | |
| "learning_rate": 7.613819481180869e-07, | |
| "loss": 0.8581, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.3690987124463518, | |
| "grad_norm": 1.2203041315078735, | |
| "learning_rate": 7.582721484728289e-07, | |
| "loss": 0.6751, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.3705293276108725, | |
| "grad_norm": 2.124601364135742, | |
| "learning_rate": 7.551665630097485e-07, | |
| "loss": 0.8874, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.3719599427753935, | |
| "grad_norm": 2.922088623046875, | |
| "learning_rate": 7.520652093734624e-07, | |
| "loss": 0.7966, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.3733905579399142, | |
| "grad_norm": 23.925447463989258, | |
| "learning_rate": 7.489681051845424e-07, | |
| "loss": 0.8503, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.3748211731044349, | |
| "grad_norm": 1.904219150543213, | |
| "learning_rate": 7.458752680394165e-07, | |
| "loss": 0.7959, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.3762517882689558, | |
| "grad_norm": 1.1502995491027832, | |
| "learning_rate": 7.427867155102712e-07, | |
| "loss": 0.7655, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.3776824034334765, | |
| "grad_norm": 7.088009357452393, | |
| "learning_rate": 7.397024651449477e-07, | |
| "loss": 0.7752, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.3791130185979972, | |
| "grad_norm": 1.1466937065124512, | |
| "learning_rate": 7.366225344668442e-07, | |
| "loss": 0.7847, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.3805436337625179, | |
| "grad_norm": 6.192886829376221, | |
| "learning_rate": 7.335469409748178e-07, | |
| "loss": 0.7846, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.3819742489270386, | |
| "grad_norm": 4.334934711456299, | |
| "learning_rate": 7.304757021430825e-07, | |
| "loss": 0.6667, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.3834048640915593, | |
| "grad_norm": 2.754920482635498, | |
| "learning_rate": 7.2740883542111e-07, | |
| "loss": 0.7744, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.3848354792560802, | |
| "grad_norm": 11.55642032623291, | |
| "learning_rate": 7.243463582335341e-07, | |
| "loss": 0.7909, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.386266094420601, | |
| "grad_norm": 11.184896469116211, | |
| "learning_rate": 7.212882879800468e-07, | |
| "loss": 0.7766, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.3876967095851216, | |
| "grad_norm": 3.4106950759887695, | |
| "learning_rate": 7.182346420353022e-07, | |
| "loss": 0.8393, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.3891273247496423, | |
| "grad_norm": 9.028657913208008, | |
| "learning_rate": 7.151854377488189e-07, | |
| "loss": 0.819, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.3905579399141632, | |
| "grad_norm": 2.576897144317627, | |
| "learning_rate": 7.121406924448783e-07, | |
| "loss": 0.8373, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.391988555078684, | |
| "grad_norm": 1.7887141704559326, | |
| "learning_rate": 7.091004234224274e-07, | |
| "loss": 0.8596, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.3934191702432046, | |
| "grad_norm": 2.7552521228790283, | |
| "learning_rate": 7.060646479549828e-07, | |
| "loss": 0.8854, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.3948497854077253, | |
| "grad_norm": 1.784921407699585, | |
| "learning_rate": 7.030333832905291e-07, | |
| "loss": 0.731, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.396280400572246, | |
| "grad_norm": 4.638574123382568, | |
| "learning_rate": 7.000066466514225e-07, | |
| "loss": 0.7751, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.3977110157367667, | |
| "grad_norm": 2.5338118076324463, | |
| "learning_rate": 6.969844552342939e-07, | |
| "loss": 0.7342, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.3991416309012876, | |
| "grad_norm": 3.9059221744537354, | |
| "learning_rate": 6.939668262099494e-07, | |
| "loss": 0.8343, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4005722460658083, | |
| "grad_norm": 2.2744622230529785, | |
| "learning_rate": 6.909537767232728e-07, | |
| "loss": 0.8063, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.402002861230329, | |
| "grad_norm": 8.209087371826172, | |
| "learning_rate": 6.87945323893131e-07, | |
| "loss": 0.7646, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.4034334763948497, | |
| "grad_norm": 1.3790867328643799, | |
| "learning_rate": 6.849414848122728e-07, | |
| "loss": 0.8081, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.4048640915593706, | |
| "grad_norm": 3.088000535964966, | |
| "learning_rate": 6.819422765472337e-07, | |
| "loss": 0.867, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.4062947067238913, | |
| "grad_norm": 1.6987063884735107, | |
| "learning_rate": 6.789477161382405e-07, | |
| "loss": 0.7473, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.407725321888412, | |
| "grad_norm": 1.63704514503479, | |
| "learning_rate": 6.759578205991113e-07, | |
| "loss": 0.7635, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.4091559370529327, | |
| "grad_norm": 2.0188796520233154, | |
| "learning_rate": 6.729726069171605e-07, | |
| "loss": 0.7787, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.4105865522174534, | |
| "grad_norm": 1.2314823865890503, | |
| "learning_rate": 6.699920920531034e-07, | |
| "loss": 0.7567, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.4120171673819741, | |
| "grad_norm": 6.099488258361816, | |
| "learning_rate": 6.670162929409572e-07, | |
| "loss": 0.8228, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.413447782546495, | |
| "grad_norm": 2.057798385620117, | |
| "learning_rate": 6.640452264879465e-07, | |
| "loss": 0.7335, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.4148783977110158, | |
| "grad_norm": 1.429482340812683, | |
| "learning_rate": 6.61078909574408e-07, | |
| "loss": 0.8056, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.4163090128755365, | |
| "grad_norm": 1.7895710468292236, | |
| "learning_rate": 6.581173590536924e-07, | |
| "loss": 0.6972, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.4177396280400572, | |
| "grad_norm": 1.1860177516937256, | |
| "learning_rate": 6.551605917520704e-07, | |
| "loss": 0.7852, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.419170243204578, | |
| "grad_norm": 2.0887012481689453, | |
| "learning_rate": 6.522086244686351e-07, | |
| "loss": 0.8344, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.4206008583690988, | |
| "grad_norm": 1.4745193719863892, | |
| "learning_rate": 6.492614739752104e-07, | |
| "loss": 0.7405, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.4220314735336195, | |
| "grad_norm": 17.525732040405273, | |
| "learning_rate": 6.463191570162516e-07, | |
| "loss": 0.8515, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.4234620886981402, | |
| "grad_norm": 3.040510892868042, | |
| "learning_rate": 6.433816903087513e-07, | |
| "loss": 0.8162, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.4248927038626609, | |
| "grad_norm": 41.955718994140625, | |
| "learning_rate": 6.404490905421474e-07, | |
| "loss": 0.7542, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.4263233190271816, | |
| "grad_norm": 2.4796228408813477, | |
| "learning_rate": 6.375213743782236e-07, | |
| "loss": 0.8064, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.4277539341917025, | |
| "grad_norm": 3.1929125785827637, | |
| "learning_rate": 6.345985584510177e-07, | |
| "loss": 0.7785, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.4291845493562232, | |
| "grad_norm": 1.4972928762435913, | |
| "learning_rate": 6.316806593667274e-07, | |
| "loss": 0.7456, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.4306151645207439, | |
| "grad_norm": 1.4708969593048096, | |
| "learning_rate": 6.28767693703614e-07, | |
| "loss": 0.7775, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4320457796852646, | |
| "grad_norm": 5.087403774261475, | |
| "learning_rate": 6.258596780119087e-07, | |
| "loss": 0.8118, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.4334763948497855, | |
| "grad_norm": 1.518306016921997, | |
| "learning_rate": 6.229566288137212e-07, | |
| "loss": 0.7894, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.4349070100143062, | |
| "grad_norm": 6.86577033996582, | |
| "learning_rate": 6.200585626029412e-07, | |
| "loss": 0.8725, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.436337625178827, | |
| "grad_norm": 1.6665362119674683, | |
| "learning_rate": 6.171654958451484e-07, | |
| "loss": 0.7696, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.4377682403433476, | |
| "grad_norm": 9.65654468536377, | |
| "learning_rate": 6.142774449775181e-07, | |
| "loss": 0.8192, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.4391988555078683, | |
| "grad_norm": 3.128150701522827, | |
| "learning_rate": 6.113944264087269e-07, | |
| "loss": 0.8093, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.440629470672389, | |
| "grad_norm": 3.613922357559204, | |
| "learning_rate": 6.085164565188594e-07, | |
| "loss": 0.7531, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.44206008583691, | |
| "grad_norm": 3.4265799522399902, | |
| "learning_rate": 6.056435516593175e-07, | |
| "loss": 0.7629, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.4434907010014306, | |
| "grad_norm": 3.6590576171875, | |
| "learning_rate": 6.027757281527242e-07, | |
| "loss": 0.747, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.4449213161659513, | |
| "grad_norm": 7.065302848815918, | |
| "learning_rate": 5.999130022928323e-07, | |
| "loss": 0.7662, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4463519313304722, | |
| "grad_norm": 2.217602491378784, | |
| "learning_rate": 5.970553903444338e-07, | |
| "loss": 0.7692, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.447782546494993, | |
| "grad_norm": 2.588672399520874, | |
| "learning_rate": 5.942029085432636e-07, | |
| "loss": 0.7657, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.4492131616595136, | |
| "grad_norm": 1.4080536365509033, | |
| "learning_rate": 5.913555730959096e-07, | |
| "loss": 0.7697, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.4506437768240343, | |
| "grad_norm": 1.4369243383407593, | |
| "learning_rate": 5.88513400179722e-07, | |
| "loss": 0.7933, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.452074391988555, | |
| "grad_norm": 3.819899320602417, | |
| "learning_rate": 5.856764059427178e-07, | |
| "loss": 0.7487, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.4535050071530757, | |
| "grad_norm": 1.862250804901123, | |
| "learning_rate": 5.828446065034912e-07, | |
| "loss": 0.7765, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.4549356223175964, | |
| "grad_norm": 1.8112690448760986, | |
| "learning_rate": 5.80018017951123e-07, | |
| "loss": 0.8474, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.4563662374821174, | |
| "grad_norm": 2.6052050590515137, | |
| "learning_rate": 5.771966563450868e-07, | |
| "loss": 0.7542, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.457796852646638, | |
| "grad_norm": 8.262088775634766, | |
| "learning_rate": 5.743805377151587e-07, | |
| "loss": 0.7811, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.4592274678111588, | |
| "grad_norm": 2.0252511501312256, | |
| "learning_rate": 5.715696780613279e-07, | |
| "loss": 0.8363, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.4606580829756797, | |
| "grad_norm": 3.229971408843994, | |
| "learning_rate": 5.687640933537032e-07, | |
| "loss": 0.722, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.4620886981402004, | |
| "grad_norm": 2.0548818111419678, | |
| "learning_rate": 5.659637995324229e-07, | |
| "loss": 0.7691, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.463519313304721, | |
| "grad_norm": 2.4392716884613037, | |
| "learning_rate": 5.631688125075667e-07, | |
| "loss": 0.7619, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.4649499284692418, | |
| "grad_norm": 7.450191497802734, | |
| "learning_rate": 5.603791481590612e-07, | |
| "loss": 0.8198, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.4663805436337625, | |
| "grad_norm": 3.3907644748687744, | |
| "learning_rate": 5.575948223365925e-07, | |
| "loss": 0.7469, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.4678111587982832, | |
| "grad_norm": 1.4037667512893677, | |
| "learning_rate": 5.548158508595166e-07, | |
| "loss": 0.7584, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.469241773962804, | |
| "grad_norm": 2.314279556274414, | |
| "learning_rate": 5.520422495167671e-07, | |
| "loss": 0.7725, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.4706723891273248, | |
| "grad_norm": 1.8584074974060059, | |
| "learning_rate": 5.492740340667664e-07, | |
| "loss": 0.7752, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.4721030042918455, | |
| "grad_norm": 1.3589491844177246, | |
| "learning_rate": 5.465112202373385e-07, | |
| "loss": 0.769, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.4735336194563662, | |
| "grad_norm": 0.9419125914573669, | |
| "learning_rate": 5.43753823725616e-07, | |
| "loss": 0.7325, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.474964234620887, | |
| "grad_norm": 1.696077823638916, | |
| "learning_rate": 5.410018601979525e-07, | |
| "loss": 0.7432, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.4763948497854078, | |
| "grad_norm": 3.0386385917663574, | |
| "learning_rate": 5.382553452898354e-07, | |
| "loss": 0.7708, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.4778254649499285, | |
| "grad_norm": 1.6025636196136475, | |
| "learning_rate": 5.355142946057936e-07, | |
| "loss": 0.7812, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.4792560801144492, | |
| "grad_norm": 1.280683994293213, | |
| "learning_rate": 5.327787237193109e-07, | |
| "loss": 0.8416, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.48068669527897, | |
| "grad_norm": 1.0730654001235962, | |
| "learning_rate": 5.300486481727383e-07, | |
| "loss": 0.7834, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.4821173104434906, | |
| "grad_norm": 3.347235679626465, | |
| "learning_rate": 5.273240834772038e-07, | |
| "loss": 0.7814, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.4835479256080115, | |
| "grad_norm": 2.4247682094573975, | |
| "learning_rate": 5.246050451125244e-07, | |
| "loss": 0.795, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.4849785407725322, | |
| "grad_norm": 2.305119752883911, | |
| "learning_rate": 5.218915485271206e-07, | |
| "loss": 0.8216, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.486409155937053, | |
| "grad_norm": 4.8329901695251465, | |
| "learning_rate": 5.191836091379255e-07, | |
| "loss": 0.7352, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.4878397711015736, | |
| "grad_norm": 2.9933252334594727, | |
| "learning_rate": 5.164812423302991e-07, | |
| "loss": 0.7846, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.4892703862660945, | |
| "grad_norm": 2.515021324157715, | |
| "learning_rate": 5.137844634579393e-07, | |
| "loss": 0.8154, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.4907010014306152, | |
| "grad_norm": 1.579648494720459, | |
| "learning_rate": 5.110932878427982e-07, | |
| "loss": 0.7556, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.492131616595136, | |
| "grad_norm": 1.0330595970153809, | |
| "learning_rate": 5.0840773077499e-07, | |
| "loss": 0.8217, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.4935622317596566, | |
| "grad_norm": 4.242507457733154, | |
| "learning_rate": 5.057278075127074e-07, | |
| "loss": 0.8441, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.4949928469241773, | |
| "grad_norm": 3.4716875553131104, | |
| "learning_rate": 5.030535332821356e-07, | |
| "loss": 0.7702, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.496423462088698, | |
| "grad_norm": 14.659123420715332, | |
| "learning_rate": 5.00384923277363e-07, | |
| "loss": 0.7983, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.497854077253219, | |
| "grad_norm": 2.3961856365203857, | |
| "learning_rate": 4.977219926602959e-07, | |
| "loss": 0.8693, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.4992846924177397, | |
| "grad_norm": 3.505021333694458, | |
| "learning_rate": 4.950647565605744e-07, | |
| "loss": 0.8205, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.5007153075822603, | |
| "grad_norm": 1.913122296333313, | |
| "learning_rate": 4.924132300754835e-07, | |
| "loss": 0.8566, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.5021459227467813, | |
| "grad_norm": 1.38058340549469, | |
| "learning_rate": 4.897674282698685e-07, | |
| "loss": 0.7602, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5021459227467813, | |
| "eval_loss": 0.927307665348053, | |
| "eval_runtime": 63.9053, | |
| "eval_samples_per_second": 6.494, | |
| "eval_steps_per_second": 0.407, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.503576537911302, | |
| "grad_norm": 1.8451398611068726, | |
| "learning_rate": 4.871273661760507e-07, | |
| "loss": 0.8307, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.5050071530758227, | |
| "grad_norm": 1.7605483531951904, | |
| "learning_rate": 4.844930587937399e-07, | |
| "loss": 0.7784, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.5064377682403434, | |
| "grad_norm": 2.6857316493988037, | |
| "learning_rate": 4.818645210899492e-07, | |
| "loss": 0.8508, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.507868383404864, | |
| "grad_norm": 4.418957710266113, | |
| "learning_rate": 4.792417679989133e-07, | |
| "loss": 0.8581, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.5092989985693848, | |
| "grad_norm": 1.829487919807434, | |
| "learning_rate": 4.76624814421999e-07, | |
| "loss": 0.7126, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.5107296137339055, | |
| "grad_norm": 5.856306076049805, | |
| "learning_rate": 4.7401367522762304e-07, | |
| "loss": 0.7673, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.5121602288984262, | |
| "grad_norm": 18.10394859313965, | |
| "learning_rate": 4.714083652511686e-07, | |
| "loss": 0.8228, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.513590844062947, | |
| "grad_norm": 1.5220907926559448, | |
| "learning_rate": 4.6880889929489865e-07, | |
| "loss": 0.8537, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.5150214592274678, | |
| "grad_norm": 8.02856731414795, | |
| "learning_rate": 4.662152921278726e-07, | |
| "loss": 0.8248, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.5164520743919887, | |
| "grad_norm": 2.229973554611206, | |
| "learning_rate": 4.636275584858641e-07, | |
| "loss": 0.8259, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.5178826895565094, | |
| "grad_norm": 7.8895416259765625, | |
| "learning_rate": 4.610457130712745e-07, | |
| "loss": 0.7989, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.51931330472103, | |
| "grad_norm": 4.06100606918335, | |
| "learning_rate": 4.5846977055305117e-07, | |
| "loss": 0.8214, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.5207439198855508, | |
| "grad_norm": 7.079894065856934, | |
| "learning_rate": 4.5589974556660456e-07, | |
| "loss": 0.8546, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.5221745350500715, | |
| "grad_norm": 1.5825847387313843, | |
| "learning_rate": 4.5333565271372316e-07, | |
| "loss": 0.6878, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.5236051502145922, | |
| "grad_norm": 2.358546257019043, | |
| "learning_rate": 4.507775065624916e-07, | |
| "loss": 0.7321, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.525035765379113, | |
| "grad_norm": 3.348055839538574, | |
| "learning_rate": 4.48225321647209e-07, | |
| "loss": 0.7788, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.5264663805436338, | |
| "grad_norm": 4.638083457946777, | |
| "learning_rate": 4.456791124683043e-07, | |
| "loss": 0.7619, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.5278969957081545, | |
| "grad_norm": 2.1720707416534424, | |
| "learning_rate": 4.431388934922545e-07, | |
| "loss": 0.8027, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.5293276108726752, | |
| "grad_norm": 3.190173625946045, | |
| "learning_rate": 4.4060467915150454e-07, | |
| "loss": 0.7065, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.5307582260371961, | |
| "grad_norm": 3.1954455375671387, | |
| "learning_rate": 4.380764838443813e-07, | |
| "loss": 0.7435, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.5321888412017168, | |
| "grad_norm": 2.271794557571411, | |
| "learning_rate": 4.35554321935016e-07, | |
| "loss": 0.7707, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.5336194563662375, | |
| "grad_norm": 1.6862138509750366, | |
| "learning_rate": 4.330382077532594e-07, | |
| "loss": 0.7988, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.5350500715307582, | |
| "grad_norm": 2.501862049102783, | |
| "learning_rate": 4.305281555946025e-07, | |
| "loss": 0.7269, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.536480686695279, | |
| "grad_norm": 6.872259140014648, | |
| "learning_rate": 4.2802417972009416e-07, | |
| "loss": 0.7131, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.5379113018597996, | |
| "grad_norm": 4.220912933349609, | |
| "learning_rate": 4.2552629435625944e-07, | |
| "loss": 0.772, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.5393419170243203, | |
| "grad_norm": 14.172344207763672, | |
| "learning_rate": 4.2303451369502167e-07, | |
| "loss": 0.8208, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.5407725321888412, | |
| "grad_norm": 3.8137381076812744, | |
| "learning_rate": 4.2054885189361833e-07, | |
| "loss": 0.7236, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.542203147353362, | |
| "grad_norm": 1.7150403261184692, | |
| "learning_rate": 4.1806932307452187e-07, | |
| "loss": 0.7771, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.5436337625178826, | |
| "grad_norm": 2.4068055152893066, | |
| "learning_rate": 4.1559594132536164e-07, | |
| "loss": 0.8226, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.5450643776824036, | |
| "grad_norm": 6.004452228546143, | |
| "learning_rate": 4.1312872069884015e-07, | |
| "loss": 0.7727, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5464949928469243, | |
| "grad_norm": 1.6341569423675537, | |
| "learning_rate": 4.1066767521265524e-07, | |
| "loss": 0.7553, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.547925608011445, | |
| "grad_norm": 2.0458455085754395, | |
| "learning_rate": 4.0821281884942145e-07, | |
| "loss": 0.8625, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.5493562231759657, | |
| "grad_norm": 1.1154307126998901, | |
| "learning_rate": 4.05764165556588e-07, | |
| "loss": 0.725, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.5507868383404864, | |
| "grad_norm": 4.723168849945068, | |
| "learning_rate": 4.033217292463613e-07, | |
| "loss": 0.8132, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.552217453505007, | |
| "grad_norm": 2.8204376697540283, | |
| "learning_rate": 4.008855237956261e-07, | |
| "loss": 0.7391, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.5536480686695278, | |
| "grad_norm": 5.638607501983643, | |
| "learning_rate": 3.9845556304586554e-07, | |
| "loss": 0.862, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.5550786838340487, | |
| "grad_norm": 2.7092974185943604, | |
| "learning_rate": 3.9603186080308253e-07, | |
| "loss": 0.7355, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.5565092989985694, | |
| "grad_norm": 2.155038356781006, | |
| "learning_rate": 3.936144308377229e-07, | |
| "loss": 0.7857, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.55793991416309, | |
| "grad_norm": 1.2008074522018433, | |
| "learning_rate": 3.9120328688459554e-07, | |
| "loss": 0.7398, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.559370529327611, | |
| "grad_norm": 2.4443776607513428, | |
| "learning_rate": 3.887984426427943e-07, | |
| "loss": 0.6986, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5608011444921317, | |
| "grad_norm": 1.3269106149673462, | |
| "learning_rate": 3.863999117756221e-07, | |
| "loss": 0.8451, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.5622317596566524, | |
| "grad_norm": 2.9758455753326416, | |
| "learning_rate": 3.8400770791051087e-07, | |
| "loss": 0.8204, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.563662374821173, | |
| "grad_norm": 1.9098341464996338, | |
| "learning_rate": 3.8162184463894503e-07, | |
| "loss": 0.7557, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.5650929899856938, | |
| "grad_norm": 1.6109716892242432, | |
| "learning_rate": 3.7924233551638575e-07, | |
| "loss": 0.7489, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.5665236051502145, | |
| "grad_norm": 1.1516788005828857, | |
| "learning_rate": 3.768691940621913e-07, | |
| "loss": 0.7758, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.5679542203147352, | |
| "grad_norm": 0.825720489025116, | |
| "learning_rate": 3.745024337595418e-07, | |
| "loss": 0.7843, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.5693848354792561, | |
| "grad_norm": 1.0096527338027954, | |
| "learning_rate": 3.721420680553634e-07, | |
| "loss": 0.7708, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.5708154506437768, | |
| "grad_norm": 1.44577956199646, | |
| "learning_rate": 3.697881103602497e-07, | |
| "loss": 0.7596, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.5722460658082977, | |
| "grad_norm": 2.351330518722534, | |
| "learning_rate": 3.674405740483868e-07, | |
| "loss": 0.7222, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.5736766809728184, | |
| "grad_norm": 3.4750962257385254, | |
| "learning_rate": 3.6509947245747826e-07, | |
| "loss": 0.7588, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.5751072961373391, | |
| "grad_norm": 2.525667667388916, | |
| "learning_rate": 3.627648188886674e-07, | |
| "loss": 0.841, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.5765379113018598, | |
| "grad_norm": 6.404210567474365, | |
| "learning_rate": 3.604366266064625e-07, | |
| "loss": 0.7888, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.5779685264663805, | |
| "grad_norm": 1.4588615894317627, | |
| "learning_rate": 3.5811490883866165e-07, | |
| "loss": 0.6871, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.5793991416309012, | |
| "grad_norm": 2.6901755332946777, | |
| "learning_rate": 3.557996787762785e-07, | |
| "loss": 0.8005, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.580829756795422, | |
| "grad_norm": 1.7762101888656616, | |
| "learning_rate": 3.534909495734653e-07, | |
| "loss": 0.7128, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.5822603719599426, | |
| "grad_norm": 2.374758243560791, | |
| "learning_rate": 3.511887343474388e-07, | |
| "loss": 0.784, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.5836909871244635, | |
| "grad_norm": 1.9807848930358887, | |
| "learning_rate": 3.488930461784075e-07, | |
| "loss": 0.7985, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.5851216022889842, | |
| "grad_norm": 5.555301189422607, | |
| "learning_rate": 3.46603898109495e-07, | |
| "loss": 0.8539, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.5865522174535052, | |
| "grad_norm": 1.7881734371185303, | |
| "learning_rate": 3.443213031466664e-07, | |
| "loss": 0.7204, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.5879828326180259, | |
| "grad_norm": 2.2678470611572266, | |
| "learning_rate": 3.420452742586562e-07, | |
| "loss": 0.7618, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.5894134477825466, | |
| "grad_norm": 1.7865197658538818, | |
| "learning_rate": 3.397758243768925e-07, | |
| "loss": 0.753, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.5908440629470673, | |
| "grad_norm": 3.1574225425720215, | |
| "learning_rate": 3.375129663954233e-07, | |
| "loss": 0.7138, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.592274678111588, | |
| "grad_norm": 2.2759108543395996, | |
| "learning_rate": 3.3525671317084643e-07, | |
| "loss": 0.7308, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.5937052932761087, | |
| "grad_norm": 1.156466007232666, | |
| "learning_rate": 3.330070775222324e-07, | |
| "loss": 0.7906, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.5951359084406294, | |
| "grad_norm": 40.72386169433594, | |
| "learning_rate": 3.30764072231054e-07, | |
| "loss": 0.8223, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.59656652360515, | |
| "grad_norm": 2.194326639175415, | |
| "learning_rate": 3.285277100411138e-07, | |
| "loss": 0.8578, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.597997138769671, | |
| "grad_norm": 3.6407439708709717, | |
| "learning_rate": 3.2629800365847046e-07, | |
| "loss": 0.78, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.5994277539341917, | |
| "grad_norm": 1.0178086757659912, | |
| "learning_rate": 3.240749657513667e-07, | |
| "loss": 0.7566, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.6008583690987126, | |
| "grad_norm": 1.544061303138733, | |
| "learning_rate": 3.2185860895015945e-07, | |
| "loss": 0.7867, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.6022889842632333, | |
| "grad_norm": 1.3069376945495605, | |
| "learning_rate": 3.1964894584724467e-07, | |
| "loss": 0.7854, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.603719599427754, | |
| "grad_norm": 2.242849111557007, | |
| "learning_rate": 3.1744598899698815e-07, | |
| "loss": 0.849, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.6051502145922747, | |
| "grad_norm": 4.343907356262207, | |
| "learning_rate": 3.152497509156543e-07, | |
| "loss": 0.7896, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.6065808297567954, | |
| "grad_norm": 3.1958019733428955, | |
| "learning_rate": 3.1306024408133354e-07, | |
| "loss": 0.7529, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.608011444921316, | |
| "grad_norm": 4.552048206329346, | |
| "learning_rate": 3.108774809338721e-07, | |
| "loss": 0.7182, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.6094420600858368, | |
| "grad_norm": 5.193328857421875, | |
| "learning_rate": 3.087014738748025e-07, | |
| "loss": 0.7959, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.6108726752503575, | |
| "grad_norm": 2.845714569091797, | |
| "learning_rate": 3.0653223526727086e-07, | |
| "loss": 0.8154, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.6123032904148784, | |
| "grad_norm": 1.2329682111740112, | |
| "learning_rate": 3.0436977743596823e-07, | |
| "loss": 0.7836, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.613733905579399, | |
| "grad_norm": 1.965511679649353, | |
| "learning_rate": 3.0221411266706067e-07, | |
| "loss": 0.865, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.61516452074392, | |
| "grad_norm": 1.1454925537109375, | |
| "learning_rate": 3.000652532081185e-07, | |
| "loss": 0.7543, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.6165951359084407, | |
| "grad_norm": 2.9890522956848145, | |
| "learning_rate": 2.979232112680466e-07, | |
| "loss": 0.7906, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.6180257510729614, | |
| "grad_norm": 3.1727652549743652, | |
| "learning_rate": 2.95787999017017e-07, | |
| "loss": 0.7959, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.6194563662374821, | |
| "grad_norm": 3.714076042175293, | |
| "learning_rate": 2.9365962858639733e-07, | |
| "loss": 0.7517, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.6208869814020028, | |
| "grad_norm": 2.584320545196533, | |
| "learning_rate": 2.915381120686825e-07, | |
| "loss": 0.7209, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.6223175965665235, | |
| "grad_norm": 2.656510829925537, | |
| "learning_rate": 2.8942346151742793e-07, | |
| "loss": 0.7495, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.6237482117310442, | |
| "grad_norm": 2.237746238708496, | |
| "learning_rate": 2.8731568894717843e-07, | |
| "loss": 0.7395, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.6251788268955651, | |
| "grad_norm": 3.3685977458953857, | |
| "learning_rate": 2.852148063334006e-07, | |
| "loss": 0.8202, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.6266094420600858, | |
| "grad_norm": 2.4544692039489746, | |
| "learning_rate": 2.831208256124167e-07, | |
| "loss": 0.8121, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.6280400572246065, | |
| "grad_norm": 3.5506999492645264, | |
| "learning_rate": 2.8103375868133424e-07, | |
| "loss": 0.7756, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.6294706723891275, | |
| "grad_norm": 3.116994619369507, | |
| "learning_rate": 2.789536173979794e-07, | |
| "loss": 0.8122, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.6309012875536482, | |
| "grad_norm": 8.765533447265625, | |
| "learning_rate": 2.768804135808313e-07, | |
| "loss": 0.6921, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.6323319027181689, | |
| "grad_norm": 2.5757832527160645, | |
| "learning_rate": 2.748141590089515e-07, | |
| "loss": 0.8041, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.6337625178826896, | |
| "grad_norm": 3.618260622024536, | |
| "learning_rate": 2.727548654219193e-07, | |
| "loss": 0.823, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.6351931330472103, | |
| "grad_norm": 1.3914964199066162, | |
| "learning_rate": 2.707025445197659e-07, | |
| "loss": 0.7844, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.636623748211731, | |
| "grad_norm": 5.06028413772583, | |
| "learning_rate": 2.686572079629054e-07, | |
| "loss": 0.8875, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.6380543633762517, | |
| "grad_norm": 4.079840183258057, | |
| "learning_rate": 2.6661886737206966e-07, | |
| "loss": 0.8285, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.6394849785407726, | |
| "grad_norm": 1.7283517122268677, | |
| "learning_rate": 2.6458753432824387e-07, | |
| "loss": 0.6827, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.6409155937052933, | |
| "grad_norm": 3.4194791316986084, | |
| "learning_rate": 2.625632203725979e-07, | |
| "loss": 0.7079, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.642346208869814, | |
| "grad_norm": 4.089590549468994, | |
| "learning_rate": 2.605459370064224e-07, | |
| "loss": 0.7858, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.643776824034335, | |
| "grad_norm": 1.229331135749817, | |
| "learning_rate": 2.58535695691064e-07, | |
| "loss": 0.791, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.6452074391988556, | |
| "grad_norm": 1.524109959602356, | |
| "learning_rate": 2.5653250784785883e-07, | |
| "loss": 0.7691, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6466380543633763, | |
| "grad_norm": 2.404613494873047, | |
| "learning_rate": 2.545363848580679e-07, | |
| "loss": 0.703, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.648068669527897, | |
| "grad_norm": 1.462568759918213, | |
| "learning_rate": 2.525473380628127e-07, | |
| "loss": 0.7592, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.6494992846924177, | |
| "grad_norm": 2.3987367153167725, | |
| "learning_rate": 2.505653787630121e-07, | |
| "loss": 0.7462, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.6509298998569384, | |
| "grad_norm": 2.1042797565460205, | |
| "learning_rate": 2.4859051821931515e-07, | |
| "loss": 0.8334, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.652360515021459, | |
| "grad_norm": 2.755420446395874, | |
| "learning_rate": 2.466227676520395e-07, | |
| "loss": 0.8181, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.65379113018598, | |
| "grad_norm": 11.293910026550293, | |
| "learning_rate": 2.4466213824110745e-07, | |
| "loss": 0.7035, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.6552217453505007, | |
| "grad_norm": 1.2045400142669678, | |
| "learning_rate": 2.427086411259812e-07, | |
| "loss": 0.7634, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.6566523605150214, | |
| "grad_norm": 1.108940839767456, | |
| "learning_rate": 2.4076228740559996e-07, | |
| "loss": 0.7702, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.6580829756795423, | |
| "grad_norm": 7.84178352355957, | |
| "learning_rate": 2.3882308813831857e-07, | |
| "loss": 0.771, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.659513590844063, | |
| "grad_norm": 7.632559299468994, | |
| "learning_rate": 2.36891054341842e-07, | |
| "loss": 0.77, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6609442060085837, | |
| "grad_norm": 2.9848833084106445, | |
| "learning_rate": 2.349661969931643e-07, | |
| "loss": 0.7671, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.6623748211731044, | |
| "grad_norm": 7.07189416885376, | |
| "learning_rate": 2.3304852702850688e-07, | |
| "loss": 0.772, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.6638054363376251, | |
| "grad_norm": 2.8970439434051514, | |
| "learning_rate": 2.3113805534325465e-07, | |
| "loss": 0.7272, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.6652360515021458, | |
| "grad_norm": 2.363818883895874, | |
| "learning_rate": 2.2923479279189464e-07, | |
| "loss": 0.7735, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 6.8089599609375, | |
| "learning_rate": 2.2733875018795586e-07, | |
| "loss": 0.7952, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.6680972818311874, | |
| "grad_norm": 5.405824661254883, | |
| "learning_rate": 2.2544993830394571e-07, | |
| "loss": 0.8125, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.6695278969957081, | |
| "grad_norm": 1.5793416500091553, | |
| "learning_rate": 2.2356836787128947e-07, | |
| "loss": 0.8465, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.670958512160229, | |
| "grad_norm": 1.910159945487976, | |
| "learning_rate": 2.2169404958027095e-07, | |
| "loss": 0.7499, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.6723891273247498, | |
| "grad_norm": 10.456279754638672, | |
| "learning_rate": 2.198269940799691e-07, | |
| "loss": 0.8234, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.6738197424892705, | |
| "grad_norm": 4.157973766326904, | |
| "learning_rate": 2.1796721197819868e-07, | |
| "loss": 0.8318, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.6752503576537912, | |
| "grad_norm": 0.991513729095459, | |
| "learning_rate": 2.1611471384145126e-07, | |
| "loss": 0.7611, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.6766809728183119, | |
| "grad_norm": 0.8860572576522827, | |
| "learning_rate": 2.1426951019483327e-07, | |
| "loss": 0.7057, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.6781115879828326, | |
| "grad_norm": 2.586033344268799, | |
| "learning_rate": 2.1243161152200629e-07, | |
| "loss": 0.8086, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.6795422031473533, | |
| "grad_norm": 2.3332133293151855, | |
| "learning_rate": 2.1060102826512983e-07, | |
| "loss": 0.7717, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.680972818311874, | |
| "grad_norm": 2.051971197128296, | |
| "learning_rate": 2.087777708247991e-07, | |
| "loss": 0.7448, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.6824034334763949, | |
| "grad_norm": 5.876535892486572, | |
| "learning_rate": 2.0696184955998675e-07, | |
| "loss": 0.7681, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.6838340486409156, | |
| "grad_norm": 2.5695269107818604, | |
| "learning_rate": 2.0515327478798601e-07, | |
| "loss": 0.8074, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.6852646638054365, | |
| "grad_norm": 8.719694137573242, | |
| "learning_rate": 2.033520567843491e-07, | |
| "loss": 0.8109, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.6866952789699572, | |
| "grad_norm": 2.353991985321045, | |
| "learning_rate": 2.015582057828302e-07, | |
| "loss": 0.7361, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.688125894134478, | |
| "grad_norm": 5.169013023376465, | |
| "learning_rate": 1.9977173197532845e-07, | |
| "loss": 0.8165, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.6895565092989986, | |
| "grad_norm": 1.9157449007034302, | |
| "learning_rate": 1.979926455118279e-07, | |
| "loss": 0.7044, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.6909871244635193, | |
| "grad_norm": 4.792452812194824, | |
| "learning_rate": 1.9622095650034077e-07, | |
| "loss": 0.7902, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.69241773962804, | |
| "grad_norm": 1.4491595029830933, | |
| "learning_rate": 1.94456675006851e-07, | |
| "loss": 0.7668, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.6938483547925607, | |
| "grad_norm": 2.2091567516326904, | |
| "learning_rate": 1.9269981105525559e-07, | |
| "loss": 0.7461, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.6952789699570814, | |
| "grad_norm": 1.9733930826187134, | |
| "learning_rate": 1.909503746273078e-07, | |
| "loss": 0.6816, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.6967095851216023, | |
| "grad_norm": 3.1535139083862305, | |
| "learning_rate": 1.89208375662562e-07, | |
| "loss": 0.8196, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.698140200286123, | |
| "grad_norm": 2.412435531616211, | |
| "learning_rate": 1.8747382405831515e-07, | |
| "loss": 0.7442, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.699570815450644, | |
| "grad_norm": 3.027723550796509, | |
| "learning_rate": 1.8574672966955125e-07, | |
| "loss": 0.823, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.7010014306151646, | |
| "grad_norm": 1.3484646081924438, | |
| "learning_rate": 1.8402710230888685e-07, | |
| "loss": 0.8225, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.7024320457796853, | |
| "grad_norm": 4.724902629852295, | |
| "learning_rate": 1.823149517465128e-07, | |
| "loss": 0.7957, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.703862660944206, | |
| "grad_norm": 3.8161709308624268, | |
| "learning_rate": 1.8061028771014004e-07, | |
| "loss": 0.8052, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.7052932761087267, | |
| "grad_norm": 2.081833839416504, | |
| "learning_rate": 1.7891311988494523e-07, | |
| "loss": 0.7378, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.7067238912732474, | |
| "grad_norm": 2.2098007202148438, | |
| "learning_rate": 1.772234579135138e-07, | |
| "loss": 0.7968, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.7081545064377681, | |
| "grad_norm": 1.3511004447937012, | |
| "learning_rate": 1.7554131139578622e-07, | |
| "loss": 0.8255, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.709585121602289, | |
| "grad_norm": 11.756885528564453, | |
| "learning_rate": 1.73866689889004e-07, | |
| "loss": 0.78, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.7110157367668097, | |
| "grad_norm": 1.7030614614486694, | |
| "learning_rate": 1.7219960290765402e-07, | |
| "loss": 0.8037, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.7124463519313304, | |
| "grad_norm": 3.0442252159118652, | |
| "learning_rate": 1.705400599234152e-07, | |
| "loss": 0.7357, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.7138769670958514, | |
| "grad_norm": 3.3682615756988525, | |
| "learning_rate": 1.6888807036510562e-07, | |
| "loss": 0.8288, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.715307582260372, | |
| "grad_norm": 1.4772732257843018, | |
| "learning_rate": 1.6724364361862682e-07, | |
| "loss": 0.8346, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.7167381974248928, | |
| "grad_norm": 1.5449028015136719, | |
| "learning_rate": 1.6560678902691223e-07, | |
| "loss": 0.6765, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7181688125894135, | |
| "grad_norm": 1.7480943202972412, | |
| "learning_rate": 1.639775158898732e-07, | |
| "loss": 0.796, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.7195994277539342, | |
| "grad_norm": 4.165433406829834, | |
| "learning_rate": 1.62355833464347e-07, | |
| "loss": 0.752, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.7210300429184548, | |
| "grad_norm": 1.7983890771865845, | |
| "learning_rate": 1.6074175096404382e-07, | |
| "loss": 0.7895, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.7224606580829755, | |
| "grad_norm": 1.4561206102371216, | |
| "learning_rate": 1.5913527755949308e-07, | |
| "loss": 0.7682, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.7238912732474965, | |
| "grad_norm": 1.1166143417358398, | |
| "learning_rate": 1.5753642237799426e-07, | |
| "loss": 0.825, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.7253218884120172, | |
| "grad_norm": 1.4510133266448975, | |
| "learning_rate": 1.5594519450356204e-07, | |
| "loss": 0.7234, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.7267525035765379, | |
| "grad_norm": 3.046424627304077, | |
| "learning_rate": 1.5436160297687614e-07, | |
| "loss": 0.8216, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.7281831187410588, | |
| "grad_norm": 5.349708080291748, | |
| "learning_rate": 1.527856567952306e-07, | |
| "loss": 0.7233, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.7296137339055795, | |
| "grad_norm": 2.7202823162078857, | |
| "learning_rate": 1.5121736491248127e-07, | |
| "loss": 0.7901, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.7310443490701002, | |
| "grad_norm": 2.2550981044769287, | |
| "learning_rate": 1.4965673623899495e-07, | |
| "loss": 0.7899, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.7324749642346209, | |
| "grad_norm": 2.9862146377563477, | |
| "learning_rate": 1.481037796416009e-07, | |
| "loss": 0.7367, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.7339055793991416, | |
| "grad_norm": 0.9522223472595215, | |
| "learning_rate": 1.4655850394353738e-07, | |
| "loss": 0.7218, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.7353361945636623, | |
| "grad_norm": 2.175283670425415, | |
| "learning_rate": 1.450209179244038e-07, | |
| "loss": 0.8367, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.736766809728183, | |
| "grad_norm": 7.380995750427246, | |
| "learning_rate": 1.434910303201102e-07, | |
| "loss": 0.8238, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.738197424892704, | |
| "grad_norm": 1.5405120849609375, | |
| "learning_rate": 1.41968849822827e-07, | |
| "loss": 0.787, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.7396280400572246, | |
| "grad_norm": 3.323050022125244, | |
| "learning_rate": 1.404543850809364e-07, | |
| "loss": 0.7354, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.7410586552217453, | |
| "grad_norm": 16.810117721557617, | |
| "learning_rate": 1.389476446989828e-07, | |
| "loss": 0.7283, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.7424892703862662, | |
| "grad_norm": 1.5554410219192505, | |
| "learning_rate": 1.3744863723762457e-07, | |
| "loss": 0.8043, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.743919885550787, | |
| "grad_norm": 1.5318583250045776, | |
| "learning_rate": 1.359573712135842e-07, | |
| "loss": 0.8493, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.7453505007153076, | |
| "grad_norm": 1.6202287673950195, | |
| "learning_rate": 1.3447385509960085e-07, | |
| "loss": 0.7898, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.7467811158798283, | |
| "grad_norm": 2.8205626010894775, | |
| "learning_rate": 1.3299809732438277e-07, | |
| "loss": 0.7225, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.748211731044349, | |
| "grad_norm": 5.016057014465332, | |
| "learning_rate": 1.3153010627255728e-07, | |
| "loss": 0.8083, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.7496423462088697, | |
| "grad_norm": 3.2081761360168457, | |
| "learning_rate": 1.3006989028462536e-07, | |
| "loss": 0.806, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.7510729613733904, | |
| "grad_norm": 4.852132797241211, | |
| "learning_rate": 1.286174576569134e-07, | |
| "loss": 0.7865, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.7525035765379113, | |
| "grad_norm": 4.22651481628418, | |
| "learning_rate": 1.271728166415258e-07, | |
| "loss": 0.7865, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.7525035765379113, | |
| "eval_loss": 0.9261357188224792, | |
| "eval_runtime": 64.6017, | |
| "eval_samples_per_second": 6.424, | |
| "eval_steps_per_second": 0.402, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.753934191702432, | |
| "grad_norm": 1.1874042749404907, | |
| "learning_rate": 1.2573597544629795e-07, | |
| "loss": 0.7648, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.755364806866953, | |
| "grad_norm": 3.088524341583252, | |
| "learning_rate": 1.2430694223475087e-07, | |
| "loss": 0.8424, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.7567954220314737, | |
| "grad_norm": 2.089639902114868, | |
| "learning_rate": 1.2288572512604341e-07, | |
| "loss": 0.8197, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.7582260371959944, | |
| "grad_norm": 1.833664059638977, | |
| "learning_rate": 1.2147233219492627e-07, | |
| "loss": 0.6933, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.759656652360515, | |
| "grad_norm": 4.207241535186768, | |
| "learning_rate": 1.2006677147169754e-07, | |
| "loss": 0.8613, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7610872675250357, | |
| "grad_norm": 5.451657772064209, | |
| "learning_rate": 1.1866905094215508e-07, | |
| "loss": 0.7253, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.7625178826895564, | |
| "grad_norm": 2.7151124477386475, | |
| "learning_rate": 1.1727917854755238e-07, | |
| "loss": 0.8098, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.7639484978540771, | |
| "grad_norm": 1.2078485488891602, | |
| "learning_rate": 1.1589716218455359e-07, | |
| "loss": 0.6965, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.7653791130185978, | |
| "grad_norm": 1.4734965562820435, | |
| "learning_rate": 1.1452300970518758e-07, | |
| "loss": 0.7128, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.7668097281831188, | |
| "grad_norm": 3.2850356101989746, | |
| "learning_rate": 1.1315672891680429e-07, | |
| "loss": 0.7104, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.7682403433476395, | |
| "grad_norm": 1.9388680458068848, | |
| "learning_rate": 1.117983275820304e-07, | |
| "loss": 0.7422, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.7696709585121604, | |
| "grad_norm": 82.46575164794922, | |
| "learning_rate": 1.1044781341872411e-07, | |
| "loss": 0.7632, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.771101573676681, | |
| "grad_norm": 3.337305784225464, | |
| "learning_rate": 1.0910519409993247e-07, | |
| "loss": 0.76, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.7725321888412018, | |
| "grad_norm": 2.8676528930664062, | |
| "learning_rate": 1.0777047725384786e-07, | |
| "loss": 0.7758, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.7739628040057225, | |
| "grad_norm": 21.342599868774414, | |
| "learning_rate": 1.064436704637633e-07, | |
| "loss": 0.8218, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.7753934191702432, | |
| "grad_norm": 1.6680625677108765, | |
| "learning_rate": 1.0512478126803071e-07, | |
| "loss": 0.7485, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.7768240343347639, | |
| "grad_norm": 5.325804710388184, | |
| "learning_rate": 1.038138171600177e-07, | |
| "loss": 0.7723, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.7782546494992846, | |
| "grad_norm": 3.2667267322540283, | |
| "learning_rate": 1.0251078558806486e-07, | |
| "loss": 0.77, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.7796852646638053, | |
| "grad_norm": 3.370208501815796, | |
| "learning_rate": 1.0121569395544272e-07, | |
| "loss": 0.8516, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.7811158798283262, | |
| "grad_norm": 4.472996711730957, | |
| "learning_rate": 9.9928549620312e-08, | |
| "loss": 0.8197, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.782546494992847, | |
| "grad_norm": 2.5200583934783936, | |
| "learning_rate": 9.864935989567874e-08, | |
| "loss": 0.7444, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.7839771101573678, | |
| "grad_norm": 2.0389504432678223, | |
| "learning_rate": 9.737813204935497e-08, | |
| "loss": 0.7552, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.7854077253218885, | |
| "grad_norm": 3.2909703254699707, | |
| "learning_rate": 9.611487330391688e-08, | |
| "loss": 0.8065, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.7868383404864092, | |
| "grad_norm": 3.057483434677124, | |
| "learning_rate": 9.485959083666324e-08, | |
| "loss": 0.7563, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.78826895565093, | |
| "grad_norm": 8.188149452209473, | |
| "learning_rate": 9.361229177957486e-08, | |
| "loss": 0.757, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.7896995708154506, | |
| "grad_norm": 2.4237565994262695, | |
| "learning_rate": 9.23729832192749e-08, | |
| "loss": 0.7992, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.7911301859799713, | |
| "grad_norm": 1.6685830354690552, | |
| "learning_rate": 9.114167219698744e-08, | |
| "loss": 0.7748, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.792560801144492, | |
| "grad_norm": 4.239346981048584, | |
| "learning_rate": 8.991836570849743e-08, | |
| "loss": 0.7456, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.7939914163090127, | |
| "grad_norm": 2.0644781589508057, | |
| "learning_rate": 8.870307070411288e-08, | |
| "loss": 0.8112, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.7954220314735336, | |
| "grad_norm": 1.9066531658172607, | |
| "learning_rate": 8.749579408862269e-08, | |
| "loss": 0.7299, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.7968526466380543, | |
| "grad_norm": 5.838130950927734, | |
| "learning_rate": 8.629654272125887e-08, | |
| "loss": 0.7255, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.7982832618025753, | |
| "grad_norm": 2.705153226852417, | |
| "learning_rate": 8.510532341565807e-08, | |
| "loss": 0.7872, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.799713876967096, | |
| "grad_norm": 2.5030932426452637, | |
| "learning_rate": 8.392214293982165e-08, | |
| "loss": 0.6766, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.8011444921316166, | |
| "grad_norm": 2.564344882965088, | |
| "learning_rate": 8.274700801607744e-08, | |
| "loss": 0.7533, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.8025751072961373, | |
| "grad_norm": 9.728470802307129, | |
| "learning_rate": 8.157992532104269e-08, | |
| "loss": 0.8039, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.804005722460658, | |
| "grad_norm": 3.0228323936462402, | |
| "learning_rate": 8.042090148558479e-08, | |
| "loss": 0.8776, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.8054363376251787, | |
| "grad_norm": 1.9111461639404297, | |
| "learning_rate": 7.926994309478403e-08, | |
| "loss": 0.8547, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.8068669527896994, | |
| "grad_norm": 2.468275547027588, | |
| "learning_rate": 7.812705668789671e-08, | |
| "loss": 0.7513, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.8082975679542204, | |
| "grad_norm": 2.3994827270507812, | |
| "learning_rate": 7.699224875831717e-08, | |
| "loss": 0.8268, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.809728183118741, | |
| "grad_norm": 1.6180237531661987, | |
| "learning_rate": 7.586552575354144e-08, | |
| "loss": 0.7764, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.8111587982832618, | |
| "grad_norm": 2.5159072875976562, | |
| "learning_rate": 7.47468940751303e-08, | |
| "loss": 0.8373, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.8125894134477827, | |
| "grad_norm": 1.236160159111023, | |
| "learning_rate": 7.36363600786733e-08, | |
| "loss": 0.7767, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.8140200286123034, | |
| "grad_norm": 3.060023307800293, | |
| "learning_rate": 7.253393007375231e-08, | |
| "loss": 0.8235, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.815450643776824, | |
| "grad_norm": 4.7666120529174805, | |
| "learning_rate": 7.143961032390533e-08, | |
| "loss": 0.7897, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.8168812589413448, | |
| "grad_norm": 1.959795594215393, | |
| "learning_rate": 7.035340704659244e-08, | |
| "loss": 0.8028, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.8183118741058655, | |
| "grad_norm": 1.297690510749817, | |
| "learning_rate": 6.927532641315821e-08, | |
| "loss": 0.776, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.8197424892703862, | |
| "grad_norm": 3.889566421508789, | |
| "learning_rate": 6.8205374548798e-08, | |
| "loss": 0.822, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.8211731044349069, | |
| "grad_norm": 2.258944272994995, | |
| "learning_rate": 6.714355753252394e-08, | |
| "loss": 0.8079, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.8226037195994278, | |
| "grad_norm": 3.4968879222869873, | |
| "learning_rate": 6.60898813971283e-08, | |
| "loss": 0.7688, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.8240343347639485, | |
| "grad_norm": 2.931837797164917, | |
| "learning_rate": 6.504435212915049e-08, | |
| "loss": 0.7655, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.8254649499284692, | |
| "grad_norm": 1.8585553169250488, | |
| "learning_rate": 6.400697566884367e-08, | |
| "loss": 0.7458, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.8268955650929901, | |
| "grad_norm": 1.4828190803527832, | |
| "learning_rate": 6.297775791013933e-08, | |
| "loss": 0.7337, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.8283261802575108, | |
| "grad_norm": 3.6852729320526123, | |
| "learning_rate": 6.195670470061505e-08, | |
| "loss": 0.7259, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.8297567954220315, | |
| "grad_norm": 2.430832624435425, | |
| "learning_rate": 6.094382184146085e-08, | |
| "loss": 0.8294, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.8311874105865522, | |
| "grad_norm": 1.5084558725357056, | |
| "learning_rate": 5.99391150874466e-08, | |
| "loss": 0.8652, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.832618025751073, | |
| "grad_norm": 3.6525607109069824, | |
| "learning_rate": 5.894259014688824e-08, | |
| "loss": 0.7514, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.8340486409155936, | |
| "grad_norm": 1.948525309562683, | |
| "learning_rate": 5.7954252681617304e-08, | |
| "loss": 0.7769, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.8354792560801143, | |
| "grad_norm": 1.8478093147277832, | |
| "learning_rate": 5.697410830694633e-08, | |
| "loss": 0.8044, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.8369098712446352, | |
| "grad_norm": 1.2266122102737427, | |
| "learning_rate": 5.600216259163893e-08, | |
| "loss": 0.7641, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.838340486409156, | |
| "grad_norm": 0.9987815618515015, | |
| "learning_rate": 5.5038421057877654e-08, | |
| "loss": 0.6867, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.8397711015736766, | |
| "grad_norm": 2.728739023208618, | |
| "learning_rate": 5.4082889181231497e-08, | |
| "loss": 0.8508, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.8412017167381975, | |
| "grad_norm": 1.628726840019226, | |
| "learning_rate": 5.313557239062627e-08, | |
| "loss": 0.7974, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.8426323319027182, | |
| "grad_norm": 2.028298854827881, | |
| "learning_rate": 5.219647606831329e-08, | |
| "loss": 0.7859, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.844062947067239, | |
| "grad_norm": 2.2269015312194824, | |
| "learning_rate": 5.126560554983822e-08, | |
| "loss": 0.9191, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.8454935622317596, | |
| "grad_norm": 5.080014705657959, | |
| "learning_rate": 5.034296612401129e-08, | |
| "loss": 0.6733, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8469241773962803, | |
| "grad_norm": 3.053027629852295, | |
| "learning_rate": 4.942856303287779e-08, | |
| "loss": 0.7883, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.848354792560801, | |
| "grad_norm": 1.7156245708465576, | |
| "learning_rate": 4.852240147168696e-08, | |
| "loss": 0.7215, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.8497854077253217, | |
| "grad_norm": 1.3909878730773926, | |
| "learning_rate": 4.762448658886298e-08, | |
| "loss": 0.8188, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.8512160228898427, | |
| "grad_norm": 5.936245441436768, | |
| "learning_rate": 4.673482348597685e-08, | |
| "loss": 0.8267, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.8526466380543634, | |
| "grad_norm": 18.523326873779297, | |
| "learning_rate": 4.585341721771574e-08, | |
| "loss": 0.7863, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.8540772532188843, | |
| "grad_norm": 2.28387713432312, | |
| "learning_rate": 4.4980272791855015e-08, | |
| "loss": 0.8343, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.855507868383405, | |
| "grad_norm": 1.3548191785812378, | |
| "learning_rate": 4.4115395169230074e-08, | |
| "loss": 0.7428, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.8569384835479257, | |
| "grad_norm": 3.7556676864624023, | |
| "learning_rate": 4.325878926370791e-08, | |
| "loss": 0.7839, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.8583690987124464, | |
| "grad_norm": 3.6090095043182373, | |
| "learning_rate": 4.241045994215842e-08, | |
| "loss": 0.8006, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.859799713876967, | |
| "grad_norm": 1.8558502197265625, | |
| "learning_rate": 4.157041202442863e-08, | |
| "loss": 0.7306, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8612303290414878, | |
| "grad_norm": 1.3088663816452026, | |
| "learning_rate": 4.0738650283313025e-08, | |
| "loss": 0.7975, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.8626609442060085, | |
| "grad_norm": 1.1639654636383057, | |
| "learning_rate": 3.991517944452827e-08, | |
| "loss": 0.7781, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.8640915593705292, | |
| "grad_norm": 2.453809976577759, | |
| "learning_rate": 3.9100004186685354e-08, | |
| "loss": 0.8048, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.86552217453505, | |
| "grad_norm": 1.307875394821167, | |
| "learning_rate": 3.8293129141263485e-08, | |
| "loss": 0.7623, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.8669527896995708, | |
| "grad_norm": 2.7597270011901855, | |
| "learning_rate": 3.7494558892583405e-08, | |
| "loss": 0.7839, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.8683834048640917, | |
| "grad_norm": 3.5831847190856934, | |
| "learning_rate": 3.670429797778163e-08, | |
| "loss": 0.7739, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.8698140200286124, | |
| "grad_norm": 2.250288724899292, | |
| "learning_rate": 3.592235088678458e-08, | |
| "loss": 0.7752, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.871244635193133, | |
| "grad_norm": 2.3639230728149414, | |
| "learning_rate": 3.514872206228298e-08, | |
| "loss": 0.8142, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.8726752503576538, | |
| "grad_norm": 10.222983360290527, | |
| "learning_rate": 3.438341589970684e-08, | |
| "loss": 0.7631, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.8741058655221745, | |
| "grad_norm": 1.4523752927780151, | |
| "learning_rate": 3.3626436747200175e-08, | |
| "loss": 0.8136, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.8755364806866952, | |
| "grad_norm": 1.7456223964691162, | |
| "learning_rate": 3.287778890559684e-08, | |
| "loss": 0.7797, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.876967095851216, | |
| "grad_norm": 1.4522265195846558, | |
| "learning_rate": 3.2137476628395054e-08, | |
| "loss": 0.7736, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.8783977110157366, | |
| "grad_norm": 6.369755744934082, | |
| "learning_rate": 3.1405504121734593e-08, | |
| "loss": 0.7719, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.8798283261802575, | |
| "grad_norm": 2.3526201248168945, | |
| "learning_rate": 3.0681875544371796e-08, | |
| "loss": 0.8312, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.8812589413447782, | |
| "grad_norm": 3.876243829727173, | |
| "learning_rate": 2.9966595007656416e-08, | |
| "loss": 0.7576, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.8826895565092991, | |
| "grad_norm": 2.7545125484466553, | |
| "learning_rate": 2.9259666575508494e-08, | |
| "loss": 0.7619, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.8841201716738198, | |
| "grad_norm": 9.593175888061523, | |
| "learning_rate": 2.856109426439435e-08, | |
| "loss": 0.8205, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.8855507868383405, | |
| "grad_norm": 5.5158257484436035, | |
| "learning_rate": 2.7870882043304957e-08, | |
| "loss": 0.7339, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.8869814020028612, | |
| "grad_norm": 1.1985629796981812, | |
| "learning_rate": 2.7189033833732614e-08, | |
| "loss": 0.8216, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.888412017167382, | |
| "grad_norm": 2.041839838027954, | |
| "learning_rate": 2.6515553509648793e-08, | |
| "loss": 0.7589, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.8898426323319026, | |
| "grad_norm": 2.407585859298706, | |
| "learning_rate": 2.5850444897482172e-08, | |
| "loss": 0.8723, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.8912732474964233, | |
| "grad_norm": 1.7742396593093872, | |
| "learning_rate": 2.519371177609714e-08, | |
| "loss": 0.8111, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.8927038626609443, | |
| "grad_norm": 1.1010509729385376, | |
| "learning_rate": 2.454535787677181e-08, | |
| "loss": 0.8269, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.894134477825465, | |
| "grad_norm": 2.547274351119995, | |
| "learning_rate": 2.3905386883177228e-08, | |
| "loss": 0.7992, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.8955650929899857, | |
| "grad_norm": 1.671331763267517, | |
| "learning_rate": 2.3273802431356684e-08, | |
| "loss": 0.793, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.8969957081545066, | |
| "grad_norm": 3.759086847305298, | |
| "learning_rate": 2.2650608109704263e-08, | |
| "loss": 0.8215, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.8984263233190273, | |
| "grad_norm": 2.3819046020507812, | |
| "learning_rate": 2.2035807458944845e-08, | |
| "loss": 0.7701, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 1.899856938483548, | |
| "grad_norm": 1.7277506589889526, | |
| "learning_rate": 2.1429403972114626e-08, | |
| "loss": 0.8075, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.9012875536480687, | |
| "grad_norm": 2.645439863204956, | |
| "learning_rate": 2.083140109453996e-08, | |
| "loss": 0.7018, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 1.9027181688125894, | |
| "grad_norm": 3.964482545852661, | |
| "learning_rate": 2.0241802223818884e-08, | |
| "loss": 0.7789, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.90414878397711, | |
| "grad_norm": 5.473621845245361, | |
| "learning_rate": 1.966061070980163e-08, | |
| "loss": 0.7389, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 1.9055793991416308, | |
| "grad_norm": 3.40977144241333, | |
| "learning_rate": 1.9087829854571137e-08, | |
| "loss": 0.82, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.9070100143061517, | |
| "grad_norm": 2.368593692779541, | |
| "learning_rate": 1.8523462912424405e-08, | |
| "loss": 0.8084, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 1.9084406294706724, | |
| "grad_norm": 1.9491324424743652, | |
| "learning_rate": 1.7967513089854336e-08, | |
| "loss": 0.791, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.909871244635193, | |
| "grad_norm": 2.3171393871307373, | |
| "learning_rate": 1.741998354553176e-08, | |
| "loss": 0.7305, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.911301859799714, | |
| "grad_norm": 1.2715893983840942, | |
| "learning_rate": 1.6880877390286264e-08, | |
| "loss": 0.7664, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.9127324749642347, | |
| "grad_norm": 2.1280972957611084, | |
| "learning_rate": 1.6350197687089897e-08, | |
| "loss": 0.7713, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 1.9141630901287554, | |
| "grad_norm": 1.0025123357772827, | |
| "learning_rate": 1.582794745103916e-08, | |
| "loss": 0.7392, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.915593705293276, | |
| "grad_norm": 2.628035545349121, | |
| "learning_rate": 1.5314129649337537e-08, | |
| "loss": 0.7828, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 1.9170243204577968, | |
| "grad_norm": 2.137150764465332, | |
| "learning_rate": 1.4808747201279171e-08, | |
| "loss": 0.8359, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.9184549356223175, | |
| "grad_norm": 27.349082946777344, | |
| "learning_rate": 1.4311802978232535e-08, | |
| "loss": 0.6619, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 1.9198855507868382, | |
| "grad_norm": 1.4242587089538574, | |
| "learning_rate": 1.3823299803622957e-08, | |
| "loss": 0.7845, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.9213161659513591, | |
| "grad_norm": 2.508871555328369, | |
| "learning_rate": 1.334324045291796e-08, | |
| "loss": 0.8064, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 1.9227467811158798, | |
| "grad_norm": 12.176376342773438, | |
| "learning_rate": 1.2871627653610608e-08, | |
| "loss": 0.7454, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.9241773962804005, | |
| "grad_norm": 1.3289920091629028, | |
| "learning_rate": 1.2408464085204019e-08, | |
| "loss": 0.8334, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.9256080114449214, | |
| "grad_norm": 2.553537368774414, | |
| "learning_rate": 1.1953752379196715e-08, | |
| "loss": 0.6796, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.9270386266094421, | |
| "grad_norm": 4.845339775085449, | |
| "learning_rate": 1.150749511906729e-08, | |
| "loss": 0.8347, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 1.9284692417739628, | |
| "grad_norm": 1.5274536609649658, | |
| "learning_rate": 1.106969484025977e-08, | |
| "loss": 0.7211, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.9298998569384835, | |
| "grad_norm": 6.8586883544921875, | |
| "learning_rate": 1.0640354030168776e-08, | |
| "loss": 0.7573, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 1.9313304721030042, | |
| "grad_norm": 17.5329647064209, | |
| "learning_rate": 1.0219475128126377e-08, | |
| "loss": 0.7283, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.932761087267525, | |
| "grad_norm": 1.1714322566986084, | |
| "learning_rate": 9.807060525387602e-09, | |
| "loss": 0.7442, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 1.9341917024320456, | |
| "grad_norm": 6.689727306365967, | |
| "learning_rate": 9.403112565116612e-09, | |
| "loss": 0.817, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.9356223175965666, | |
| "grad_norm": 4.254633903503418, | |
| "learning_rate": 9.00763354237405e-09, | |
| "loss": 0.7439, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 1.9370529327610873, | |
| "grad_norm": 0.9951338171958923, | |
| "learning_rate": 8.62062570410338e-09, | |
| "loss": 0.7618, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.9384835479256082, | |
| "grad_norm": 1.1761829853057861, | |
| "learning_rate": 8.242091249118732e-09, | |
| "loss": 0.744, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.9399141630901289, | |
| "grad_norm": 16.564828872680664, | |
| "learning_rate": 7.87203232809175e-09, | |
| "loss": 0.7898, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.9413447782546496, | |
| "grad_norm": 2.6274590492248535, | |
| "learning_rate": 7.510451043539923e-09, | |
| "loss": 0.9064, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 1.9427753934191703, | |
| "grad_norm": 3.510563373565674, | |
| "learning_rate": 7.15734944981411e-09, | |
| "loss": 0.7994, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.944206008583691, | |
| "grad_norm": 1.1502721309661865, | |
| "learning_rate": 6.812729553087704e-09, | |
| "loss": 0.7258, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 1.9456366237482117, | |
| "grad_norm": 2.471219539642334, | |
| "learning_rate": 6.4765933113439815e-09, | |
| "loss": 0.7513, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9470672389127324, | |
| "grad_norm": 2.596886157989502, | |
| "learning_rate": 6.148942634366439e-09, | |
| "loss": 0.8226, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 1.948497854077253, | |
| "grad_norm": 1.2276873588562012, | |
| "learning_rate": 5.829779383726808e-09, | |
| "loss": 0.7847, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.949928469241774, | |
| "grad_norm": 10.255902290344238, | |
| "learning_rate": 5.5191053727748905e-09, | |
| "loss": 0.8118, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 1.9513590844062947, | |
| "grad_norm": 1.835872769355774, | |
| "learning_rate": 5.216922366628074e-09, | |
| "loss": 0.7836, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.9527896995708156, | |
| "grad_norm": 3.529498338699341, | |
| "learning_rate": 4.923232082161999e-09, | |
| "loss": 0.7899, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.9542203147353363, | |
| "grad_norm": 3.05070424079895, | |
| "learning_rate": 4.638036187999739e-09, | |
| "loss": 0.8756, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.955650929899857, | |
| "grad_norm": 3.6936333179473877, | |
| "learning_rate": 4.361336304503305e-09, | |
| "loss": 0.8157, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 1.9570815450643777, | |
| "grad_norm": 52.86602020263672, | |
| "learning_rate": 4.0931340037633214e-09, | |
| "loss": 0.7565, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.9585121602288984, | |
| "grad_norm": 1.9639256000518799, | |
| "learning_rate": 3.833430809591698e-09, | |
| "loss": 0.7229, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 1.959942775393419, | |
| "grad_norm": 1.0925458669662476, | |
| "learning_rate": 3.5822281975111395e-09, | |
| "loss": 0.7935, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.9613733905579398, | |
| "grad_norm": 7.261877059936523, | |
| "learning_rate": 3.3395275947481484e-09, | |
| "loss": 0.7111, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 1.9628040057224605, | |
| "grad_norm": 1.3601925373077393, | |
| "learning_rate": 3.105330380224536e-09, | |
| "loss": 0.7941, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.9642346208869814, | |
| "grad_norm": 5.424409866333008, | |
| "learning_rate": 2.8796378845489245e-09, | |
| "loss": 0.8544, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 1.9656652360515021, | |
| "grad_norm": 2.5825531482696533, | |
| "learning_rate": 2.6624513900102565e-09, | |
| "loss": 0.763, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.967095851216023, | |
| "grad_norm": 1.6688388586044312, | |
| "learning_rate": 2.453772130569798e-09, | |
| "loss": 0.7661, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.9685264663805437, | |
| "grad_norm": 2.8896663188934326, | |
| "learning_rate": 2.253601291854479e-09, | |
| "loss": 0.7118, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.9699570815450644, | |
| "grad_norm": 3.865675210952759, | |
| "learning_rate": 2.061940011149566e-09, | |
| "loss": 0.8666, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 1.9713876967095851, | |
| "grad_norm": 1.1079707145690918, | |
| "learning_rate": 1.8787893773931643e-09, | |
| "loss": 0.732, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.9728183118741058, | |
| "grad_norm": 1.7514995336532593, | |
| "learning_rate": 1.7041504311692268e-09, | |
| "loss": 0.7525, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 1.9742489270386265, | |
| "grad_norm": 2.8372395038604736, | |
| "learning_rate": 1.5380241647020564e-09, | |
| "loss": 0.8642, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.9756795422031472, | |
| "grad_norm": 2.040043592453003, | |
| "learning_rate": 1.3804115218503112e-09, | |
| "loss": 0.8039, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 1.977110157367668, | |
| "grad_norm": 1.864327311515808, | |
| "learning_rate": 1.2313133981020074e-09, | |
| "loss": 0.8012, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.9785407725321889, | |
| "grad_norm": 3.190329074859619, | |
| "learning_rate": 1.090730640569193e-09, | |
| "loss": 0.7958, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 1.9799713876967096, | |
| "grad_norm": 2.387249708175659, | |
| "learning_rate": 9.58664047983615e-10, | |
| "loss": 0.678, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.9814020028612305, | |
| "grad_norm": 8.965396881103516, | |
| "learning_rate": 8.351143706910591e-10, | |
| "loss": 0.7806, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.9828326180257512, | |
| "grad_norm": 1.7268003225326538, | |
| "learning_rate": 7.200823106485177e-10, | |
| "loss": 0.8479, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.9842632331902719, | |
| "grad_norm": 2.4086053371429443, | |
| "learning_rate": 6.13568521419361e-10, | |
| "loss": 0.7753, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 1.9856938483547926, | |
| "grad_norm": 2.2736830711364746, | |
| "learning_rate": 5.155736081691731e-10, | |
| "loss": 0.7656, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.9871244635193133, | |
| "grad_norm": 14.675554275512695, | |
| "learning_rate": 4.2609812766375435e-10, | |
| "loss": 0.7532, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 1.988555078683834, | |
| "grad_norm": 1.2827365398406982, | |
| "learning_rate": 3.451425882646242e-10, | |
| "loss": 0.7951, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.9899856938483547, | |
| "grad_norm": 3.003237009048462, | |
| "learning_rate": 2.727074499266902e-10, | |
| "loss": 0.6748, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 1.9914163090128756, | |
| "grad_norm": 3.035632610321045, | |
| "learning_rate": 2.0879312419574969e-10, | |
| "loss": 0.7217, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.9928469241773963, | |
| "grad_norm": 1.3384627103805542, | |
| "learning_rate": 1.5339997420549256e-10, | |
| "loss": 0.7284, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 1.994277539341917, | |
| "grad_norm": 2.3145205974578857, | |
| "learning_rate": 1.065283146765017e-10, | |
| "loss": 0.7841, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.995708154506438, | |
| "grad_norm": 3.6275460720062256, | |
| "learning_rate": 6.817841191358865e-11, | |
| "loss": 0.858, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.9971387696709586, | |
| "grad_norm": 3.475882053375244, | |
| "learning_rate": 3.83504838046278e-11, | |
| "loss": 0.7867, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.9985693848354793, | |
| "grad_norm": 4.988249778747559, | |
| "learning_rate": 1.7044699819057652e-11, | |
| "loss": 0.748, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.542473554611206, | |
| "learning_rate": 4.261181007381154e-12, | |
| "loss": 0.8327, | |
| "step": 1398 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1398, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 350, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3983264234067198e+20, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
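The state above is the `trainer_state.json` that the Hugging Face `Trainer` writes next to its checkpoints: `log_history` interleaves per-step training records (with `loss`) and periodic evaluation records (with `eval_loss`), followed by run-level metadata such as `global_step` and `num_train_epochs`. As a minimal sketch of how one might consume it, assuming the state is available as plain JSON at the hypothetical path `trainer_state.json` (without the table formatting shown here), the two loss curves can be extracted and plotted like this:

```python
# Minimal sketch: load a Hugging Face trainer_state.json and plot its loss curves.
# The filename and output path are assumptions for illustration only.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training records ("loss") and evaluation records ("eval_loss").
train = [(r["step"], r["loss"]) for r in state["log_history"] if "loss" in r]
evals = [(r["step"], r["eval_loss"]) for r in state["log_history"] if "eval_loss" in r]

plt.plot(*zip(*train), label="train loss")
if evals:
    plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.title(f"{state['global_step']} steps over {state['num_train_epochs']} epochs")
plt.legend()
plt.savefig("loss_curves.png")
```

For this run the sketch would trace the training loss over all 1398 steps and overlay the evaluation points logged every 175 steps.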