{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.976631448884397,
  "global_step": 1090500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4e-05,
      "loss": 9.6608,
      "step": 100
    },
    {
      "epoch": 0.0,
      "learning_rate": 8e-05,
      "loss": 8.6223,
      "step": 200
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00012,
      "loss": 8.3175,
      "step": 300
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00016,
      "loss": 7.9745,
      "step": 400
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.0002,
      "loss": 7.6776,
      "step": 500
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00024,
      "loss": 7.4451,
      "step": 600
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00028,
      "loss": 7.2587,
      "step": 700
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00032,
      "loss": 7.0977,
      "step": 800
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00036,
      "loss": 6.9377,
      "step": 900
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0004,
      "loss": 6.8182,
      "step": 1000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0003999999998815762,
      "loss": 6.6945,
      "step": 1100
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0003999999995263047,
      "loss": 6.5851,
      "step": 1200
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00039999999893418564,
      "loss": 6.476,
      "step": 1300
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0003999999981052189,
      "loss": 6.3753,
      "step": 1400
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00039999999703940455,
      "loss": 6.2997,
      "step": 1500
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00039999933291862616,
      "loss": 5.9559,
      "step": 2000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0003999994734068435,
      "loss": 6.1649,
      "step": 3000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0003999988151660478,
      "loss": 5.8819,
      "step": 4000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000399997893630147,
      "loss": 5.8437,
      "step": 5000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0003999967088003543,
      "loss": 5.857,
      "step": 6000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00039999526067822954,
      "loss": 5.7574,
      "step": 7000
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00039999354926567907,
      "loss": 5.6647,
      "step": 8000
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00039999157456495604,
      "loss": 5.598,
      "step": 9000
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00039998933657865997,
      "loss": 5.528,
      "step": 10000
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00039998683530973725,
      "loss": 5.4848,
      "step": 11000
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0003999840707614807,
      "loss": 5.4314,
      "step": 12000
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0003999810429375299,
      "loss": 5.3931,
      "step": 13000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00039997775526446917,
      "loss": 5.3531,
      "step": 14000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00039997420116469963,
      "loss": 5.3126,
      "step": 15000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0003999703877510894,
      "loss": 5.2782,
      "step": 16000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.000399966307394198,
      "loss": 5.2575,
      "step": 17000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0003999619682600994,
      "loss": 5.2297,
      "step": 18000
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0003999573616675516,
      "loss": 5.1976,
      "step": 19000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00039995249683579117,
      "loss": 5.182,
      "step": 20000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00039994736403182074,
      "loss": 5.1576,
      "step": 21000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00039994197352799087,
      "loss": 5.1435,
      "step": 22000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0003999363145395998,
      "loss": 5.1286,
      "step": 23000
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0003999303983920581,
      "loss": 5.1065,
      "step": 24000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0003999242132490164,
      "loss": 5.0946,
      "step": 25000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0003999177714888857,
      "loss": 5.0748,
      "step": 26000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00039991106022373136,
      "loss": 5.0674,
      "step": 27000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0003999040928848998,
      "loss": 5.0493,
      "step": 28000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0003998968555329385,
      "loss": 5.039,
      "step": 29000
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0003998893626520587,
      "loss": 5.0348,
      "step": 30000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0003998815992513638,
      "loss": 5.0201,
      "step": 31000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00039987357270987667,
      "loss": 5.0161,
      "step": 32000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0003998652914592657,
      "loss": 4.9991,
      "step": 33000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00039985673893135445,
      "loss": 4.9971,
      "step": 34000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0003998479232953792,
      "loss": 4.9871,
      "step": 35000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0003998388445629455,
      "loss": 4.9771,
      "step": 36000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0003998295122192289,
      "loss": 4.9726,
      "step": 37000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0003998199075931465,
      "loss": 4.9669,
      "step": 38000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0003998100499065675,
      "loss": 4.9583,
      "step": 39000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00039979992969921984,
      "loss": 4.9556,
      "step": 40000
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0003997895364597799,
      "loss": 4.942,
      "step": 41000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997788909883795,
      "loss": 4.9406,
      "step": 42000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039976797198678043,
      "loss": 4.9323,
      "step": 43000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039975680130732954,
      "loss": 4.9277,
      "step": 44000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997453566010126,
      "loss": 4.923,
      "step": 45000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997336489332646,
      "loss": 4.9197,
      "step": 46000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0003997216904214485,
      "loss": 4.9051,
      "step": 47000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00039970945714034553,
      "loss": 4.9077,
      "step": 48000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0003996969609450725,
      "loss": 4.9002,
      "step": 49000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0003996842147424852,
      "loss": 4.9013,
      "step": 50000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00039967119303144363,
      "loss": 4.8946,
      "step": 51000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00039965792187247553,
      "loss": 4.8882,
      "step": 52000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00039964437471416833,
      "loss": 4.8894,
      "step": 53000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0003996305786686345,
      "loss": 4.8764,
      "step": 54000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0003996165061343288,
      "loss": 4.8782,
      "step": 55000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0003996021852748057,
      "loss": 4.8759,
      "step": 56000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00039958758743853225,
      "loss": 4.8727,
      "step": 57000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0003995727418403572,
      "loss": 4.8669,
      "step": 58000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0003995576187789104,
      "loss": 4.8694,
      "step": 59000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00039954224852018107,
      "loss": 4.8688,
      "step": 60000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0003995266003131184,
      "loss": 4.86,
      "step": 61000
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00039951070547469266,
      "loss": 4.8559,
      "step": 62000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00039949453220433417,
      "loss": 4.8543,
      "step": 63000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00039947811286982935,
      "loss": 4.8515,
      "step": 64000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0003994614146212571,
      "loss": 4.8498,
      "step": 65000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00039944447087704996,
      "loss": 4.8443,
      "step": 66000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0003994272477381079,
      "loss": 4.8399,
      "step": 67000
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0003994097796733338,
      "loss": 4.8381,
      "step": 68000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00039939203173462723,
      "loss": 4.8381,
      "step": 69000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00039937403944117984,
      "loss": 4.8354,
      "step": 70000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00039935576679607466,
      "loss": 4.834,
      "step": 71000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0003993372503686054,
      "loss": 4.8337,
      "step": 72000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0003993184531132279,
      "loss": 4.8304,
      "step": 73000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00039929939345843064,
      "loss": 4.8254,
      "step": 74000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.000399280090882382,
      "loss": 4.8248,
      "step": 75000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0003992605067667017,
      "loss": 4.8248,
      "step": 76000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0003992406803053476,
      "loss": 4.8246,
      "step": 77000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00039922057183181,
      "loss": 4.8173,
      "step": 78000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00039920024209092803,
      "loss": 4.8128,
      "step": 79000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00039917960962754717,
      "loss": 4.818,
      "step": 80000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00039915871494753167,
      "loss": 4.8107,
      "step": 81000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00039913755807838893,
      "loss": 4.8121,
      "step": 82000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00039911613904797174,
      "loss": 4.8116,
      "step": 83000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0003990944796965674,
      "loss": 4.8057,
      "step": 84000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0003990725366906298,
      "loss": 4.8055,
      "step": 85000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0003990503762807127,
      "loss": 4.8028,
      "step": 86000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00039902790967672147,
      "loss": 4.7969,
      "step": 87000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0003990052039152944,
      "loss": 4.8025,
      "step": 88000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00039898221356934855,
      "loss": 4.8017,
      "step": 89000
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00039895896126663653,
      "loss": 4.7986,
      "step": 90000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0003989354470377698,
      "loss": 4.7991,
      "step": 91000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00039891169482063473,
      "loss": 4.7965,
      "step": 92000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00039888765709451975,
      "loss": 4.792,
      "step": 93000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00039886338196645364,
      "loss": 4.7862,
      "step": 94000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00039883882086954475,
      "loss": 4.7916,
      "step": 95000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.000398814022958251,
      "loss": 4.7883,
      "step": 96000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00039878893861975594,
      "loss": 4.7908,
      "step": 97000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00039876359257893807,
      "loss": 4.7877,
      "step": 98000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0003987379848691651,
      "loss": 4.7873,
      "step": 99000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00039871214152416957,
      "loss": 4.7876,
      "step": 100000
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00039868601083955114,
      "loss": 4.7883,
      "step": 101000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00039865964511100514,
      "loss": 4.7893,
      "step": 102000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0003986329915890061,
      "loss": 4.7789,
      "step": 103000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00039860610361561096,
      "loss": 4.7815,
      "step": 104000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0003985789273964466,
      "loss": 4.7738,
      "step": 105000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0003985515173196509,
      "loss": 4.7753,
      "step": 106000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00039852381854628627,
      "loss": 4.7724,
      "step": 107000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00039849588651028544,
      "loss": 4.7726,
      "step": 108000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0003984676653284346,
      "loss": 4.7685,
      "step": 109000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0003984392114801697,
      "loss": 4.7715,
      "step": 110000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0003984104680382948,
      "loss": 4.7713,
      "step": 111000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00039838149252745204,
      "loss": 4.7698,
      "step": 112000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0003983522269767629,
      "loss": 4.7753,
      "step": 113000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00039832272995577275,
      "loss": 4.7652,
      "step": 114000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0003982929424502255,
      "loss": 4.7664,
      "step": 115000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00039826292407426207,
      "loss": 4.7713,
      "step": 116000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00039823264521022384,
      "loss": 4.7628,
      "step": 117000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0003982020751975389,
      "loss": 4.7682,
      "step": 118000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00039817124425512714,
      "loss": 4.7644,
      "step": 119000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0003981401524235768,
      "loss": 4.758,
      "step": 120000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00039810883122677967,
      "loss": 4.7622,
      "step": 121000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0003980772180008777,
      "loss": 4.762,
      "step": 122000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0003980453760138509,
      "loss": 4.7571,
      "step": 123000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00039801324155990393,
      "loss": 4.7619,
      "step": 124000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00039798091147522796,
      "loss": 4.7618,
      "step": 125000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0003979482562229017,
      "loss": 4.762,
      "step": 126000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.000397915340374997,
      "loss": 4.7562,
      "step": 127000
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00039788216397484706,
      "loss": 4.7528,
      "step": 128000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00039784876063314606,
      "loss": 4.7567,
      "step": 129000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00039781506352031947,
      "loss": 4.7554,
      "step": 130000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00039778114007485855,
      "loss": 4.7494,
      "step": 131000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00039774692242662465,
      "loss": 4.7591,
      "step": 132000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00039771244444786484,
      "loss": 4.7605,
      "step": 133000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0003976777061839689,
      "loss": 4.7469,
      "step": 134000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00039764274280914674,
      "loss": 4.7506,
      "step": 135000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00039760748437268835,
      "loss": 4.7506,
      "step": 136000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0003975720014377832,
      "loss": 4.7509,
      "step": 137000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00039753622301424524,
      "loss": 4.7488,
      "step": 138000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00039750022070592105,
      "loss": 4.7544,
      "step": 139000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0003974639224835218,
      "loss": 4.7502,
      "step": 140000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0003974274009911748,
      "loss": 4.7433,
      "step": 141000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00039739058316086716,
      "loss": 4.7466,
      "step": 142000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0003973535798838411,
      "loss": 4.7469,
      "step": 143000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0003973162428990996,
      "loss": 4.7414,
      "step": 144000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00039727864615081464,
      "loss": 4.7418,
      "step": 145000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0003972407896884818,
      "loss": 4.7484,
      "step": 146000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00039720271180775053,
      "loss": 4.7454,
      "step": 147000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0003971643363267646,
      "loss": 4.744,
      "step": 148000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00039712577881131754,
      "loss": 4.7369,
      "step": 149000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00039708688477304655,
      "loss": 4.7375,
      "step": 150000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0003970477312731783,
      "loss": 4.7414,
      "step": 151000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0003970083183632576,
      "loss": 4.7389,
      "step": 152000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0003969686858969712,
      "loss": 4.7378,
      "step": 153000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0003969287545822263,
      "loss": 4.7372,
      "step": 154000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00039688860433410763,
      "loss": 4.7393,
      "step": 155000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00039684815482460387,
      "loss": 4.7315,
      "step": 156000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00039680748700586993,
      "loss": 4.7371,
      "step": 157000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00039676651951439873,
      "loss": 4.7353,
      "step": 158000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0003967253343389894,
      "loss": 4.7315,
      "step": 159000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00039668384908106706,
      "loss": 4.7358,
      "step": 160000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00039664210489213713,
      "loss": 4.7339,
      "step": 161000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00039660010182715526,
      "loss": 4.737,
      "step": 162000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.000396557882332566,
      "loss": 4.7261,
      "step": 163000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0003965153619404471,
      "loss": 4.7342,
      "step": 164000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0003964726257474391,
      "loss": 4.7293,
      "step": 165000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0003964295882518688,
      "loss": 4.7301,
      "step": 166000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00039638633558526285,
      "loss": 4.7316,
      "step": 167000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00039634278121264703,
      "loss": 4.7295,
      "step": 168000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0003962990122999811,
      "loss": 4.7332,
      "step": 169000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0003962549412794449,
      "loss": 4.732,
      "step": 170000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0003962106563509727,
      "loss": 4.7321,
      "step": 171000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00039616606891435896,
      "loss": 4.7276,
      "step": 172000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00039612122322838677,
      "loss": 4.7245,
      "step": 173000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0003960761645849172,
      "loss": 4.7286,
      "step": 174000
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0003960308028357847,
      "loss": 4.7239,
      "step": 175000
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0003840903997775841,
      "loss": 4.6145,
      "step": 176000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0003839104648613638,
      "loss": 4.5905,
      "step": 177000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0003837297421617577,
      "loss": 4.5891,
      "step": 178000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.000383548053178735,
      "loss": 4.5817,
      "step": 179000
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0003833652155473882,
      "loss": 4.5765,
      "step": 180000
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00038318141161813824,
      "loss": 4.574,
      "step": 181000
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0003829966423595951,
      "loss": 4.5725,
      "step": 182000
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00038281109496044006,
      "loss": 4.5666,
      "step": 183000
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00038262439893236937,
      "loss": 4.5631,
      "step": 184000
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00038243692864915963,
      "loss": 4.5591,
      "step": 185000
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0003822483097830243,
      "loss": 4.5552,
      "step": 186000
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00038205873050485524,
      "loss": 4.5543,
      "step": 187000
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.0003818683828312813,
      "loss": 4.5512,
      "step": 188000
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00038167688668914063,
      "loss": 4.5484,
      "step": 189000
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.0003814844331462512,
      "loss": 4.5501,
      "step": 190000
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0003812912171041104,
      "loss": 4.5431,
      "step": 191000
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0003810968527621949,
      "loss": 4.5418,
      "step": 192000
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00038090153407619305,
      "loss": 4.5379,
      "step": 193000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00038070526207539536,
      "loss": 4.538,
      "step": 194000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0003805082354937156,
      "loss": 4.5377,
      "step": 195000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0003803100609220069,
      "loss": 4.5354,
      "step": 196000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0003801111357514916,
      "loss": 4.5321,
      "step": 197000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.000379911062782051,
      "loss": 4.5327,
      "step": 198000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00037971004171739956,
      "loss": 4.5342,
      "step": 199000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00037950827605766894,
      "loss": 4.527,
      "step": 200000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00037930536293104657,
      "loss": 4.5297,
      "step": 201000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00037910170923078203,
      "loss": 4.5252,
      "step": 202000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00037889690831515295,
      "loss": 4.5228,
      "step": 203000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0003786911646487036,
      "loss": 4.5211,
      "step": 204000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00037848447931566176,
      "loss": 4.521,
      "step": 205000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00037827685340521773,
      "loss": 4.5257,
      "step": 206000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0003780684970458185,
      "loss": 4.5204,
      "step": 207000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0003778589942057952,
      "loss": 4.5209,
      "step": 208000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0003776487649924752,
      "loss": 4.5167,
      "step": 209000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0003774373896346034,
      "loss": 4.5142,
      "step": 210000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.00037722507921728195,
      "loss": 4.5166,
      "step": 211000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0003770122622793867,
      "loss": 4.5127,
      "step": 212000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00037679808696909655,
      "loss": 4.5163,
      "step": 213000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00037658297996835357,
      "loss": 4.513,
      "step": 214000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0003763669424107285,
      "loss": 4.5078,
      "step": 215000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00037614997543469595,
      "loss": 4.5114,
      "step": 216000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00037593208018362834,
      "loss": 4.5097,
      "step": 217000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00037571369637505247,
      "loss": 4.5072,
      "step": 218000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00037549394987438647,
      "loss": 4.5084,
      "step": 219000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00037527327855580843,
      "loss": 4.5071,
      "step": 220000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0003750519056381631,
      "loss": 4.5061,
      "step": 221000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00037482938909921175,
      "loss": 4.5075,
      "step": 222000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0003746059512444505,
      "loss": 4.5079,
      "step": 223000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0003743815932513518,
      "loss": 4.5071,
      "step": 224000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00037415631630223755,
      "loss": 4.5033,
      "step": 225000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00037393012158427186,
      "loss": 4.505,
      "step": 226000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00037370323785818266,
      "loss": 4.5032,
      "step": 227000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00037347521209812743,
      "loss": 4.5017,
      "step": 228000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0003732465015546745,
      "loss": 4.502,
      "step": 229000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00037301664955431804,
      "loss": 4.4998,
      "step": 230000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0003727858857909254,
      "loss": 4.4994,
      "step": 231000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0003725544436092979,
      "loss": 4.4985,
      "step": 232000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.000372321860881582,
      "loss": 4.499,
      "step": 233000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00037208837005222694,
      "loss": 4.4919,
      "step": 234000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0003718542072019544,
      "loss": 4.4965,
      "step": 235000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00037161890477046666,
      "loss": 4.4972,
      "step": 236000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00037138293459993847,
      "loss": 4.4988,
      "step": 237000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039974046056824423,
      "loss": 5.0173,
      "step": 238000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997382653105697,
      "loss": 5.1254,
      "step": 239000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997360630230883,
      "loss": 5.137,
      "step": 240000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997338515152591,
      "loss": 5.1396,
      "step": 241000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0003997316285596137,
      "loss": 5.1539,
      "step": 242000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039972939860216607,
      "loss": 5.1836,
      "step": 243000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039972715717864,
      "loss": 5.1907,
      "step": 244000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039972490651670964,
      "loss": 5.2177,
      "step": 245000
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00039972264888099373,
      "loss": 5.2218,
      "step": 246000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0003955398710520662,
      "loss": 4.9553,
      "step": 247000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.000395503657852559,
      "loss": 4.8679,
      "step": 248000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00039546729990487664,
      "loss": 4.8395,
      "step": 249000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0003954307972359379,
      "loss": 4.8217,
      "step": 250000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.000395394149872769,
      "loss": 4.8152,
      "step": 251000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003953573947067854,
      "loss": 4.8026,
      "step": 252000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003953204581812889,
      "loss": 4.8017,
      "step": 253000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003952834141966186,
      "loss": 4.7977,
      "step": 254000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00039524618861807426,
      "loss": 4.7963,
      "step": 255000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003952088184819814,
      "loss": 4.79,
      "step": 256000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003951713414028577,
      "loss": 4.7877,
      "step": 257000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003951336823792677,
      "loss": 4.7854,
      "step": 258000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003950959167570807,
      "loss": 4.7945,
      "step": 259000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00039505796895741114,
      "loss": 4.7845,
      "step": 260000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00039501991490389356,
      "loss": 4.7821,
      "step": 261000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.000394981678440416,
      "loss": 4.7798,
      "step": 262000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00039494333606815397,
      "loss": 4.7892,
      "step": 263000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00039490481105399416,
      "loss": 4.7885,
      "step": 264000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.000394866141735037,
      "loss": 4.7838,
      "step": 265000
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0003948273670255641,
      "loss": 4.7812,
      "step": 266000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00039478840932724265,
      "loss": 4.7749,
      "step": 267000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00039474934658425046,
      "loss": 4.7823,
      "step": 268000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00039471010062182423,
      "loss": 4.7809,
      "step": 269000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00039467074996088307,
      "loss": 4.785,
      "step": 270000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0003946312158504645,
      "loss": 4.7753,
      "step": 271000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00039459157738799654,
      "loss": 4.775,
      "step": 272000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0003945517552465506,
      "loss": 4.7755,
      "step": 273000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0003945118290998296,
      "loss": 4.7849,
      "step": 274000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00039447175922715307,
      "loss": 4.7806,
      "step": 275000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00039443150533232405,
      "loss": 4.7791,
      "step": 276000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00039439110748312647,
      "loss": 4.7798,
      "step": 277000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00039435056570947044,
      "loss": 4.7794,
      "step": 278000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0003943099207989059,
      "loss": 4.7821,
      "step": 279000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0003942690914103384,
      "loss": 4.7815,
      "step": 280000
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00039422811818765134,
      "loss": 4.7713,
      "step": 281000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00039418704235002724,
      "loss": 4.7707,
      "step": 282000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00039414582302643454,
      "loss": 4.7764,
      "step": 283000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0003941044187712859,
      "loss": 4.7864,
      "step": 284000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00039406287080393925,
      "loss": 4.774,
      "step": 285000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0003940211791551559,
      "loss": 4.7698,
      "step": 286000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00039397938576284634,
      "loss": 4.7754,
      "step": 287000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00039393740698750394,
      "loss": 4.7764,
      "step": 288000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0003938952846236165,
      "loss": 4.7764,
      "step": 289000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00039385301870237103,
      "loss": 4.7747,
      "step": 290000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00039381065173618853,
      "loss": 4.7784,
      "step": 291000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00039376809893769117,
      "loss": 4.7792,
      "step": 292000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00039372544544391313,
      "loss": 4.7726,
      "step": 293000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0003936826488052433,
      "loss": 4.7736,
      "step": 294000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0003936396659988803,
      "loss": 4.7759,
      "step": 295000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00039359653982441555,
      "loss": 4.7719,
      "step": 296000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00039355327031377916,
      "loss": 4.7775,
      "step": 297000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0003935099009833917,
      "loss": 4.7814,
      "step": 298000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00039346634503988233,
      "loss": 4.7722,
      "step": 299000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0003934226458565957,
      "loss": 4.7745,
      "step": 300000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.000393378847379798,
      "loss": 4.7748,
      "step": 301000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00039333486195728426,
      "loss": 4.7774,
      "step": 302000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00039329077759239523,
      "loss": 4.7777,
      "step": 303000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.000393246506060789,
      "loss": 4.7707,
      "step": 304000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0003932021359382358,
      "loss": 4.7792,
      "step": 305000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.000393157578428518,
      "loss": 4.7711,
      "step": 306000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0003931128779076294,
      "loss": 4.7712,
      "step": 307000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0003930680793235711,
      "loss": 4.7732,
      "step": 308000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00039302309302266194,
      "loss": 4.7753,
      "step": 309000
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00039297800901073876,
      "loss": 4.7747,
      "step": 310000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003929327370629047,
      "loss": 4.7756,
      "step": 311000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003928873222703692,
      "loss": 4.7733,
      "step": 312000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003928418102956833,
      "loss": 4.7714,
      "step": 313000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003927961100574846,
      "loss": 4.773,
      "step": 314000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003927503129900122,
      "loss": 4.7742,
      "step": 315000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003927043274413583,
      "loss": 4.7757,
      "step": 316000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0003926582454165936,
      "loss": 4.7738,
      "step": 317000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00039261202103549754,
      "loss": 4.7675,
      "step": 318000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0003925656078478171,
      "loss": 4.7782,
      "step": 319000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00039251905208725256,
      "loss": 4.7703,
      "step": 320000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00039247235378827314,
      "loss": 4.7726,
      "step": 321000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0003924255598974257,
      "loss": 4.7679,
      "step": 322000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00039237857676789823,
      "loss": 4.767,
      "step": 323000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0003923314984006603,
      "loss": 4.7621,
      "step": 324000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0003922842305795883,
      "loss": 4.7717,
      "step": 325000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00039223686787524505,
      "loss": 4.7682,
      "step": 326000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0003921893155024742,
      "loss": 4.783,
      "step": 327000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0003921416686011523,
      "loss": 4.7705,
      "step": 328000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0003920938318173703,
      "loss": 4.7678,
      "step": 329000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0003920459008600368,
      "loss": 4.7697,
      "step": 330000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0003919977798067727,
      "loss": 4.7749,
      "step": 331000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00039194956493523547,
      "loss": 4.7797,
      "step": 332000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00039190115975485935,
      "loss": 4.7678,
      "step": 333000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0003918526124935473,
      "loss": 4.7674,
      "step": 334000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0003918039719474887,
      "loss": 4.7711,
      "step": 335000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0003917551407742319,
      "loss": 4.7686,
      "step": 336000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00039170621667219887,
      "loss": 4.7708,
      "step": 337000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00039165710173146836,
      "loss": 4.7681,
      "step": 338000
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0003916078942182069,
      "loss": 4.7697,
      "step": 339000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0003915584956553133,
      "loss": 4.7665,
      "step": 340000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00039150900487640804,
      "loss": 4.7757,
      "step": 341000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00039145932283750107,
      "loss": 4.7653,
      "step": 342000
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00038084013217180266,
      "loss": 4.7047,
      "step": 343000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0003807297139737221,
      "loss": 4.6748,
      "step": 344000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0003806191054905468,
      "loss": 4.6694,
      "step": 345000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00038050808546821253,
      "loss": 4.6753,
      "step": 346000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0003803967646603707,
      "loss": 4.6691,
      "step": 347000
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0003802851432525181,
      "loss": 4.6667,
      "step": 348000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0003801733335024691,
      "loss": 4.6662,
      "step": 349000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0003800611117532231,
      "loss": 4.6614,
      "step": 350000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.000379948589963274,
      "loss": 4.6566,
      "step": 351000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00037983588129147694,
      "loss": 4.6586,
      "step": 352000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0003797227602826864,
      "loss": 4.6598,
      "step": 353000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00037960933979699685,
      "loss": 4.6561,
      "step": 354000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0003794956200234039,
      "loss": 4.649,
      "step": 355000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00037938171531961043,
      "loss": 4.6508,
      "step": 356000
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0003792673978380055,
      "loss": 4.6515,
      "step": 357000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00037915301116867755,
      "loss": 4.6502,
      "step": 358000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00037903821210187236,
      "loss": 4.6446,
      "step": 359000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00037892299993410043,
      "loss": 4.6457,
      "step": 360000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0003788074896220918,
      "loss": 4.6399,
      "step": 361000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0003786916813583244,
      "loss": 4.6416,
      "step": 362000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0003785755753357728,
      "loss": 4.6394,
      "step": 363000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00037845917174790744,
      "loss": 4.644,
      "step": 364000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0003783425876381264,
      "loss": 4.6455,
      "step": 365000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0003782255897991082,
      "loss": 4.6427,
      "step": 366000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00037810841242106534,
      "loss": 4.6383,
      "step": 367000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0003779908211099408,
      "loss": 4.6398,
      "step": 368000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0003778729332078945,
      "loss": 4.639,
      "step": 369000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00037775474891136603,
      "loss": 4.642,
      "step": 370000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0003776363870456683,
      "loss": 4.6378,
      "step": 371000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00037751761084737167,
      "loss": 4.6251,
      "step": 372000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0003773986580663642,
      "loss": 4.6378,
      "step": 373000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0003772792907571875,
      "loss": 4.638,
      "step": 374000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0003771598676628421,
      "loss": 4.6329,
      "step": 375000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0003770399103327158,
      "loss": 4.6331,
      "step": 376000
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.000376919657996196,
      "loss": 4.6307,
      "step": 377000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0003767992315479937,
      "loss": 4.6366,
      "step": 378000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0003766783900948219,
      "loss": 4.6312,
      "step": 379000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0003765572542376675,
      "loss": 4.6322,
      "step": 380000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00037643582417838255,
      "loss": 4.6272,
      "step": 381000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0003763142219901536,
      "loss": 4.6261,
      "step": 382000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0003761922044278193,
      "loss": 4.6332,
      "step": 383000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0003760698932716468,
      "loss": 4.6285,
      "step": 384000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.0003759472887254464,
      "loss": 4.6315,
      "step": 385000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00037582451403762754,
      "loss": 4.6252,
      "step": 386000
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00037570132361763626,
      "loss": 4.6238,
      "step": 387000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00037557808768022013,
      "loss": 4.6309,
      "step": 388000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0003754543124991863,
      "loss": 4.6227,
      "step": 389000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0003753302449538835,
      "loss": 4.6264,
      "step": 390000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0003752058852510489,
      "loss": 4.6297,
      "step": 391000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00037508135839531953,
      "loss": 4.6229,
      "step": 392000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0003749564152912182,
      "loss": 4.6277,
      "step": 393000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0003748313060326983,
      "loss": 4.6258,
      "step": 394000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0003747057803592816,
      "loss": 4.6298,
      "step": 395000
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0003745799635688954,
      "loss": 4.6275,
      "step": 396000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0003744538558711915,
      "loss": 4.6305,
      "step": 397000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00037432758401983454,
      "loss": 4.6254,
      "step": 398000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0003742008954287709,
      "loss": 4.6232,
      "step": 399000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00037407404368583003,
      "loss": 4.6243,
      "step": 400000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0003739467750449806,
      "loss": 4.6271,
      "step": 401000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00037381921655191264,
      "loss": 4.625,
      "step": 402000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0003736914964119172,
      "loss": 4.6207,
      "step": 403000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0003735633591418774,
      "loss": 4.6222,
      "step": 404000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0003734349326585155,
      "loss": 4.6274,
      "step": 405000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00037330621717583185,
      "loss": 4.6215,
      "step": 406000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00037317734205675264,
      "loss": 4.6239,
      "step": 407000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00037304817894443345,
      "loss": 4.6213,
      "step": 408000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0003729185983290953,
      "loss": 4.6217,
      "step": 409000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00037278872957481737,
      "loss": 4.6203,
      "step": 410000
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00037265870319842543,
      "loss": 4.6233,
      "step": 411000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0003725282591035563,
      "loss": 4.6189,
      "step": 412000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0003723976583952915,
      "loss": 4.6208,
      "step": 413000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0003722666398284116,
      "loss": 4.6228,
      "step": 414000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.000372135334208968,
      "loss": 4.6152,
      "step": 415000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00037200374175575874,
      "loss": 4.6127,
      "step": 416000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00037187199471021856,
      "loss": 4.6182,
      "step": 417000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0003717399618422258,
      "loss": 4.6196,
      "step": 418000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0003716075107774151,
      "loss": 4.6225,
      "step": 419000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00037147477375836516,
      "loss": 4.6181,
      "step": 420000
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0003713418841716614,
      "loss": 4.6207,
      "step": 421000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00037120857619355976,
      "loss": 4.6168,
      "step": 422000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00037107511666167,
      "loss": 4.6148,
      "step": 423000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0003709412386121666,
      "loss": 4.6241,
      "step": 424000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00037080707571865136,
      "loss": 4.6167,
      "step": 425000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.000370672762794291,
      "loss": 4.618,
      "step": 426000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.0003705380311681886,
      "loss": 4.6185,
      "step": 427000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.00037040301536994983,
      "loss": 4.6159,
      "step": 428000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.00037026771562455524,
      "loss": 4.6172,
      "step": 429000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.0003701322678825694,
      "loss": 4.6185,
      "step": 430000
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0003699964012030795,
      "loss": 4.6142,
      "step": 431000
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0003698605238364365,
      "loss": 4.619,
      "step": 432000
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0003697240914104684,
      "loss": 4.6125,
      "step": 433000
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0003695873761686538,
      "loss": 4.613,
      "step": 434000
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00036945037833880495,
      "loss": 4.6193,
      "step": 435000
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.00036931337299122744,
      "loss": 4.6195,
      "step": 436000
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.00036917581123466377,
      "loss": 4.6155,
      "step": 437000
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0003690379675758677,
      "loss": 4.6124,
      "step": 438000
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0003688998422445319,
      "loss": 4.6118,
      "step": 439000
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0003687617125650919,
      "loss": 4.6118,
      "step": 440000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00036862302514182444,
      "loss": 4.6115,
      "step": 441000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0003684841958461244,
      "loss": 4.6145,
      "step": 442000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0003683449469728375,
      "loss": 4.6107,
      "step": 443000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00036820541758180987,
      "loss": 4.6125,
      "step": 444000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00036806574785514423,
      "loss": 4.6076,
      "step": 445000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0003679256584065426,
      "loss": 4.6135,
      "step": 446000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.00036778528913887205,
      "loss": 4.611,
      "step": 447000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0003676447810744613,
      "loss": 4.6169,
      "step": 448000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.00036750385315005585,
      "loss": 4.6124,
      "step": 449000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0003673626461094468,
      "loss": 4.6091,
      "step": 450000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00036722130181307566,
      "loss": 4.6084,
      "step": 451000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.0003670795375249432,
      "loss": 4.6093,
      "step": 452000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.0003669376370093399,
      "loss": 4.6098,
      "step": 453000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00036679531641764155,
      "loss": 4.6088,
      "step": 454000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00036665271789039375,
      "loss": 4.6135,
      "step": 455000
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00036650984166521224,
      "loss": 4.6111,
      "step": 456000
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00036636697456429214,
      "loss": 4.6087,
      "step": 457000
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00036622354421214545,
      "loss": 4.6105,
      "step": 458000
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.0003660798368772088,
      "loss": 4.6142,
      "step": 459000
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00036593599692117735,
      "loss": 4.6036,
      "step": 460000
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00036579173661589563,
      "loss": 4.6129,
      "step": 461000
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00036564720004735664,
| "loss": 4.6066, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00036550253240678936, | |
| "loss": 4.6065, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0003653574443103918, | |
| "loss": 4.6065, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0003652122261755973, | |
| "loss": 4.6055, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00036506658751743075, | |
| "loss": 4.6077, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003649206738043425, | |
| "loss": 4.6113, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00036477463160518477, | |
| "loss": 4.6056, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00036462816878657725, | |
| "loss": 4.608, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.000364481431643597, | |
| "loss": 4.6023, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0003643345675688004, | |
| "loss": 4.6027, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00036418728278478005, | |
| "loss": 4.6068, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00036403972441104724, | |
| "loss": 4.6072, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0003638920406616534, | |
| "loss": 4.6038, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00036374393611956704, | |
| "loss": 4.6014, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00036359555872652883, | |
| "loss": 4.6013, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00036344705751586385, | |
| "loss": 4.606, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.00024019467959966674, | |
| "loss": 4.4837, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 0.00023963222037118084, | |
| "loss": 4.4223, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 0.00023906943468937218, | |
| "loss": 4.39, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.00023850689045664867, | |
| "loss": 4.3772, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.00023794346609281965, | |
| "loss": 4.3726, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.00023737972918605284, | |
| "loss": 4.3631, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 0.00023681624857694363, | |
| "loss": 4.3497, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 0.00023625190081838816, | |
| "loss": 4.3482, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 0.00023568781924440977, | |
| "loss": 4.3443, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 0.0002351228792097228, | |
| "loss": 4.3383, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.000234558215237771, | |
| "loss": 4.3393, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.0002339927015221048, | |
| "loss": 4.3349, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.00023342690780622, | |
| "loss": 4.3287, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.00023286083875059848, | |
| "loss": 4.3263, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 0.0002322950654913731, | |
| "loss": 4.3202, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 0.0002317284600104378, | |
| "loss": 4.3198, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 0.00023116216017581755, | |
| "loss": 4.3163, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.00023059503691953928, | |
| "loss": 4.3163, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.00023002766164983935, | |
| "loss": 4.3123, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.00022946060678482666, | |
| "loss": 4.3091, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.00022889274175117623, | |
| "loss": 4.3067, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.00022832463872602635, | |
| "loss": 4.3056, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.00022775687084019932, | |
| "loss": 4.3042, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.00022718887477616112, | |
| "loss": 4.306, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.00022662008630440305, | |
| "loss": 4.3007, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.0002260510785611647, | |
| "loss": 4.2996, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.00022548185623340192, | |
| "loss": 4.2993, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.00022491299354534364, | |
| "loss": 4.2962, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 0.0002243433563192932, | |
| "loss": 4.2954, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 0.00022377408851168427, | |
| "loss": 4.2964, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.00022320405513710757, | |
| "loss": 4.2958, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.00022263440094754997, | |
| "loss": 4.2934, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 0.0002220639901872479, | |
| "loss": 4.2935, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 0.00022149396836606137, | |
| "loss": 4.2906, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.00022092319899525643, | |
| "loss": 4.2897, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.00022035282830486165, | |
| "loss": 4.2916, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 0.00021978171911058022, | |
| "loss": 4.2891, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 0.00021921101832488073, | |
| "loss": 4.2867, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 0.00021863958810533452, | |
| "loss": 4.288, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.00021806800435022003, | |
| "loss": 4.2857, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.00021749684357306648, | |
| "loss": 4.285, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 0.00021692496701433082, | |
| "loss": 4.2832, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 0.00021635352312751783, | |
| "loss": 4.2824, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.0002157819448048862, | |
| "loss": 4.2806, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.0002152096644013863, | |
| "loss": 4.2772, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 0.00021463725871483544, | |
| "loss": 4.2798, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 0.0002140647324601787, | |
| "loss": 4.2798, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 0.00021349266305175916, | |
| "loss": 4.2802, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 0.00021291990991843793, | |
| "loss": 4.2786, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.0002123476232740738, | |
| "loss": 4.2791, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.00021177466211441055, | |
| "loss": 4.274, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.0002112021770710695, | |
| "loss": 4.2765, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.00021062959993907988, | |
| "loss": 4.2751, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.00021005636214541413, | |
| "loss": 4.2751, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 0.00020948304151680226, | |
| "loss": 4.2744, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 0.00020891021621191204, | |
| "loss": 4.273, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.00020833674415252564, | |
| "loss": 4.2769, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.00020776320342280467, | |
| "loss": 4.2695, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 0.00020718959874704363, | |
| "loss": 4.2689, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 0.00020661650854196894, | |
| "loss": 4.2671, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.0002060427902012143, | |
| "loss": 4.2677, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.0002054701696683469, | |
| "loss": 4.2713, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.00020489635658938387, | |
| "loss": 4.2659, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 0.0002043225031787951, | |
| "loss": 4.2666, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 0.00020374861416345058, | |
| "loss": 4.2609, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 0.0002031746942705136, | |
| "loss": 4.2631, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 0.0002026013221849334, | |
| "loss": 4.2656, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.0002020273547383406, | |
| "loss": 4.2688, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.00020145394458320146, | |
| "loss": 4.2592, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.00020087994847524482, | |
| "loss": 4.2617, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.00020030651912449513, | |
| "loss": 4.2613, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.00019973251324840986, | |
| "loss": 4.2557, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.0001991590835766299, | |
| "loss": 4.2604, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.00019858508682597277, | |
| "loss": 4.2614, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.00019801167570775345, | |
| "loss": 4.2587, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.0001974377069757808, | |
| "loss": 4.2567, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 0.000196863759349592, | |
| "loss": 4.2542, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 0.0001962904114641484, | |
| "loss": 4.2528, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.00019571652019933017, | |
| "loss": 4.2529, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.00019514323805461362, | |
| "loss": 4.2504, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 0.00019456999583540802, | |
| "loss": 4.2557, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 0.00019399622450669583, | |
| "loss": 4.2527, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.00019342250263149486, | |
| "loss": 4.2493, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.0001928488349355918, | |
| "loss": 4.2533, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.00019227579972212256, | |
| "loss": 4.251, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 0.00019170225449436132, | |
| "loss": 4.2442, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 0.00019112935105686604, | |
| "loss": 4.2508, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.00019055652054145262, | |
| "loss": 4.2482, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.00018998319437138936, | |
| "loss": 4.2453, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.0001894099507104425, | |
| "loss": 4.2467, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.00018883679428045936, | |
| "loss": 4.2429, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.00018826430281954561, | |
| "loss": 4.2436, | |
| "step": 569000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.00018769190783313742, | |
| "loss": 4.2462, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 0.00018711904121225677, | |
| "loss": 4.2429, | |
| "step": 571000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 0.000186546280692719, | |
| "loss": 4.2415, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 0.0001859736309923917, | |
| "loss": 4.242, | |
| "step": 573000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.00018540166930311399, | |
| "loss": 4.2415, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.00018482925526851332, | |
| "loss": 4.2394, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 0.00018425696619637965, | |
| "loss": 4.2393, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 0.00018368537889375085, | |
| "loss": 4.2374, | |
| "step": 577000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.00018311335375069304, | |
| "loss": 4.2376, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.00018254203951910075, | |
| "loss": 4.2361, | |
| "step": 579000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 0.00018197029713347917, | |
| "loss": 4.2363, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 0.000181399274777884, | |
| "loss": 4.2322, | |
| "step": 581000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 0.00018082783396875207, | |
| "loss": 4.235, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 0.00018025655108206925, | |
| "loss": 4.2327, | |
| "step": 583000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.0001796854308235321, | |
| "loss": 4.2323, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.0001791150487652753, | |
| "loss": 4.2297, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.00017854426770033718, | |
| "loss": 4.2339, | |
| "step": 586000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.00017797423388223084, | |
| "loss": 4.2315, | |
| "step": 587000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.00017740381079830306, | |
| "loss": 4.2289, | |
| "step": 588000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 0.0001768341439831626, | |
| "loss": 4.2285, | |
| "step": 589000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 0.00017626409765587338, | |
| "loss": 4.2273, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.000175694246842843, | |
| "loss": 4.2272, | |
| "step": 591000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.00017512459623797167, | |
| "loss": 4.2267, | |
| "step": 592000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.00017455571987530613, | |
| "loss": 4.2242, | |
| "step": 593000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.00017398648354988546, | |
| "loss": 4.2238, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00017341803041304732, | |
| "loss": 4.2245, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00017284922710364303, | |
| "loss": 4.2219, | |
| "step": 596000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00017228121590341918, | |
| "loss": 4.2215, | |
| "step": 597000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.0001717128643323442, | |
| "loss": 4.2196, | |
| "step": 598000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.00017114474576434977, | |
| "loss": 4.2186, | |
| "step": 599000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.00017057686487906743, | |
| "loss": 4.2218, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.00017000922635417116, | |
| "loss": 4.2175, | |
| "step": 601000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 0.00016944183486533842, | |
| "loss": 4.2174, | |
| "step": 602000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 0.00016887582911145858, | |
| "loss": 4.2208, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.00016830894519618436, | |
| "loss": 4.2176, | |
| "step": 604000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.00016774232232230643, | |
| "loss": 4.2131, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.00016717596515713635, | |
| "loss": 4.2148, | |
| "step": 606000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.00016661044431598456, | |
| "loss": 4.2163, | |
| "step": 607000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.0001660446322840068, | |
| "loss": 4.2121, | |
| "step": 608000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.0001654796653358085, | |
| "loss": 4.2126, | |
| "step": 609000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.0001649144170608772, | |
| "loss": 4.2111, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.00016435002260167044, | |
| "loss": 4.2093, | |
| "step": 611000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.0001637853566890836, | |
| "loss": 4.2104, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.00016322155329606282, | |
| "loss": 4.2104, | |
| "step": 613000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.00016265748833194975, | |
| "loss": 4.2095, | |
| "step": 614000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 0.00016209373096067142, | |
| "loss": 4.2061, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 0.00016153141240150847, | |
| "loss": 4.2059, | |
| "step": 616000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.0001609682835060673, | |
| "loss": 4.2093, | |
| "step": 617000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.00016040547611755718, | |
| "loss": 4.2025, | |
| "step": 618000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.00015984299487186134, | |
| "loss": 4.2069, | |
| "step": 619000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.00015928140638588216, | |
| "loss": 4.2031, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.00015872015263128903, | |
| "loss": 4.2021, | |
| "step": 621000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.00015815867691759442, | |
| "loss": 4.2014, | |
| "step": 622000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.00015759754585375357, | |
| "loss": 4.2014, | |
| "step": 623000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.00015703676406184148, | |
| "loss": 4.2015, | |
| "step": 624000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.0001564768964106519, | |
| "loss": 4.2018, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.000155917386545611, | |
| "loss": 4.2003, | |
| "step": 626000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.00015535767954213264, | |
| "loss": 4.1976, | |
| "step": 627000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 0.00015479834026051583, | |
| "loss": 4.1972, | |
| "step": 628000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 0.00015423937330807675, | |
| "loss": 4.1957, | |
| "step": 629000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00015368134168927352, | |
| "loss": 4.1951, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00015312313282100077, | |
| "loss": 4.1952, | |
| "step": 631000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00015256586770904422, | |
| "loss": 4.1928, | |
| "step": 632000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 0.00015200843529853173, | |
| "loss": 4.1941, | |
| "step": 633000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 0.00015145195503595184, | |
| "loss": 4.1938, | |
| "step": 634000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.00015089531743123636, | |
| "loss": 4.1933, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.00015033964033472967, | |
| "loss": 4.1919, | |
| "step": 636000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.00014978381585768676, | |
| "loss": 4.191, | |
| "step": 637000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.0001492289602175133, | |
| "loss": 4.1907, | |
| "step": 638000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.00014867396716325404, | |
| "loss": 4.1906, | |
| "step": 639000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.00014811995124263547, | |
| "loss": 4.19, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.00014756580787890456, | |
| "loss": 4.1854, | |
| "step": 641000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.0001470126499134229, | |
| "loss": 4.1862, | |
| "step": 642000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.0001464593744799972, | |
| "loss": 4.1804, | |
| "step": 643000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.00014590709267699477, | |
| "loss": 4.1812, | |
| "step": 644000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.00014535470338508303, | |
| "loss": 4.1811, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 0.0001448033159230627, | |
| "loss": 4.1812, | |
| "step": 646000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 0.00014425238221106002, | |
| "loss": 4.1827, | |
| "step": 647000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 0.00014370135598273356, | |
| "loss": 4.1792, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 0.00014315079349020695, | |
| "loss": 4.1829, | |
| "step": 649000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.00014260069926850117, | |
| "loss": 4.1798, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.00014205162723252818, | |
| "loss": 4.1813, | |
| "step": 651000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.00014150248266247203, | |
| "loss": 4.1771, | |
| "step": 652000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.0001409543683610207, | |
| "loss": 4.1744, | |
| "step": 653000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.00014040673945025616, | |
| "loss": 4.1791, | |
| "step": 654000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 0.00013985905299225343, | |
| "loss": 4.1795, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 0.00013931186191936434, | |
| "loss": 4.1764, | |
| "step": 656000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0001387651707388392, | |
| "loss": 4.1717, | |
| "step": 657000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0001382195298871527, | |
| "loss": 4.1585, | |
| "step": 658000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 0.00013767385148545907, | |
| "loss": 4.1503, | |
| "step": 659000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 0.0001371292313756203, | |
| "loss": 4.1517, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.00013658458371390849, | |
| "loss": 4.1508, | |
| "step": 661000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.00013604100227223385, | |
| "loss": 4.1545, | |
| "step": 662000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 0.00013549740327772723, | |
| "loss": 4.1505, | |
| "step": 663000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 0.00013495542119768334, | |
| "loss": 4.1496, | |
| "step": 664000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 0.00013441288822507396, | |
| "loss": 4.1504, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 0.0001338708954980116, | |
| "loss": 4.1514, | |
| "step": 666000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 0.0001333299886553773, | |
| "loss": 4.1504, | |
| "step": 667000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 0.00013278908925682, | |
| "loss": 4.1553, | |
| "step": 668000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 0.0001322487434791535, | |
| "loss": 4.154, | |
| "step": 669000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 0.00013170895577324293, | |
| "loss": 4.1501, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 0.00013117026952808839, | |
| "loss": 4.1481, | |
| "step": 671000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 0.00013063161073068494, | |
| "loss": 4.153, | |
| "step": 672000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 0.00013009406112599048, | |
| "loss": 4.148, | |
| "step": 673000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.00012955708619025508, | |
| "loss": 4.1457, | |
| "step": 674000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.00012902015369654687, | |
| "loss": 4.1496, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 0.00012848434192302686, | |
| "loss": 4.1481, | |
| "step": 676000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 0.00012794858258770753, | |
| "loss": 4.148, | |
| "step": 677000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 0.00012741341674486485, | |
| "loss": 4.1484, | |
| "step": 678000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.00012687884880269694, | |
| "loss": 4.1446, | |
| "step": 679000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.00012634541682779958, | |
| "loss": 4.1428, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 0.00012581205728294073, | |
| "loss": 4.1455, | |
| "step": 681000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 0.00012527984127101713, | |
| "loss": 4.1422, | |
| "step": 682000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.0001247477076791393, | |
| "loss": 4.1427, | |
| "step": 683000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.00012421672514822168, | |
| "loss": 4.1434, | |
| "step": 684000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 0.00012368583502464424, | |
| "loss": 4.1414, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 0.00012315610345216445, | |
| "loss": 4.1437, | |
| "step": 686000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.00012262647427127763, | |
| "loss": 4.1419, | |
| "step": 687000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.0001220980110934919, | |
| "loss": 4.1379, | |
| "step": 688000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.00012156966028818173, | |
| "loss": 4.1382, | |
| "step": 689000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 0.00012104248289959676, | |
| "loss": 4.1365, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 0.00012051542786067112, | |
| "loss": 4.1394, | |
| "step": 691000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 0.00011998955361347148, | |
| "loss": 4.1366, | |
| "step": 692000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 0.00011946381168908787, | |
| "loss": 4.1347, | |
| "step": 693000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 0.00011893873314682198, | |
| "loss": 4.1357, | |
| "step": 694000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 0.00011841432231178195, | |
| "loss": 4.1337, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.0001178911069052703, | |
| "loss": 4.1347, | |
| "step": 696000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.00011736804375947676, | |
| "loss": 4.1351, | |
| "step": 697000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 0.00011684618329987129, | |
| "loss": 4.1297, | |
| "step": 698000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 0.00011632448506008744, | |
| "loss": 4.1351, | |
| "step": 699000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 0.00011580399672456457, | |
| "loss": 4.1329, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.00011528368056262728, | |
| "loss": 4.1313, | |
| "step": 701000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.00011476458148319966, | |
| "loss": 4.1265, | |
| "step": 702000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00011424566452545455, | |
| "loss": 4.129, | |
| "step": 703000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00011372797178840713, | |
| "loss": 4.1299, | |
| "step": 704000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 0.00011321047111514422, | |
| "loss": 4.1257, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 0.0001126942017604717, | |
| "loss": 4.1281, | |
| "step": 706000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 0.00011217813440536418, | |
| "loss": 4.1266, | |
| "step": 707000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 0.00011166279044499894, | |
| "loss": 4.1249, | |
| "step": 708000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.00011114817412429949, | |
| "loss": 4.1247, | |
| "step": 709000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.00011063428968219605, | |
| "loss": 4.1229, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.00011012114135158998, | |
| "loss": 4.1245, | |
| "step": 711000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 0.00010960924539610728, | |
| "loss": 4.1261, | |
| "step": 712000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 0.00010909758121624652, | |
| "loss": 4.1228, | |
| "step": 713000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.00010858717634585534, | |
| "loss": 4.1197, | |
| "step": 714000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.00010807701315830314, | |
| "loss": 4.1174, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 0.00010756862520028245, | |
| "loss": 4.1188, | |
| "step": 716000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 0.00010705997903373485, | |
| "loss": 4.1191, | |
| "step": 717000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.00010655209842052723, | |
| "loss": 4.114, | |
| "step": 718000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.00010604549426910888, | |
| "loss": 4.1175, | |
| "step": 719000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 0.00010553915653058473, | |
| "loss": 4.1166, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 0.00010503359687251983, | |
| "loss": 4.1143, | |
| "step": 721000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 0.00010452881945924391, | |
| "loss": 4.1152, | |
| "step": 722000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.00010402533204546334, | |
| "loss": 4.1116, | |
| "step": 723000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.00010352213079632074, | |
| "loss": 4.1111, | |
| "step": 724000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 0.00010301972424201705, | |
| "loss": 4.1103, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 0.00010251861772823774, | |
| "loss": 4.1111, | |
| "step": 726000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 0.00010201781216707713, | |
| "loss": 4.1106, | |
| "step": 727000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 0.00010151831328589558, | |
| "loss": 4.1069, | |
| "step": 728000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.00010101962398354699, | |
| "loss": 4.1067, | |
| "step": 729000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.00010052174835955799, | |
| "loss": 4.1035, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 0.00010002419336242872, | |
| "loss": 4.1095, | |
| "step": 731000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 9.952746187288931e-05, | |
| "loss": 4.1049, | |
| "step": 732000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 9.903155798255135e-05, | |
| "loss": 4.0988, | |
| "step": 733000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 9.853648577620898e-05, | |
| "loss": 4.1043, | |
| "step": 734000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 9.804274314943199e-05, | |
| "loss": 4.1043, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 9.754934569616405e-05, | |
| "loss": 4.1018, | |
| "step": 736000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 9.705777639819362e-05, | |
| "loss": 4.099, | |
| "step": 737000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 9.656606908833878e-05, | |
| "loss": 4.1011, | |
| "step": 738000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 9.6075213770881e-05, | |
| "loss": 4.1025, | |
| "step": 739000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 9.558570405937759e-05, | |
| "loss": 4.1005, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 9.509656398720454e-05, | |
| "loss": 4.0979, | |
| "step": 741000 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 9.46082880118432e-05, | |
| "loss": 4.0973, | |
| "step": 742000 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 9.412088015525628e-05, | |
| "loss": 4.0912, | |
| "step": 743000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 9.363434443225589e-05, | |
| "loss": 4.0913, | |
| "step": 744000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 9.31486848504702e-05, | |
| "loss": 4.0922, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 9.266390541031052e-05, | |
| "loss": 4.095, | |
| "step": 746000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 9.218049355729118e-05, | |
| "loss": 4.0916, | |
| "step": 747000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 9.169748548247643e-05, | |
| "loss": 4.0896, | |
| "step": 748000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 9.121585117197211e-05, | |
| "loss": 4.0896, | |
| "step": 749000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 9.073463036084202e-05, | |
| "loss": 4.0849, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 9.025430957607068e-05, | |
| "loss": 4.0861, | |
| "step": 751000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 8.977489277409341e-05, | |
| "loss": 4.0873, | |
| "step": 752000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 8.929686195794506e-05, | |
| "loss": 4.083, | |
| "step": 753000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 8.882021833036489e-05, | |
| "loss": 4.0873, | |
| "step": 754000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 8.834353438745977e-05, | |
| "loss": 4.083, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 8.786824547005008e-05, | |
| "loss": 4.0832, | |
| "step": 756000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 8.739340396441291e-05, | |
| "loss": 4.0828, | |
| "step": 757000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 8.691949000704588e-05, | |
| "loss": 4.087, | |
| "step": 758000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 8.644650750161096e-05, | |
| "loss": 4.0797, | |
| "step": 759000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 8.597446034409749e-05, | |
| "loss": 4.0808, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 8.55033524227903e-05, | |
| "loss": 4.0762, | |
| "step": 761000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 8.503365731066581e-05, | |
| "loss": 4.0769, | |
| "step": 762000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 8.456443854672643e-05, | |
| "loss": 4.0744, | |
| "step": 763000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 8.409617063343962e-05, | |
| "loss": 4.0785, | |
| "step": 764000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 8.362885742796067e-05, | |
| "loss": 4.074, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 8.316296865415034e-05, | |
| "loss": 4.0752, | |
| "step": 766000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 8.269757543994949e-05, | |
| "loss": 4.0721, | |
| "step": 767000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 8.223314845388103e-05, | |
| "loss": 4.0745, | |
| "step": 768000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 8.176969152146221e-05, | |
| "loss": 4.0721, | |
| "step": 769000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 8.130767045556329e-05, | |
| "loss": 4.0721, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 8.084616409542043e-05, | |
| "loss": 4.0681, | |
| "step": 771000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 8.038609924698259e-05, | |
| "loss": 4.0728, | |
| "step": 772000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 7.992701769691633e-05, | |
| "loss": 4.0687, | |
| "step": 773000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 7.946846516190165e-05, | |
| "loss": 4.0675, | |
| "step": 774000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 7.90109054534227e-05, | |
| "loss": 4.0665, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 7.855434234043022e-05, | |
| "loss": 4.0655, | |
| "step": 776000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 7.80992346454953e-05, | |
| "loss": 4.0671, | |
| "step": 777000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 7.764512904833741e-05, | |
| "loss": 4.0638, | |
| "step": 778000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 7.71915762338268e-05, | |
| "loss": 4.0613, | |
| "step": 779000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 7.673948703544935e-05, | |
| "loss": 4.0572, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 7.628796009423646e-05, | |
| "loss": 4.0639, | |
| "step": 781000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 7.583745217759814e-05, | |
| "loss": 4.0613, | |
| "step": 782000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 7.538841596949084e-05, | |
| "loss": 4.0572, | |
| "step": 783000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 7.493995619788687e-05, | |
| "loss": 4.0566, | |
| "step": 784000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 7.44925265544582e-05, | |
| "loss": 4.0562, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 7.404613072471351e-05, | |
| "loss": 4.056, | |
| "step": 786000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 7.360077238564593e-05, | |
| "loss": 4.0507, | |
| "step": 787000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 7.315645520570287e-05, | |
| "loss": 4.0505, | |
| "step": 788000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 7.271362559401307e-05, | |
| "loss": 4.0534, | |
| "step": 789000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 7.22714006530347e-05, | |
| "loss": 4.0539, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 7.183066846739989e-05, | |
| "loss": 4.0536, | |
| "step": 791000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 7.139055032133843e-05, | |
| "loss": 4.0522, | |
| "step": 792000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 7.095236859806331e-05, | |
| "loss": 4.05, | |
| "step": 793000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 7.051437066874354e-05, | |
| "loss": 4.0474, | |
| "step": 794000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 7.007743932145127e-05, | |
| "loss": 4.0424, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 6.964157815522e-05, | |
| "loss": 4.0445, | |
| "step": 796000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 6.920679076026799e-05, | |
| "loss": 4.0437, | |
| "step": 797000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 6.877308071796904e-05, | |
| "loss": 4.0428, | |
| "step": 798000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 6.834088368883074e-05, | |
| "loss": 4.0413, | |
| "step": 799000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 6.790933797416663e-05, | |
| "loss": 4.0423, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 6.747888029936322e-05, | |
| "loss": 4.0412, | |
| "step": 801000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 6.704994302979443e-05, | |
| "loss": 4.0394, | |
| "step": 802000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 6.66216709659637e-05, | |
| "loss": 4.0399, | |
| "step": 803000 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 6.619449754859523e-05, | |
| "loss": 4.0385, | |
| "step": 804000 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 6.576885181589794e-05, | |
| "loss": 4.0375, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 6.534388513092143e-05, | |
| "loss": 4.0376, | |
| "step": 806000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 6.492002761761704e-05, | |
| "loss": 4.0367, | |
| "step": 807000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 6.44972827673282e-05, | |
| "loss": 4.0361, | |
| "step": 808000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 6.40760751322673e-05, | |
| "loss": 4.0294, | |
| "step": 809000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 6.365556492400127e-05, | |
| "loss": 4.0324, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 6.323659661921848e-05, | |
| "loss": 4.0314, | |
| "step": 811000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 6.281833489418096e-05, | |
| "loss": 4.0306, | |
| "step": 812000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 6.240161971012996e-05, | |
| "loss": 4.0301, | |
| "step": 813000 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 6.198562023551751e-05, | |
| "loss": 4.0285, | |
| "step": 814000 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 6.157158618328416e-05, | |
| "loss": 4.0266, | |
| "step": 815000 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 6.115786150987899e-05, | |
| "loss": 4.0292, | |
| "step": 816000 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 6.0745280488710155e-05, | |
| "loss": 4.0234, | |
| "step": 817000 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 6.0333846518236035e-05, | |
| "loss": 4.0236, | |
| "step": 818000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 5.9923562987466307e-05, | |
| "loss": 4.0237, | |
| "step": 819000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 5.951484182819116e-05, | |
| "loss": 4.021, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 5.910727554160531e-05, | |
| "loss": 4.0171, | |
| "step": 821000 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 5.8700461244659956e-05, | |
| "loss": 4.0197, | |
| "step": 822000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 5.829481084172575e-05, | |
| "loss": 4.0153, | |
| "step": 823000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 5.789032767417306e-05, | |
| "loss": 4.0209, | |
| "step": 824000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 5.748701507375753e-05, | |
| "loss": 4.0169, | |
| "step": 825000 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 5.708487636259276e-05, | |
| "loss": 4.0142, | |
| "step": 826000 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 5.6684315225520025e-05, | |
| "loss": 4.0168, | |
| "step": 827000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 5.628453303834178e-05, | |
| "loss": 4.0179, | |
| "step": 828000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 5.588633265133554e-05, | |
| "loss": 4.0114, | |
| "step": 829000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 5.5489316965551574e-05, | |
| "loss": 4.0113, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 5.5093093617013605e-05, | |
| "loss": 4.0101, | |
| "step": 831000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 5.469806387662206e-05, | |
| "loss": 4.012, | |
| "step": 832000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 5.4304230998263825e-05, | |
| "loss": 4.011, | |
| "step": 833000 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 5.391199025820963e-05, | |
| "loss": 4.0085, | |
| "step": 834000 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 5.352055962116598e-05, | |
| "loss": 4.0103, | |
| "step": 835000 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 5.313033554533935e-05, | |
| "loss": 4.007, | |
| "step": 836000 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 5.2741321245032015e-05, | |
| "loss": 4.0057, | |
| "step": 837000 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 5.235429431454388e-05, | |
| "loss": 4.0023, | |
| "step": 838000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 5.196770673276694e-05, | |
| "loss": 4.0024, | |
| "step": 839000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 5.158233850316285e-05, | |
| "loss": 4.0052, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 5.119895986925622e-05, | |
| "loss": 3.9984, | |
| "step": 841000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 5.0816037402308914e-05, | |
| "loss": 4.0024, | |
| "step": 842000 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 5.0434343773913936e-05, | |
| "loss": 3.9997, | |
| "step": 843000 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 5.005388212810789e-05, | |
| "loss": 4.0001, | |
| "step": 844000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 4.967465559877949e-05, | |
| "loss": 3.9953, | |
| "step": 845000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 4.929666730964366e-05, | |
| "loss": 3.9974, | |
| "step": 846000 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 4.8920296500061624e-05, | |
| "loss": 3.9978, | |
| "step": 847000 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 4.854479277562882e-05, | |
| "loss": 3.9965, | |
| "step": 848000 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 4.8170910230147306e-05, | |
| "loss": 3.9938, | |
| "step": 849000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 4.77979034302229e-05, | |
| "loss": 3.9955, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 4.7426521455285876e-05, | |
| "loss": 3.9919, | |
| "step": 851000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 4.705602385748844e-05, | |
| "loss": 3.9902, | |
| "step": 852000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 4.668678606973318e-05, | |
| "loss": 3.9888, | |
| "step": 853000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 4.631881113345728e-05, | |
| "loss": 3.9864, | |
| "step": 854000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 4.5952834232442806e-05, | |
| "loss": 3.9886, | |
| "step": 855000 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 4.5587391540988944e-05, | |
| "loss": 3.9877, | |
| "step": 856000 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 4.5223220756802585e-05, | |
| "loss": 3.9858, | |
| "step": 857000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 4.4860324879583624e-05, | |
| "loss": 3.9823, | |
| "step": 858000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 4.4498706898530285e-05, | |
| "loss": 3.9823, | |
| "step": 859000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 4.413836979231471e-05, | |
| "loss": 3.9826, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 4.3779674940056856e-05, | |
| "loss": 3.9845, | |
| "step": 861000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 4.342190718903205e-05, | |
| "loss": 3.9797, | |
| "step": 862000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 4.3066140849412765e-05, | |
| "loss": 3.9826, | |
| "step": 863000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 4.271095293545859e-05, | |
| "loss": 3.9786, | |
| "step": 864000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 4.235706062219449e-05, | |
| "loss": 3.9789, | |
| "step": 865000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 4.200481876887719e-05, | |
| "loss": 3.9804, | |
| "step": 866000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 4.165352508853595e-05, | |
| "loss": 3.9759, | |
| "step": 867000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 4.13038850558964e-05, | |
| "loss": 3.9766, | |
| "step": 868000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 4.095520157140329e-05, | |
| "loss": 3.9754, | |
| "step": 869000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 4.0608174867936735e-05, | |
| "loss": 3.9726, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 4.026211305630183e-05, | |
| "loss": 3.9728, | |
| "step": 871000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 3.9917367016619276e-05, | |
| "loss": 3.9719, | |
| "step": 872000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 3.9573939588586015e-05, | |
| "loss": 3.9689, | |
| "step": 873000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 3.923183360103733e-05, | |
| "loss": 3.9669, | |
| "step": 874000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 3.88910518719237e-05, | |
| "loss": 3.9705, | |
| "step": 875000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3.8551935999150546e-05, | |
| "loss": 3.969, | |
| "step": 876000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3.821414732678987e-05, | |
| "loss": 3.9472, | |
| "step": 877000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 3.7877352503423325e-05, | |
| "loss": 3.9284, | |
| "step": 878000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 3.7541893095445734e-05, | |
| "loss": 3.9269, | |
| "step": 879000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 3.720810531795154e-05, | |
| "loss": 3.9298, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 3.687532367703408e-05, | |
| "loss": 3.9306, | |
| "step": 881000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 3.65438857052858e-05, | |
| "loss": 3.9292, | |
| "step": 882000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 3.6213794132784204e-05, | |
| "loss": 3.9297, | |
| "step": 883000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 3.588537974618371e-05, | |
| "loss": 3.9289, | |
| "step": 884000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 3.555798776484851e-05, | |
| "loss": 3.9268, | |
| "step": 885000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 3.5232601024993396e-05, | |
| "loss": 3.933, | |
| "step": 886000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 3.490791805247826e-05, | |
| "loss": 3.9279, | |
| "step": 887000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 3.458459495478781e-05, | |
| "loss": 3.9265, | |
| "step": 888000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.4262634395156536e-05, | |
| "loss": 3.9279, | |
| "step": 889000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.394235893817297e-05, | |
| "loss": 3.9273, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.3623130030302484e-05, | |
| "loss": 3.924, | |
| "step": 891000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.330527158014394e-05, | |
| "loss": 3.9239, | |
| "step": 892000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.298910200457324e-05, | |
| "loss": 3.9256, | |
| "step": 893000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.267399093621268e-05, | |
| "loss": 3.9225, | |
| "step": 894000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.2360258143687926e-05, | |
| "loss": 3.9213, | |
| "step": 895000 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.204821787257311e-05, | |
| "loss": 3.923, | |
| "step": 896000 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.173724798834707e-05, | |
| "loss": 3.9165, | |
| "step": 897000 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.142797298671269e-05, | |
| "loss": 3.9243, | |
| "step": 898000 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.111977624644229e-05, | |
| "loss": 3.9195, | |
| "step": 899000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.081297058418091e-05, | |
| "loss": 3.9208, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.0507558527107828e-05, | |
| "loss": 3.92, | |
| "step": 901000 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 3.0203845908662563e-05, | |
| "loss": 3.9204, | |
| "step": 902000 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 2.9901227197694415e-05, | |
| "loss": 3.9196, | |
| "step": 903000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 2.9600310118919393e-05, | |
| "loss": 3.9197, | |
| "step": 904000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 2.9300494714831896e-05, | |
| "loss": 3.9205, | |
| "step": 905000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 2.9002383080493055e-05, | |
| "loss": 3.9151, | |
| "step": 906000 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 2.8705380851790375e-05, | |
| "loss": 3.9172, | |
| "step": 907000 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 2.8409789588637402e-05, | |
| "loss": 3.9158, | |
| "step": 908000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 2.8115611725839808e-05, | |
| "loss": 3.9135, | |
| "step": 909000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 2.7823141740592663e-05, | |
| "loss": 3.9149, | |
| "step": 910000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 2.7531796516897657e-05, | |
| "loss": 3.9118, | |
| "step": 911000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 2.7242161139836732e-05, | |
| "loss": 3.9082, | |
| "step": 912000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 2.6953658144950188e-05, | |
| "loss": 3.9131, | |
| "step": 913000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 2.666686690950142e-05, | |
| "loss": 3.9105, | |
| "step": 914000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 2.6381215639576494e-05, | |
| "loss": 3.9091, | |
| "step": 915000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 2.6097277985549907e-05, | |
| "loss": 3.9072, | |
| "step": 916000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 2.581476991673275e-05, | |
| "loss": 3.9109, | |
| "step": 917000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 2.553341311615387e-05, | |
| "loss": 3.9079, | |
| "step": 918000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 2.5253493408841024e-05, | |
| "loss": 3.9061, | |
| "step": 919000 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 2.4975290861076127e-05, | |
| "loss": 3.9058, | |
| "step": 920000 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 2.469825080275776e-05, | |
| "loss": 3.9049, | |
| "step": 921000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 2.4422929591059718e-05, | |
| "loss": 3.9037, | |
| "step": 922000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 2.4148778300583463e-05, | |
| "loss": 3.9052, | |
| "step": 923000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 2.3876075508705364e-05, | |
| "loss": 3.906, | |
| "step": 924000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 2.3605364516460604e-05, | |
| "loss": 3.9016, | |
| "step": 925000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 2.3335562540463497e-05, | |
| "loss": 3.902, | |
| "step": 926000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 2.3067215761578686e-05, | |
| "loss": 3.901, | |
| "step": 927000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 2.2800326390197003e-05, | |
| "loss": 3.9034, | |
| "step": 928000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 2.2535161324668153e-05, | |
| "loss": 3.8986, | |
| "step": 929000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 2.227119188854776e-05, | |
| "loss": 3.9001, | |
| "step": 930000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 2.200868641683378e-05, | |
| "loss": 3.896, | |
| "step": 931000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 2.1747647071801923e-05, | |
| "loss": 3.8955, | |
| "step": 932000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 2.1488593679023983e-05, | |
| "loss": 3.896, | |
| "step": 933000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 2.1230490082903298e-05, | |
| "loss": 3.8937, | |
| "step": 934000 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 2.0973859023521336e-05, | |
| "loss": 3.893, | |
| "step": 935000 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 2.0718957033886022e-05, | |
| "loss": 3.8921, | |
| "step": 936000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 2.0465275899699664e-05, | |
| "loss": 3.8936, | |
| "step": 937000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 2.021307360537388e-05, | |
| "loss": 3.8894, | |
| "step": 938000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.9962352228316283e-05, | |
| "loss": 3.8913, | |
| "step": 939000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.9713362330696583e-05, | |
| "loss": 3.8938, | |
| "step": 940000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.946560748553077e-05, | |
| "loss": 3.8904, | |
| "step": 941000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 1.921958523886409e-05, | |
| "loss": 3.8881, | |
| "step": 942000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 1.8974805080506908e-05, | |
| "loss": 3.8859, | |
| "step": 943000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.8732001127734854e-05, | |
| "loss": 3.8898, | |
| "step": 944000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.8490202219074714e-05, | |
| "loss": 3.888, | |
| "step": 945000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 1.8250137975426186e-05, | |
| "loss": 3.8873, | |
| "step": 946000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 1.8011329765448747e-05, | |
| "loss": 3.8839, | |
| "step": 947000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.7774020608654827e-05, | |
| "loss": 3.8851, | |
| "step": 948000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.753821245977625e-05, | |
| "loss": 3.8815, | |
| "step": 949000 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 1.730414081501248e-05, | |
| "loss": 3.8797, | |
| "step": 950000 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 1.7071338990848274e-05, | |
| "loss": 3.8825, | |
| "step": 951000 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 1.6840274504384723e-05, | |
| "loss": 3.8792, | |
| "step": 952000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 1.6610715701279632e-05, | |
| "loss": 3.8822, | |
| "step": 953000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 1.6382436945055167e-05, | |
| "loss": 3.8788, | |
| "step": 954000 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 1.615567065931629e-05, | |
| "loss": 3.8825, | |
| "step": 955000 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 1.5930643206869322e-05, | |
| "loss": 3.8789, | |
| "step": 956000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 1.5706905936180028e-05, | |
| "loss": 3.8776, | |
| "step": 957000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 1.5484908160738844e-05, | |
| "loss": 3.88, | |
| "step": 958000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.5264207269471153e-05, | |
| "loss": 3.8763, | |
| "step": 959000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.5045028059623756e-05, | |
| "loss": 3.8744, | |
| "step": 960000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 1.4827372336590928e-05, | |
| "loss": 3.8755, | |
| "step": 961000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 1.4611457261190308e-05, | |
| "loss": 3.8775, | |
| "step": 962000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 1.4397066191369536e-05, | |
| "loss": 3.8748, | |
| "step": 963000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.4183988576170026e-05, | |
| "loss": 3.8712, | |
| "step": 964000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.3972441540226522e-05, | |
| "loss": 3.8697, | |
| "step": 965000 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 1.3762426826066322e-05, | |
| "loss": 3.8722, | |
| "step": 966000 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 1.3554153877426224e-05, | |
| "loss": 3.8705, | |
| "step": 967000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.3347207447291144e-05, | |
| "loss": 3.8681, | |
| "step": 968000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.314179848903565e-05, | |
| "loss": 3.8657, | |
| "step": 969000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 1.2938131795049502e-05, | |
| "loss": 3.8674, | |
| "step": 970000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 1.2735801302100369e-05, | |
| "loss": 3.8668, | |
| "step": 971000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.2535213334175821e-05, | |
| "loss": 3.8706, | |
| "step": 972000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.2335967966295303e-05, | |
| "loss": 3.8628, | |
| "step": 973000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.2138268399943431e-05, | |
| "loss": 3.8666, | |
| "step": 974000 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 1.1942116263585212e-05, | |
| "loss": 3.8637, | |
| "step": 975000 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 1.1747707001746943e-05, | |
| "loss": 3.8626, | |
| "step": 976000 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 1.1554653008327055e-05, | |
| "loss": 3.8644, | |
| "step": 977000 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 1.1363532705509805e-05, | |
| "loss": 3.8628, | |
| "step": 978000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.1173581654855314e-05, | |
| "loss": 3.864, | |
| "step": 979000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.0985185980385471e-05, | |
| "loss": 3.8634, | |
| "step": 980000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 1.079834723392832e-05, | |
| "loss": 3.8611, | |
| "step": 981000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 1.0613066954487539e-05, | |
| "loss": 3.8614, | |
| "step": 982000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 1.0429529608794375e-05, | |
| "loss": 3.8612, | |
| "step": 983000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 1.024736926677754e-05, | |
| "loss": 3.8581, | |
| "step": 984000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 1.0066951746339515e-05, | |
| "loss": 3.8562, | |
| "step": 985000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 9.887917337602925e-06, | |
| "loss": 3.8566, | |
| "step": 986000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 9.71044889515631e-06, | |
| "loss": 3.8534, | |
| "step": 987000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 9.534722998420087e-06, | |
| "loss": 3.8577, | |
| "step": 988000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 9.360389291505156e-06, | |
| "loss": 3.8599, | |
| "step": 989000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 9.187625896164997e-06, | |
| "loss": 3.8554, | |
| "step": 990000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 9.016434235463455e-06, | |
| "loss": 3.8554, | |
| "step": 991000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 8.846984551782144e-06, | |
| "loss": 3.8531, | |
| "step": 992000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 8.678939002516817e-06, | |
| "loss": 3.8537, | |
| "step": 993000 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 8.512635059971796e-06, | |
| "loss": 3.8556, | |
| "step": 994000 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 8.34774115340684e-06, | |
| "loss": 3.8552, | |
| "step": 995000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 8.184588425936723e-06, | |
| "loss": 3.8518, | |
| "step": 996000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 8.022851589599123e-06, | |
| "loss": 3.8519, | |
| "step": 997000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 7.862855447419604e-06, | |
| "loss": 3.8497, | |
| "step": 998000 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 7.70428100492051e-06, | |
| "loss": 3.8534, | |
| "step": 999000 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 7.54729051547387e-06, | |
| "loss": 3.8516, | |
| "step": 1000000 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 7.392039885206847e-06, | |
| "loss": 3.8476, | |
| "step": 1001000 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 7.238219581070471e-06, | |
| "loss": 3.8473, | |
| "step": 1002000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 7.085987068966549e-06, | |
| "loss": 3.8497, | |
| "step": 1003000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 6.935343602844757e-06, | |
| "loss": 3.8455, | |
| "step": 1004000 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 6.786438681986962e-06, | |
| "loss": 3.8503, | |
| "step": 1005000 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 6.638975425188365e-06, | |
| "loss": 3.8475, | |
| "step": 1006000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 6.493249970997628e-06, | |
| "loss": 3.8456, | |
| "step": 1007000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 6.349115258944571e-06, | |
| "loss": 3.8407, | |
| "step": 1008000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 6.2064305863833495e-06, | |
| "loss": 3.8426, | |
| "step": 1009000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 6.065342204771441e-06, | |
| "loss": 3.8456, | |
| "step": 1010000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 5.92585127626355e-06, | |
| "loss": 3.8446, | |
| "step": 1011000 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 5.7880960433015715e-06, | |
| "loss": 3.8439, | |
| "step": 1012000 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 5.651801854522143e-06, | |
| "loss": 3.8432, | |
| "step": 1013000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 5.517108525207015e-06, | |
| "loss": 3.8411, | |
| "step": 1014000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 5.384017164834387e-06, | |
| "loss": 3.8405, | |
| "step": 1015000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 5.25265955688945e-06, | |
| "loss": 3.8406, | |
| "step": 1016000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 5.122773805360459e-06, | |
| "loss": 3.8425, | |
| "step": 1017000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 4.9946207493118515e-06, | |
| "loss": 3.8429, | |
| "step": 1018000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 4.867944881850673e-06, | |
| "loss": 3.84, | |
| "step": 1019000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 4.743000595890457e-06, | |
| "loss": 3.8434, | |
| "step": 1020000 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 4.619538782067134e-06, | |
| "loss": 3.8404, | |
| "step": 1021000 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 4.497686330529982e-06, | |
| "loss": 3.8382, | |
| "step": 1022000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 4.377444244986006e-06, | |
| "loss": 3.8392, | |
| "step": 1023000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 4.2589313414077795e-06, | |
| "loss": 3.8395, | |
| "step": 1024000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 4.1420275474132856e-06, | |
| "loss": 3.8404, | |
| "step": 1025000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 4.02661922185521e-06, | |
| "loss": 3.841, | |
| "step": 1026000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.912938129952815e-06, | |
| "loss": 3.8376, | |
| "step": 1027000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.8007576184877935e-06, | |
| "loss": 3.8383, | |
| "step": 1028000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.6901932136656604e-06, | |
| "loss": 3.837, | |
| "step": 1029000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.5812458262129755e-06, | |
| "loss": 3.8348, | |
| "step": 1030000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.4740228745658187e-06, | |
| "loss": 3.8393, | |
| "step": 1031000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.368310581510614e-06, | |
| "loss": 3.8335, | |
| "step": 1032000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.2643212405075284e-06, | |
| "loss": 3.833, | |
| "step": 1033000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 3.161949185382773e-06, | |
| "loss": 3.8357, | |
| "step": 1034000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 3.0610952138760753e-06, | |
| "loss": 3.8334, | |
| "step": 1035000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.9618634416622936e-06, | |
| "loss": 3.8355, | |
| "step": 1036000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.864351483910399e-06, | |
| "loss": 3.8369, | |
| "step": 1037000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.768364924832545e-06, | |
| "loss": 3.8369, | |
| "step": 1038000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.6740029762871932e-06, | |
| "loss": 3.8313, | |
| "step": 1039000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.581358339964313e-06, | |
| "loss": 3.8306, | |
| "step": 1040000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.4902463043641854e-06, | |
| "loss": 3.8327, | |
| "step": 1041000 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.4007611701787116e-06, | |
| "loss": 3.834, | |
| "step": 1042000 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.3129036745030752e-06, | |
| "loss": 3.8316, | |
| "step": 1043000 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.226759956554547e-06, | |
| "loss": 3.8326, | |
| "step": 1044000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.1421582661275585e-06, | |
| "loss": 3.8354, | |
| "step": 1045000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.0592685019640958e-06, | |
| "loss": 3.8307, | |
| "step": 1046000 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 1.977925401473013e-06, | |
| "loss": 3.8299, | |
| "step": 1047000 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 1.8982923194333036e-06, | |
| "loss": 3.8297, | |
| "step": 1048000 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.8202104863079827e-06, | |
| "loss": 3.8298, | |
| "step": 1049000 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.7438367075362172e-06, | |
| "loss": 3.8303, | |
| "step": 1050000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.6690927139422218e-06, | |
| "loss": 3.8317, | |
| "step": 1051000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.5959067502205883e-06, | |
| "loss": 3.8296, | |
| "step": 1052000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.5243550546499618e-06, | |
| "loss": 3.8309, | |
| "step": 1053000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.4544382166065795e-06, | |
| "loss": 3.8339, | |
| "step": 1054000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.3861568120002276e-06, | |
| "loss": 3.831, | |
| "step": 1055000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.3195114032695576e-06, | |
| "loss": 3.8265, | |
| "step": 1056000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.2545667306077758e-06, | |
| "loss": 3.8309, | |
| "step": 1057000 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.1911933096932392e-06, | |
| "loss": 3.8254, | |
| "step": 1058000 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.1294574905821087e-06, | |
| "loss": 3.8288, | |
| "step": 1059000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 1.0694190611034273e-06, | |
| "loss": 3.8261, | |
| "step": 1060000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 1.0109583188243843e-06, | |
| "loss": 3.8253, | |
| "step": 1061000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 9.541366629567838e-07, | |
| "loss": 3.8274, | |
| "step": 1062000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 8.989545615444961e-07, | |
| "loss": 3.8264, | |
| "step": 1063000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 8.454651918863299e-07, | |
| "loss": 3.8278, | |
| "step": 1064000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 7.935619088263124e-07, | |
| "loss": 3.8267, | |
| "step": 1065000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 7.433489455357823e-07, | |
| "loss": 3.8298, | |
| "step": 1066000 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 6.94726189238426e-07, | |
| "loss": 3.8305, | |
| "step": 1067000 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 6.477912850886725e-07, | |
| "loss": 3.8305, | |
| "step": 1068000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 6.024951762708009e-07, | |
| "loss": 3.8248, | |
| "step": 1069000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 5.588382351461308e-07, | |
| "loss": 3.826, | |
| "step": 1070000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 5.16779582648863e-07, | |
| "loss": 3.8261, | |
| "step": 1071000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 4.76364087147263e-07, | |
| "loss": 3.8238, | |
| "step": 1072000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 4.375920815465229e-07, | |
| "loss": 3.8293, | |
| "step": 1073000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 4.004638852143083e-07, | |
| "loss": 3.8244, | |
| "step": 1074000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 3.6497980397816043e-07, | |
| "loss": 3.8275, | |
| "step": 1075000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 3.3117314832133985e-07, | |
| "loss": 3.8278, | |
| "step": 1076000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.989765157657809e-07, | |
| "loss": 3.824, | |
| "step": 1077000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.684545642082537e-07, | |
| "loss": 3.8268, | |
| "step": 1078000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.395464400940739e-07, | |
| "loss": 3.8244, | |
| "step": 1079000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 2.1228375656396903e-07, | |
| "loss": 3.8236, | |
| "step": 1080000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 1.8666673818257262e-07, | |
| "loss": 3.8255, | |
| "step": 1081000 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.6271874491924355e-07, | |
| "loss": 3.828, | |
| "step": 1082000 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.4041353457650008e-07, | |
| "loss": 3.8259, | |
| "step": 1083000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 1.1973143077612658e-07, | |
| "loss": 3.8265, | |
| "step": 1084000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 1.0069575448430346e-07, | |
| "loss": 3.826, | |
| "step": 1085000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 8.330666249920515e-08, | |
| "loss": 3.8267, | |
| "step": 1086000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 6.759413926236135e-08, | |
| "loss": 3.8269, | |
| "step": 1087000 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 5.349533819716257e-08, | |
| "loss": 3.8255, | |
| "step": 1088000 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 4.1043510231775216e-08, | |
| "loss": 3.8275, | |
| "step": 1089000 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 3.0248739940019756e-08, | |
| "loss": 3.8277, | |
| "step": 1090000 | |
| } | |
| ], | |
| "max_steps": 1095620, | |
| "num_train_epochs": 5, | |
| "total_flos": 7.86932880566174e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
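
The file above is a standard Hugging Face `Trainer` state dump, so it can be consumed directly with the `json` module. Below is a minimal sketch of reading it and summarizing the logged loss curve; the filename `trainer_state.json` is an assumption (save the JSON above under that name), and the snippet only uses keys that actually appear in the file (`epoch`, `global_step`, `max_steps`, `num_train_epochs`, `log_history`).

```python
# Minimal sketch (not part of the original log): load the trainer state
# above and summarize training progress. Assumes the JSON is saved
# locally as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print(f"epoch {state['epoch']:.2f} of {state['num_train_epochs']}, "
      f"step {state['global_step']} of {state['max_steps']}")

# Each log_history entry here carries epoch, learning_rate, loss, and step.
history = state["log_history"]
first, last = history[0], history[-1]
print(f"loss: {first['loss']} (step {first['step']}) -> "
      f"{last['loss']} (step {last['step']})")
```

On this particular run, the summary would show the loss falling from 9.6608 at step 100 to 3.8277 at step 1090000, while the learning-rate column decays smoothly from 4e-4 toward ~3e-08 as the step count approaches `max_steps`, consistent with a warmup phase followed by a gradual decay schedule.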