| { |
| "best_metric": 1.4371888637542725, |
| "best_model_checkpoint": "miner_id_24/checkpoint-200", |
| "epoch": 0.23460410557184752, |
| "eval_steps": 50, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0011730205278592375, |
| "grad_norm": 8.663331031799316, |
| "learning_rate": 1e-05, |
| "loss": 13.3816, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0011730205278592375, |
| "eval_loss": 1.7326703071594238, |
| "eval_runtime": 53.9567, |
| "eval_samples_per_second": 26.614, |
| "eval_steps_per_second": 6.653, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002346041055718475, |
| "grad_norm": 51.352439880371094, |
| "learning_rate": 2e-05, |
| "loss": 15.251, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0035190615835777126, |
| "grad_norm": 26.00502586364746, |
| "learning_rate": 3e-05, |
| "loss": 16.2881, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00469208211143695, |
| "grad_norm": 10.644500732421875, |
| "learning_rate": 4e-05, |
| "loss": 16.008, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005865102639296188, |
| "grad_norm": 9.421217918395996, |
| "learning_rate": 5e-05, |
| "loss": 13.3204, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.007038123167155425, |
| "grad_norm": 13.056597709655762, |
| "learning_rate": 6e-05, |
| "loss": 15.3015, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.008211143695014663, |
| "grad_norm": 14.21023178100586, |
| "learning_rate": 7e-05, |
| "loss": 14.4311, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0093841642228739, |
| "grad_norm": 8.397007942199707, |
| "learning_rate": 8e-05, |
| "loss": 12.7272, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.010557184750733138, |
| "grad_norm": 9.917564392089844, |
| "learning_rate": 9e-05, |
| "loss": 11.9593, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.011730205278592375, |
| "grad_norm": 10.105253219604492, |
| "learning_rate": 0.0001, |
| "loss": 10.8888, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012903225806451613, |
| "grad_norm": 9.945771217346191, |
| "learning_rate": 9.999316524962345e-05, |
| "loss": 11.1743, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01407624633431085, |
| "grad_norm": 12.888904571533203, |
| "learning_rate": 9.997266286704631e-05, |
| "loss": 11.0644, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.015249266862170088, |
| "grad_norm": 7.4127044677734375, |
| "learning_rate": 9.993849845741524e-05, |
| "loss": 9.0855, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.016422287390029325, |
| "grad_norm": 7.778149127960205, |
| "learning_rate": 9.989068136093873e-05, |
| "loss": 9.4258, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.017595307917888565, |
| "grad_norm": 8.04168701171875, |
| "learning_rate": 9.98292246503335e-05, |
| "loss": 9.2789, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0187683284457478, |
| "grad_norm": 6.784926414489746, |
| "learning_rate": 9.975414512725057e-05, |
| "loss": 9.1474, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01994134897360704, |
| "grad_norm": 6.441762924194336, |
| "learning_rate": 9.966546331768191e-05, |
| "loss": 8.7631, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.021114369501466276, |
| "grad_norm": 9.08105754852295, |
| "learning_rate": 9.956320346634876e-05, |
| "loss": 8.476, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.022287390029325515, |
| "grad_norm": 5.7696123123168945, |
| "learning_rate": 9.944739353007344e-05, |
| "loss": 8.6486, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.02346041055718475, |
| "grad_norm": 5.676098823547363, |
| "learning_rate": 9.931806517013612e-05, |
| "loss": 8.7613, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02463343108504399, |
| "grad_norm": 5.42555570602417, |
| "learning_rate": 9.917525374361912e-05, |
| "loss": 8.5526, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.025806451612903226, |
| "grad_norm": 5.8337321281433105, |
| "learning_rate": 9.901899829374047e-05, |
| "loss": 8.6943, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.026979472140762465, |
| "grad_norm": 6.1469855308532715, |
| "learning_rate": 9.884934153917997e-05, |
| "loss": 8.8221, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0281524926686217, |
| "grad_norm": 5.11215353012085, |
| "learning_rate": 9.86663298624003e-05, |
| "loss": 8.7282, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02932551319648094, |
| "grad_norm": 5.950553894042969, |
| "learning_rate": 9.847001329696653e-05, |
| "loss": 8.0898, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.030498533724340176, |
| "grad_norm": 6.3572492599487305, |
| "learning_rate": 9.826044551386744e-05, |
| "loss": 8.7058, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03167155425219941, |
| "grad_norm": 5.663265228271484, |
| "learning_rate": 9.803768380684242e-05, |
| "loss": 7.8918, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03284457478005865, |
| "grad_norm": 6.1261725425720215, |
| "learning_rate": 9.780178907671789e-05, |
| "loss": 8.3618, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03401759530791789, |
| "grad_norm": 11.174363136291504, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 7.6953, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03519061583577713, |
| "grad_norm": 6.395217418670654, |
| "learning_rate": 9.729086208503174e-05, |
| "loss": 8.6185, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03636363636363636, |
| "grad_norm": 6.65997314453125, |
| "learning_rate": 9.701596950580806e-05, |
| "loss": 8.0256, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0375366568914956, |
| "grad_norm": 6.332151889801025, |
| "learning_rate": 9.672822322997305e-05, |
| "loss": 7.8024, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03870967741935484, |
| "grad_norm": 14.636702537536621, |
| "learning_rate": 9.642770192448536e-05, |
| "loss": 8.6931, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.03988269794721408, |
| "grad_norm": 6.699878215789795, |
| "learning_rate": 9.611448774886924e-05, |
| "loss": 7.633, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.04105571847507331, |
| "grad_norm": 10.704326629638672, |
| "learning_rate": 9.578866633275288e-05, |
| "loss": 8.519, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04222873900293255, |
| "grad_norm": 7.794402122497559, |
| "learning_rate": 9.545032675245813e-05, |
| "loss": 8.2599, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04340175953079179, |
| "grad_norm": 7.623858451843262, |
| "learning_rate": 9.509956150664796e-05, |
| "loss": 8.5064, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04457478005865103, |
| "grad_norm": 7.614893436431885, |
| "learning_rate": 9.473646649103818e-05, |
| "loss": 7.8677, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04574780058651026, |
| "grad_norm": 19.778417587280273, |
| "learning_rate": 9.43611409721806e-05, |
| "loss": 8.3847, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0469208211143695, |
| "grad_norm": 9.958346366882324, |
| "learning_rate": 9.397368756032445e-05, |
| "loss": 7.5411, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04809384164222874, |
| "grad_norm": 10.39439582824707, |
| "learning_rate": 9.357421218136386e-05, |
| "loss": 8.6156, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04926686217008798, |
| "grad_norm": 9.843864440917969, |
| "learning_rate": 9.316282404787871e-05, |
| "loss": 8.73, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05043988269794721, |
| "grad_norm": 12.815672874450684, |
| "learning_rate": 9.273963562927695e-05, |
| "loss": 9.2569, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.05161290322580645, |
| "grad_norm": 9.977739334106445, |
| "learning_rate": 9.230476262104677e-05, |
| "loss": 7.4537, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.05278592375366569, |
| "grad_norm": 12.918895721435547, |
| "learning_rate": 9.185832391312644e-05, |
| "loss": 7.1692, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05395894428152493, |
| "grad_norm": 14.040423393249512, |
| "learning_rate": 9.140044155740101e-05, |
| "loss": 8.3064, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05513196480938416, |
| "grad_norm": 18.069244384765625, |
| "learning_rate": 9.093124073433463e-05, |
| "loss": 9.3387, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0563049853372434, |
| "grad_norm": 21.009868621826172, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 9.4285, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05747800586510264, |
| "grad_norm": 22.109519958496094, |
| "learning_rate": 8.995939984474624e-05, |
| "loss": 9.3635, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05865102639296188, |
| "grad_norm": 32.68623352050781, |
| "learning_rate": 8.945702546981969e-05, |
| "loss": 14.1459, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05865102639296188, |
| "eval_loss": 1.6147716045379639, |
| "eval_runtime": 54.9368, |
| "eval_samples_per_second": 26.139, |
| "eval_steps_per_second": 6.535, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05982404692082111, |
| "grad_norm": 11.36614990234375, |
| "learning_rate": 8.894386393810563e-05, |
| "loss": 7.8323, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.06099706744868035, |
| "grad_norm": 14.187142372131348, |
| "learning_rate": 8.842005554284296e-05, |
| "loss": 8.1578, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06217008797653959, |
| "grad_norm": 8.44095516204834, |
| "learning_rate": 8.788574348801675e-05, |
| "loss": 7.6084, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06334310850439882, |
| "grad_norm": 6.0896830558776855, |
| "learning_rate": 8.73410738492077e-05, |
| "loss": 7.398, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06451612903225806, |
| "grad_norm": 6.624449253082275, |
| "learning_rate": 8.678619553365659e-05, |
| "loss": 7.1689, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0656891495601173, |
| "grad_norm": 4.72271728515625, |
| "learning_rate": 8.622126023955446e-05, |
| "loss": 7.3829, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06686217008797654, |
| "grad_norm": 11.342972755432129, |
| "learning_rate": 8.564642241456986e-05, |
| "loss": 7.1249, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06803519061583578, |
| "grad_norm": 4.155811309814453, |
| "learning_rate": 8.506183921362443e-05, |
| "loss": 7.2666, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06920821114369502, |
| "grad_norm": 4.137184143066406, |
| "learning_rate": 8.44676704559283e-05, |
| "loss": 7.1229, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.07038123167155426, |
| "grad_norm": 7.632533073425293, |
| "learning_rate": 8.386407858128706e-05, |
| "loss": 7.0888, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07155425219941348, |
| "grad_norm": 4.366072654724121, |
| "learning_rate": 8.32512286056924e-05, |
| "loss": 7.3379, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07272727272727272, |
| "grad_norm": 3.8060083389282227, |
| "learning_rate": 8.262928807620843e-05, |
| "loss": 7.0819, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07390029325513196, |
| "grad_norm": 3.99633526802063, |
| "learning_rate": 8.199842702516583e-05, |
| "loss": 6.9591, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0750733137829912, |
| "grad_norm": 6.5943379402160645, |
| "learning_rate": 8.135881792367686e-05, |
| "loss": 6.9471, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07624633431085044, |
| "grad_norm": 4.896180629730225, |
| "learning_rate": 8.07106356344834e-05, |
| "loss": 7.2951, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07741935483870968, |
| "grad_norm": 4.8060479164123535, |
| "learning_rate": 8.005405736415126e-05, |
| "loss": 7.1263, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07859237536656892, |
| "grad_norm": 3.955714225769043, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 6.816, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07976539589442816, |
| "grad_norm": 4.465524673461914, |
| "learning_rate": 7.871643313414718e-05, |
| "loss": 7.3137, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08093841642228738, |
| "grad_norm": 4.67534065246582, |
| "learning_rate": 7.803575286758364e-05, |
| "loss": 6.8736, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.08211143695014662, |
| "grad_norm": 4.462228298187256, |
| "learning_rate": 7.734740790612136e-05, |
| "loss": 7.3674, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08328445747800586, |
| "grad_norm": 5.6989312171936035, |
| "learning_rate": 7.66515864363997e-05, |
| "loss": 7.346, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0844574780058651, |
| "grad_norm": 4.496844291687012, |
| "learning_rate": 7.594847868906076e-05, |
| "loss": 7.2749, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08563049853372434, |
| "grad_norm": 5.543951988220215, |
| "learning_rate": 7.52382768867422e-05, |
| "loss": 7.2962, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08680351906158358, |
| "grad_norm": 4.945760726928711, |
| "learning_rate": 7.452117519152542e-05, |
| "loss": 7.3482, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08797653958944282, |
| "grad_norm": 5.323139667510986, |
| "learning_rate": 7.379736965185368e-05, |
| "loss": 7.1538, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08914956011730206, |
| "grad_norm": 5.503332138061523, |
| "learning_rate": 7.30670581489344e-05, |
| "loss": 7.7196, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09032258064516129, |
| "grad_norm": 5.515101909637451, |
| "learning_rate": 7.233044034264034e-05, |
| "loss": 6.8976, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.09149560117302052, |
| "grad_norm": 6.265590190887451, |
| "learning_rate": 7.158771761692464e-05, |
| "loss": 7.1142, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09266862170087976, |
| "grad_norm": 5.986038684844971, |
| "learning_rate": 7.083909302476453e-05, |
| "loss": 7.4168, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.093841642228739, |
| "grad_norm": 5.8563456535339355, |
| "learning_rate": 7.008477123264848e-05, |
| "loss": 7.7562, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09501466275659824, |
| "grad_norm": 7.149432182312012, |
| "learning_rate": 6.932495846462261e-05, |
| "loss": 6.9403, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09618768328445748, |
| "grad_norm": 5.055361747741699, |
| "learning_rate": 6.855986244591104e-05, |
| "loss": 7.4671, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.09736070381231672, |
| "grad_norm": 5.610112190246582, |
| "learning_rate": 6.778969234612584e-05, |
| "loss": 7.4979, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09853372434017596, |
| "grad_norm": 5.591436862945557, |
| "learning_rate": 6.701465872208216e-05, |
| "loss": 7.4036, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09970674486803519, |
| "grad_norm": 5.245117664337158, |
| "learning_rate": 6.623497346023418e-05, |
| "loss": 7.4849, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.10087976539589442, |
| "grad_norm": 6.682361602783203, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 7.4913, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.10205278592375366, |
| "grad_norm": 7.1743927001953125, |
| "learning_rate": 6.466250186922325e-05, |
| "loss": 7.2481, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1032258064516129, |
| "grad_norm": 6.011048793792725, |
| "learning_rate": 6.387014543809223e-05, |
| "loss": 7.2815, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.10439882697947214, |
| "grad_norm": 6.022535800933838, |
| "learning_rate": 6.307399704769099e-05, |
| "loss": 6.7935, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.10557184750733138, |
| "grad_norm": 10.157115936279297, |
| "learning_rate": 6.227427435703997e-05, |
| "loss": 7.3842, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10674486803519062, |
| "grad_norm": 8.966989517211914, |
| "learning_rate": 6.147119600233758e-05, |
| "loss": 7.5907, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.10791788856304986, |
| "grad_norm": 7.883905410766602, |
| "learning_rate": 6.066498153718735e-05, |
| "loss": 7.9347, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10909090909090909, |
| "grad_norm": 9.566387176513672, |
| "learning_rate": 5.985585137257401e-05, |
| "loss": 7.9712, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.11026392961876832, |
| "grad_norm": 9.400655746459961, |
| "learning_rate": 5.90440267166055e-05, |
| "loss": 6.5513, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.11143695014662756, |
| "grad_norm": 11.420488357543945, |
| "learning_rate": 5.8229729514036705e-05, |
| "loss": 6.9325, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1126099706744868, |
| "grad_norm": 15.522323608398438, |
| "learning_rate": 5.74131823855921e-05, |
| "loss": 8.2028, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.11378299120234604, |
| "grad_norm": 14.273778915405273, |
| "learning_rate": 5.6594608567103456e-05, |
| "loss": 7.4233, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.11495601173020528, |
| "grad_norm": 17.384851455688477, |
| "learning_rate": 5.577423184847932e-05, |
| "loss": 7.3731, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.11612903225806452, |
| "grad_norm": 21.28325843811035, |
| "learning_rate": 5.495227651252315e-05, |
| "loss": 7.5629, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.11730205278592376, |
| "grad_norm": 23.694461822509766, |
| "learning_rate": 5.4128967273616625e-05, |
| "loss": 9.0356, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11730205278592376, |
| "eval_loss": 1.5398024320602417, |
| "eval_runtime": 54.9395, |
| "eval_samples_per_second": 26.138, |
| "eval_steps_per_second": 6.534, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11847507331378299, |
| "grad_norm": 6.170729160308838, |
| "learning_rate": 5.330452921628497e-05, |
| "loss": 7.0788, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.11964809384164223, |
| "grad_norm": 5.670572757720947, |
| "learning_rate": 5.247918773366112e-05, |
| "loss": 6.8288, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.12082111436950146, |
| "grad_norm": 5.664034843444824, |
| "learning_rate": 5.165316846586541e-05, |
| "loss": 7.3678, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1219941348973607, |
| "grad_norm": 4.554018020629883, |
| "learning_rate": 5.0826697238317935e-05, |
| "loss": 6.9508, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.12316715542521994, |
| "grad_norm": 3.6928629875183105, |
| "learning_rate": 5e-05, |
| "loss": 6.5448, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.12434017595307918, |
| "grad_norm": 3.7579400539398193, |
| "learning_rate": 4.917330276168208e-05, |
| "loss": 6.329, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.12551319648093842, |
| "grad_norm": 3.062084197998047, |
| "learning_rate": 4.834683153413459e-05, |
| "loss": 6.2742, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.12668621700879765, |
| "grad_norm": 3.557678699493408, |
| "learning_rate": 4.7520812266338885e-05, |
| "loss": 7.0237, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1278592375366569, |
| "grad_norm": 3.9020395278930664, |
| "learning_rate": 4.669547078371504e-05, |
| "loss": 6.8212, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.12903225806451613, |
| "grad_norm": 3.2508432865142822, |
| "learning_rate": 4.5871032726383386e-05, |
| "loss": 6.4402, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.13020527859237538, |
| "grad_norm": 3.8005776405334473, |
| "learning_rate": 4.504772348747687e-05, |
| "loss": 7.13, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1313782991202346, |
| "grad_norm": 3.552947521209717, |
| "learning_rate": 4.4225768151520694e-05, |
| "loss": 6.3852, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.13255131964809383, |
| "grad_norm": 3.8805272579193115, |
| "learning_rate": 4.3405391432896555e-05, |
| "loss": 6.9415, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.13372434017595308, |
| "grad_norm": 4.0383806228637695, |
| "learning_rate": 4.2586817614407895e-05, |
| "loss": 6.7668, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1348973607038123, |
| "grad_norm": 3.9695963859558105, |
| "learning_rate": 4.17702704859633e-05, |
| "loss": 6.8736, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.13607038123167156, |
| "grad_norm": 3.815753936767578, |
| "learning_rate": 4.095597328339452e-05, |
| "loss": 6.8127, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1372434017595308, |
| "grad_norm": 3.8608906269073486, |
| "learning_rate": 4.0144148627425993e-05, |
| "loss": 6.7532, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.13841642228739004, |
| "grad_norm": 4.177197456359863, |
| "learning_rate": 3.933501846281267e-05, |
| "loss": 6.8221, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13958944281524927, |
| "grad_norm": 4.147789001464844, |
| "learning_rate": 3.852880399766243e-05, |
| "loss": 7.1846, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.14076246334310852, |
| "grad_norm": 4.26793909072876, |
| "learning_rate": 3.772572564296005e-05, |
| "loss": 7.0007, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14193548387096774, |
| "grad_norm": 3.9579102993011475, |
| "learning_rate": 3.6926002952309016e-05, |
| "loss": 6.755, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.14310850439882697, |
| "grad_norm": 4.167984485626221, |
| "learning_rate": 3.612985456190778e-05, |
| "loss": 7.0235, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.14428152492668622, |
| "grad_norm": 4.612884044647217, |
| "learning_rate": 3.533749813077677e-05, |
| "loss": 6.6028, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 4.473079681396484, |
| "learning_rate": 3.4549150281252636e-05, |
| "loss": 7.1627, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1466275659824047, |
| "grad_norm": 4.8667893409729, |
| "learning_rate": 3.3765026539765834e-05, |
| "loss": 7.169, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.14780058651026393, |
| "grad_norm": 4.755546569824219, |
| "learning_rate": 3.298534127791785e-05, |
| "loss": 7.1955, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.14897360703812318, |
| "grad_norm": 4.763340950012207, |
| "learning_rate": 3.221030765387417e-05, |
| "loss": 6.8214, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.1501466275659824, |
| "grad_norm": 5.186800479888916, |
| "learning_rate": 3.144013755408895e-05, |
| "loss": 7.5825, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.15131964809384163, |
| "grad_norm": 4.935817718505859, |
| "learning_rate": 3.0675041535377405e-05, |
| "loss": 6.8696, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.15249266862170088, |
| "grad_norm": 4.84830904006958, |
| "learning_rate": 2.991522876735154e-05, |
| "loss": 6.6679, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1536656891495601, |
| "grad_norm": 5.055245876312256, |
| "learning_rate": 2.916090697523549e-05, |
| "loss": 6.9668, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.15483870967741936, |
| "grad_norm": 5.089951038360596, |
| "learning_rate": 2.8412282383075363e-05, |
| "loss": 6.8507, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1560117302052786, |
| "grad_norm": 5.252066135406494, |
| "learning_rate": 2.766955965735968e-05, |
| "loss": 6.6759, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.15718475073313784, |
| "grad_norm": 5.395578861236572, |
| "learning_rate": 2.693294185106562e-05, |
| "loss": 7.5199, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.15835777126099707, |
| "grad_norm": 5.481312274932861, |
| "learning_rate": 2.6202630348146324e-05, |
| "loss": 7.0232, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.15953079178885632, |
| "grad_norm": 6.59968900680542, |
| "learning_rate": 2.547882480847461e-05, |
| "loss": 6.421, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.16070381231671554, |
| "grad_norm": 5.844842433929443, |
| "learning_rate": 2.476172311325783e-05, |
| "loss": 6.9942, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.16187683284457477, |
| "grad_norm": 6.2577056884765625, |
| "learning_rate": 2.405152131093926e-05, |
| "loss": 6.4066, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.16304985337243402, |
| "grad_norm": 7.429435729980469, |
| "learning_rate": 2.3348413563600325e-05, |
| "loss": 7.0302, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.16422287390029325, |
| "grad_norm": 7.769280910491943, |
| "learning_rate": 2.2652592093878666e-05, |
| "loss": 6.7873, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1653958944281525, |
| "grad_norm": 7.310204982757568, |
| "learning_rate": 2.196424713241637e-05, |
| "loss": 7.0036, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.16656891495601173, |
| "grad_norm": 9.804479598999023, |
| "learning_rate": 2.128356686585282e-05, |
| "loss": 7.0607, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.16774193548387098, |
| "grad_norm": 8.672088623046875, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 7.1593, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.1689149560117302, |
| "grad_norm": 12.077096939086914, |
| "learning_rate": 1.9945942635848748e-05, |
| "loss": 7.5432, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.17008797653958943, |
| "grad_norm": 12.569242477416992, |
| "learning_rate": 1.928936436551661e-05, |
| "loss": 6.4717, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.17126099706744868, |
| "grad_norm": 13.25029468536377, |
| "learning_rate": 1.8641182076323148e-05, |
| "loss": 7.6049, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.1724340175953079, |
| "grad_norm": 15.713706016540527, |
| "learning_rate": 1.800157297483417e-05, |
| "loss": 7.0083, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.17360703812316716, |
| "grad_norm": 18.675628662109375, |
| "learning_rate": 1.7370711923791567e-05, |
| "loss": 7.4158, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1747800586510264, |
| "grad_norm": 20.445755004882812, |
| "learning_rate": 1.6748771394307585e-05, |
| "loss": 8.0984, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.17595307917888564, |
| "grad_norm": 31.651071548461914, |
| "learning_rate": 1.6135921418712956e-05, |
| "loss": 9.0892, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17595307917888564, |
| "eval_loss": 1.4641185998916626, |
| "eval_runtime": 54.969, |
| "eval_samples_per_second": 26.124, |
| "eval_steps_per_second": 6.531, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17712609970674487, |
| "grad_norm": 3.545945882797241, |
| "learning_rate": 1.553232954407171e-05, |
| "loss": 6.3979, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.17829912023460412, |
| "grad_norm": 3.4330179691314697, |
| "learning_rate": 1.4938160786375572e-05, |
| "loss": 5.8609, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.17947214076246334, |
| "grad_norm": 3.8319311141967773, |
| "learning_rate": 1.435357758543015e-05, |
| "loss": 6.8796, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.18064516129032257, |
| "grad_norm": 4.39741849899292, |
| "learning_rate": 1.3778739760445552e-05, |
| "loss": 6.2018, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 4.3072357177734375, |
| "learning_rate": 1.3213804466343421e-05, |
| "loss": 6.1792, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.18299120234604105, |
| "grad_norm": 3.892882823944092, |
| "learning_rate": 1.2658926150792322e-05, |
| "loss": 6.3373, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.1841642228739003, |
| "grad_norm": 3.4460794925689697, |
| "learning_rate": 1.2114256511983274e-05, |
| "loss": 6.3294, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.18533724340175953, |
| "grad_norm": 3.7324860095977783, |
| "learning_rate": 1.157994445715706e-05, |
| "loss": 6.3531, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.18651026392961878, |
| "grad_norm": 3.388925552368164, |
| "learning_rate": 1.1056136061894384e-05, |
| "loss": 6.4866, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.187683284457478, |
| "grad_norm": 3.5137898921966553, |
| "learning_rate": 1.0542974530180327e-05, |
| "loss": 6.6119, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18885630498533723, |
| "grad_norm": 3.3227107524871826, |
| "learning_rate": 1.0040600155253765e-05, |
| "loss": 6.3487, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.19002932551319648, |
| "grad_norm": 3.202991485595703, |
| "learning_rate": 9.549150281252633e-06, |
| "loss": 6.5303, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1912023460410557, |
| "grad_norm": 4.062410831451416, |
| "learning_rate": 9.068759265665384e-06, |
| "loss": 6.5675, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.19237536656891496, |
| "grad_norm": 3.620666265487671, |
| "learning_rate": 8.599558442598998e-06, |
| "loss": 6.4053, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.1935483870967742, |
| "grad_norm": 3.3483715057373047, |
| "learning_rate": 8.141676086873572e-06, |
| "loss": 6.7736, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.19472140762463344, |
| "grad_norm": 3.302370309829712, |
| "learning_rate": 7.695237378953223e-06, |
| "loss": 6.3168, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.19589442815249267, |
| "grad_norm": 4.081686973571777, |
| "learning_rate": 7.260364370723044e-06, |
| "loss": 7.0721, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.19706744868035192, |
| "grad_norm": 3.7871413230895996, |
| "learning_rate": 6.837175952121306e-06, |
| "loss": 6.6491, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.19824046920821115, |
| "grad_norm": 3.5725629329681396, |
| "learning_rate": 6.425787818636131e-06, |
| "loss": 7.232, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.19941348973607037, |
| "grad_norm": 3.952382802963257, |
| "learning_rate": 6.026312439675552e-06, |
| "loss": 6.9474, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.20058651026392962, |
| "grad_norm": 3.558424711227417, |
| "learning_rate": 5.6388590278194096e-06, |
| "loss": 6.8185, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.20175953079178885, |
| "grad_norm": 4.020036697387695, |
| "learning_rate": 5.263533508961827e-06, |
| "loss": 6.9452, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2029325513196481, |
| "grad_norm": 4.385979175567627, |
| "learning_rate": 4.900438493352055e-06, |
| "loss": 6.7301, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.20410557184750733, |
| "grad_norm": 4.704169750213623, |
| "learning_rate": 4.549673247541875e-06, |
| "loss": 6.6311, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.20527859237536658, |
| "grad_norm": 4.294439315795898, |
| "learning_rate": 4.2113336672471245e-06, |
| "loss": 7.0591, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2064516129032258, |
| "grad_norm": 4.28928279876709, |
| "learning_rate": 3.885512251130763e-06, |
| "loss": 6.6714, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.20762463343108503, |
| "grad_norm": 4.021836757659912, |
| "learning_rate": 3.5722980755146517e-06, |
| "loss": 6.8467, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.20879765395894428, |
| "grad_norm": 4.013036727905273, |
| "learning_rate": 3.271776770026963e-06, |
| "loss": 6.4059, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2099706744868035, |
| "grad_norm": 4.504580020904541, |
| "learning_rate": 2.9840304941919415e-06, |
| "loss": 6.9156, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.21114369501466276, |
| "grad_norm": 4.332256317138672, |
| "learning_rate": 2.7091379149682685e-06, |
| "loss": 6.9564, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.212316715542522, |
| "grad_norm": 4.6176347732543945, |
| "learning_rate": 2.4471741852423237e-06, |
| "loss": 6.9044, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.21348973607038124, |
| "grad_norm": 5.39663028717041, |
| "learning_rate": 2.1982109232821178e-06, |
| "loss": 7.2944, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.21466275659824047, |
| "grad_norm": 4.653443813323975, |
| "learning_rate": 1.962316193157593e-06, |
| "loss": 7.0987, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.21583577712609972, |
| "grad_norm": 6.479732513427734, |
| "learning_rate": 1.7395544861325718e-06, |
| "loss": 6.1803, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.21700879765395895, |
| "grad_norm": 6.245946884155273, |
| "learning_rate": 1.5299867030334814e-06, |
| "loss": 6.7305, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.21818181818181817, |
| "grad_norm": 6.659122943878174, |
| "learning_rate": 1.333670137599713e-06, |
| "loss": 7.5387, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.21935483870967742, |
| "grad_norm": 5.82586669921875, |
| "learning_rate": 1.1506584608200367e-06, |
| "loss": 6.5154, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.22052785923753665, |
| "grad_norm": 6.484030723571777, |
| "learning_rate": 9.810017062595322e-07, |
| "loss": 6.526, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2217008797653959, |
| "grad_norm": 5.605183124542236, |
| "learning_rate": 8.247462563808817e-07, |
| "loss": 6.6999, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.22287390029325513, |
| "grad_norm": 6.196300983428955, |
| "learning_rate": 6.819348298638839e-07, |
| "loss": 6.4769, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.22404692082111438, |
| "grad_norm": 7.179093837738037, |
| "learning_rate": 5.526064699265753e-07, |
| "loss": 7.2343, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2252199413489736, |
| "grad_norm": 7.460024356842041, |
| "learning_rate": 4.367965336512403e-07, |
| "loss": 7.5183, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.22639296187683283, |
| "grad_norm": 9.167801856994629, |
| "learning_rate": 3.3453668231809286e-07, |
| "loss": 6.9693, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.22756598240469209, |
| "grad_norm": 8.804646492004395, |
| "learning_rate": 2.458548727494292e-07, |
| "loss": 7.5381, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.2287390029325513, |
| "grad_norm": 9.841207504272461, |
| "learning_rate": 1.7077534966650766e-07, |
| "loss": 7.295, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.22991202346041056, |
| "grad_norm": 13.219451904296875, |
| "learning_rate": 1.0931863906127327e-07, |
| "loss": 7.4361, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2310850439882698, |
| "grad_norm": 15.988734245300293, |
| "learning_rate": 6.150154258476315e-08, |
| "loss": 7.2263, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.23225806451612904, |
| "grad_norm": 15.204376220703125, |
| "learning_rate": 2.7337132953697554e-08, |
| "loss": 9.2538, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.23343108504398827, |
| "grad_norm": 22.735008239746094, |
| "learning_rate": 6.834750376549792e-09, |
| "loss": 9.2901, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.23460410557184752, |
| "grad_norm": 72.2196273803711, |
| "learning_rate": 0.0, |
| "loss": 10.607, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23460410557184752, |
| "eval_loss": 1.4371888637542725, |
| "eval_runtime": 54.9673, |
| "eval_samples_per_second": 26.125, |
| "eval_steps_per_second": 6.531, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.169711928705024e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |