| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 293, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0034129692832764505, |
| "grad_norm": 0.4375, |
| "learning_rate": 1e-05, |
| "loss": 2.098, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006825938566552901, |
| "grad_norm": 0.419921875, |
| "learning_rate": 9.965870307167235e-06, |
| "loss": 1.9904, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.010238907849829351, |
| "grad_norm": 0.39453125, |
| "learning_rate": 9.931740614334472e-06, |
| "loss": 1.9246, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.013651877133105802, |
| "grad_norm": 0.40625, |
| "learning_rate": 9.897610921501706e-06, |
| "loss": 1.9454, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.017064846416382253, |
| "grad_norm": 0.41796875, |
| "learning_rate": 9.863481228668942e-06, |
| "loss": 2.0677, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.020477815699658702, |
| "grad_norm": 0.380859375, |
| "learning_rate": 9.829351535836179e-06, |
| "loss": 1.9607, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.023890784982935155, |
| "grad_norm": 0.37109375, |
| "learning_rate": 9.795221843003415e-06, |
| "loss": 1.8974, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.027303754266211604, |
| "grad_norm": 0.361328125, |
| "learning_rate": 9.76109215017065e-06, |
| "loss": 1.9186, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.030716723549488054, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.726962457337886e-06, |
| "loss": 2.0028, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.034129692832764506, |
| "grad_norm": 0.369140625, |
| "learning_rate": 9.69283276450512e-06, |
| "loss": 1.9483, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03754266211604096, |
| "grad_norm": 0.376953125, |
| "learning_rate": 9.658703071672356e-06, |
| "loss": 2.0182, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.040955631399317405, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.62457337883959e-06, |
| "loss": 1.9185, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04436860068259386, |
| "grad_norm": 0.359375, |
| "learning_rate": 9.590443686006825e-06, |
| "loss": 1.9198, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04778156996587031, |
| "grad_norm": 0.34765625, |
| "learning_rate": 9.556313993174062e-06, |
| "loss": 1.7946, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.051194539249146756, |
| "grad_norm": 0.34765625, |
| "learning_rate": 9.522184300341298e-06, |
| "loss": 1.8384, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05460750853242321, |
| "grad_norm": 0.318359375, |
| "learning_rate": 9.488054607508534e-06, |
| "loss": 1.7561, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05802047781569966, |
| "grad_norm": 0.302734375, |
| "learning_rate": 9.453924914675769e-06, |
| "loss": 1.8221, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.06143344709897611, |
| "grad_norm": 0.306640625, |
| "learning_rate": 9.419795221843005e-06, |
| "loss": 1.8518, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06484641638225255, |
| "grad_norm": 0.291015625, |
| "learning_rate": 9.38566552901024e-06, |
| "loss": 1.7368, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06825938566552901, |
| "grad_norm": 0.283203125, |
| "learning_rate": 9.351535836177476e-06, |
| "loss": 1.7257, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07167235494880546, |
| "grad_norm": 0.28515625, |
| "learning_rate": 9.31740614334471e-06, |
| "loss": 1.6868, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.07508532423208192, |
| "grad_norm": 0.283203125, |
| "learning_rate": 9.283276450511946e-06, |
| "loss": 1.7257, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07849829351535836, |
| "grad_norm": 0.314453125, |
| "learning_rate": 9.249146757679181e-06, |
| "loss": 1.6474, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08191126279863481, |
| "grad_norm": 0.275390625, |
| "learning_rate": 9.215017064846417e-06, |
| "loss": 1.7366, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08532423208191127, |
| "grad_norm": 0.291015625, |
| "learning_rate": 9.180887372013653e-06, |
| "loss": 1.7124, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08873720136518772, |
| "grad_norm": 0.251953125, |
| "learning_rate": 9.146757679180888e-06, |
| "loss": 1.6561, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.09215017064846416, |
| "grad_norm": 0.275390625, |
| "learning_rate": 9.112627986348124e-06, |
| "loss": 1.7343, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.09556313993174062, |
| "grad_norm": 0.265625, |
| "learning_rate": 9.078498293515359e-06, |
| "loss": 1.6355, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09897610921501707, |
| "grad_norm": 0.244140625, |
| "learning_rate": 9.044368600682595e-06, |
| "loss": 1.6064, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.10238907849829351, |
| "grad_norm": 0.265625, |
| "learning_rate": 9.01023890784983e-06, |
| "loss": 1.6406, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10580204778156997, |
| "grad_norm": 0.23828125, |
| "learning_rate": 8.976109215017066e-06, |
| "loss": 1.6211, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.10921501706484642, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 8.9419795221843e-06, |
| "loss": 1.6092, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.11262798634812286, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.907849829351536e-06, |
| "loss": 1.6817, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.11604095563139932, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 8.873720136518773e-06, |
| "loss": 1.5899, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11945392491467577, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 8.839590443686009e-06, |
| "loss": 1.5608, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.12286689419795221, |
| "grad_norm": 0.232421875, |
| "learning_rate": 8.805460750853243e-06, |
| "loss": 1.5448, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.12627986348122866, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 8.771331058020478e-06, |
| "loss": 1.4979, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1296928327645051, |
| "grad_norm": 0.220703125, |
| "learning_rate": 8.737201365187714e-06, |
| "loss": 1.5345, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.13310580204778158, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 8.703071672354949e-06, |
| "loss": 1.5387, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.13651877133105803, |
| "grad_norm": 0.302734375, |
| "learning_rate": 8.668941979522185e-06, |
| "loss": 1.4665, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13993174061433447, |
| "grad_norm": 0.21875, |
| "learning_rate": 8.63481228668942e-06, |
| "loss": 1.5719, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.14334470989761092, |
| "grad_norm": 0.203125, |
| "learning_rate": 8.600682593856656e-06, |
| "loss": 1.5123, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.14675767918088736, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 8.566552901023892e-06, |
| "loss": 1.3894, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.15017064846416384, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 8.532423208191128e-06, |
| "loss": 1.4781, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.15358361774744028, |
| "grad_norm": 0.2578125, |
| "learning_rate": 8.498293515358363e-06, |
| "loss": 1.5839, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.15699658703071673, |
| "grad_norm": 0.23046875, |
| "learning_rate": 8.464163822525599e-06, |
| "loss": 1.4626, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.16040955631399317, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 8.430034129692833e-06, |
| "loss": 1.4703, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.16382252559726962, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 8.395904436860068e-06, |
| "loss": 1.485, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.16723549488054607, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 8.361774744027304e-06, |
| "loss": 1.4331, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.17064846416382254, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 8.327645051194539e-06, |
| "loss": 1.4434, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.17406143344709898, |
| "grad_norm": 0.205078125, |
| "learning_rate": 8.293515358361775e-06, |
| "loss": 1.488, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.17747440273037543, |
| "grad_norm": 0.18359375, |
| "learning_rate": 8.259385665529011e-06, |
| "loss": 1.4482, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.18088737201365188, |
| "grad_norm": 0.197265625, |
| "learning_rate": 8.225255972696247e-06, |
| "loss": 1.5022, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.18430034129692832, |
| "grad_norm": 0.181640625, |
| "learning_rate": 8.191126279863482e-06, |
| "loss": 1.4294, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.18771331058020477, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 8.156996587030718e-06, |
| "loss": 1.4651, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.19112627986348124, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 8.122866894197953e-06, |
| "loss": 1.417, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1945392491467577, |
| "grad_norm": 0.193359375, |
| "learning_rate": 8.088737201365189e-06, |
| "loss": 1.4324, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.19795221843003413, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 8.054607508532423e-06, |
| "loss": 1.4433, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.20136518771331058, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 8.02047781569966e-06, |
| "loss": 1.3302, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.20477815699658702, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 7.986348122866894e-06, |
| "loss": 1.4283, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.20819112627986347, |
| "grad_norm": 0.1796875, |
| "learning_rate": 7.95221843003413e-06, |
| "loss": 1.3951, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.21160409556313994, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 7.918088737201367e-06, |
| "loss": 1.4534, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2150170648464164, |
| "grad_norm": 0.189453125, |
| "learning_rate": 7.883959044368601e-06, |
| "loss": 1.4199, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.21843003412969283, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.849829351535837e-06, |
| "loss": 1.3395, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.22184300341296928, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.815699658703072e-06, |
| "loss": 1.4051, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.22525597269624573, |
| "grad_norm": 0.224609375, |
| "learning_rate": 7.781569965870308e-06, |
| "loss": 1.3764, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.22866894197952217, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 7.747440273037543e-06, |
| "loss": 1.373, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.23208191126279865, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 7.713310580204779e-06, |
| "loss": 1.3301, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2354948805460751, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 7.679180887372013e-06, |
| "loss": 1.3786, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.23890784982935154, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.64505119453925e-06, |
| "loss": 1.3802, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24232081911262798, |
| "grad_norm": 0.322265625, |
| "learning_rate": 7.610921501706485e-06, |
| "loss": 1.3011, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.24573378839590443, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 7.57679180887372e-06, |
| "loss": 1.3053, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.24914675767918087, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.542662116040957e-06, |
| "loss": 1.3408, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2525597269624573, |
| "grad_norm": 0.26171875, |
| "learning_rate": 7.508532423208191e-06, |
| "loss": 1.313, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.25597269624573377, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.474402730375427e-06, |
| "loss": 1.3259, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2593856655290102, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 7.440273037542663e-06, |
| "loss": 1.3397, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2627986348122867, |
| "grad_norm": 0.162109375, |
| "learning_rate": 7.406143344709898e-06, |
| "loss": 1.315, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.26621160409556316, |
| "grad_norm": 0.279296875, |
| "learning_rate": 7.3720136518771335e-06, |
| "loss": 1.3648, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2696245733788396, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 7.33788395904437e-06, |
| "loss": 1.2991, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.27303754266211605, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 7.303754266211604e-06, |
| "loss": 1.2913, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2764505119453925, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 7.2696245733788405e-06, |
| "loss": 1.3413, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.27986348122866894, |
| "grad_norm": 0.1640625, |
| "learning_rate": 7.235494880546076e-06, |
| "loss": 1.3348, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2832764505119454, |
| "grad_norm": 0.255859375, |
| "learning_rate": 7.201365187713312e-06, |
| "loss": 1.3224, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.28668941979522183, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 7.167235494880547e-06, |
| "loss": 1.2809, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2901023890784983, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 7.133105802047782e-06, |
| "loss": 1.2331, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2935153583617747, |
| "grad_norm": 0.1796875, |
| "learning_rate": 7.098976109215017e-06, |
| "loss": 1.3094, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.29692832764505117, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 7.064846416382253e-06, |
| "loss": 1.2913, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3003412969283277, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 7.030716723549489e-06, |
| "loss": 1.291, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3037542662116041, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 6.9965870307167235e-06, |
| "loss": 1.2663, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.30716723549488056, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.96245733788396e-06, |
| "loss": 1.2546, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.310580204778157, |
| "grad_norm": 0.310546875, |
| "learning_rate": 6.928327645051195e-06, |
| "loss": 1.3238, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.31399317406143346, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 6.894197952218431e-06, |
| "loss": 1.2953, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3174061433447099, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 6.860068259385666e-06, |
| "loss": 1.3402, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.32081911262798635, |
| "grad_norm": 0.1640625, |
| "learning_rate": 6.825938566552902e-06, |
| "loss": 1.3209, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3242320819112628, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 6.7918088737201375e-06, |
| "loss": 1.2684, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.32764505119453924, |
| "grad_norm": 0.16015625, |
| "learning_rate": 6.757679180887372e-06, |
| "loss": 1.2606, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3310580204778157, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.723549488054608e-06, |
| "loss": 1.2737, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.33447098976109213, |
| "grad_norm": 0.16796875, |
| "learning_rate": 6.689419795221843e-06, |
| "loss": 1.3183, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3378839590443686, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 6.655290102389079e-06, |
| "loss": 1.283, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.3412969283276451, |
| "grad_norm": 0.185546875, |
| "learning_rate": 6.621160409556314e-06, |
| "loss": 1.3525, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3447098976109215, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 6.587030716723551e-06, |
| "loss": 1.2137, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.34812286689419797, |
| "grad_norm": 0.18359375, |
| "learning_rate": 6.552901023890785e-06, |
| "loss": 1.3545, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3515358361774744, |
| "grad_norm": 0.169921875, |
| "learning_rate": 6.518771331058021e-06, |
| "loss": 1.2902, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.35494880546075086, |
| "grad_norm": 0.173828125, |
| "learning_rate": 6.484641638225257e-06, |
| "loss": 1.3484, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3583617747440273, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 6.450511945392492e-06, |
| "loss": 1.2494, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.36177474402730375, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 6.4163822525597275e-06, |
| "loss": 1.2387, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3651877133105802, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 6.382252559726962e-06, |
| "loss": 1.3741, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.36860068259385664, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.348122866894198e-06, |
| "loss": 1.2862, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3720136518771331, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.313993174061434e-06, |
| "loss": 1.2937, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.37542662116040953, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 6.27986348122867e-06, |
| "loss": 1.2434, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.378839590443686, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 6.245733788395904e-06, |
| "loss": 1.3098, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3822525597269625, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 6.211604095563141e-06, |
| "loss": 1.3057, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3856655290102389, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 6.177474402730376e-06, |
| "loss": 1.2747, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3890784982935154, |
| "grad_norm": 0.22265625, |
| "learning_rate": 6.143344709897611e-06, |
| "loss": 1.2129, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3924914675767918, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 6.109215017064847e-06, |
| "loss": 1.2678, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.39590443686006827, |
| "grad_norm": 0.244140625, |
| "learning_rate": 6.075085324232083e-06, |
| "loss": 1.2789, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3993174061433447, |
| "grad_norm": 0.1484375, |
| "learning_rate": 6.0409556313993175e-06, |
| "loss": 1.2183, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.40273037542662116, |
| "grad_norm": 0.2109375, |
| "learning_rate": 6.006825938566554e-06, |
| "loss": 1.2227, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.4061433447098976, |
| "grad_norm": 0.177734375, |
| "learning_rate": 5.972696245733789e-06, |
| "loss": 1.2714, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.40955631399317405, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 5.938566552901024e-06, |
| "loss": 1.2793, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4129692832764505, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 5.90443686006826e-06, |
| "loss": 1.1814, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.41638225255972694, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 5.870307167235495e-06, |
| "loss": 1.2612, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4197952218430034, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 5.8361774744027315e-06, |
| "loss": 1.1921, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4232081911262799, |
| "grad_norm": 0.158203125, |
| "learning_rate": 5.802047781569966e-06, |
| "loss": 1.2512, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.42662116040955633, |
| "grad_norm": 0.16796875, |
| "learning_rate": 5.767918088737202e-06, |
| "loss": 1.2593, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4300341296928328, |
| "grad_norm": 0.220703125, |
| "learning_rate": 5.733788395904437e-06, |
| "loss": 1.2941, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4334470989761092, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.699658703071673e-06, |
| "loss": 1.2128, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.43686006825938567, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.665529010238908e-06, |
| "loss": 1.2025, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4402730375426621, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 5.631399317406145e-06, |
| "loss": 1.2257, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.44368600682593856, |
| "grad_norm": 0.16015625, |
| "learning_rate": 5.597269624573379e-06, |
| "loss": 1.2245, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.447098976109215, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 5.5631399317406145e-06, |
| "loss": 1.1971, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.45051194539249145, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 5.529010238907851e-06, |
| "loss": 1.2063, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.4539249146757679, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.494880546075085e-06, |
| "loss": 1.2045, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.45733788395904434, |
| "grad_norm": 0.1875, |
| "learning_rate": 5.4607508532423215e-06, |
| "loss": 1.3064, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.46075085324232085, |
| "grad_norm": 0.18359375, |
| "learning_rate": 5.426621160409556e-06, |
| "loss": 1.1963, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4641638225255973, |
| "grad_norm": 0.1640625, |
| "learning_rate": 5.392491467576792e-06, |
| "loss": 1.2109, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.46757679180887374, |
| "grad_norm": 0.16796875, |
| "learning_rate": 5.358361774744028e-06, |
| "loss": 1.2651, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.4709897610921502, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 5.324232081911264e-06, |
| "loss": 1.1986, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.47440273037542663, |
| "grad_norm": 0.158203125, |
| "learning_rate": 5.290102389078498e-06, |
| "loss": 1.261, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.4778156996587031, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 5.255972696245735e-06, |
| "loss": 1.1949, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4812286689419795, |
| "grad_norm": 0.17578125, |
| "learning_rate": 5.22184300341297e-06, |
| "loss": 1.1967, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.48464163822525597, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.1877133105802046e-06, |
| "loss": 1.2054, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4880546075085324, |
| "grad_norm": 0.212890625, |
| "learning_rate": 5.153583617747441e-06, |
| "loss": 1.2582, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.49146757679180886, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 5.119453924914676e-06, |
| "loss": 1.2465, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4948805460750853, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 5.0853242320819115e-06, |
| "loss": 1.1709, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.49829351535836175, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.051194539249147e-06, |
| "loss": 1.2741, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5017064846416383, |
| "grad_norm": 0.173828125, |
| "learning_rate": 5.017064846416383e-06, |
| "loss": 1.2093, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5051194539249146, |
| "grad_norm": 0.162109375, |
| "learning_rate": 4.982935153583618e-06, |
| "loss": 1.2348, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5085324232081911, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 4.948805460750853e-06, |
| "loss": 1.2276, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.5119453924914675, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.914675767918089e-06, |
| "loss": 1.2371, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.515358361774744, |
| "grad_norm": 0.1640625, |
| "learning_rate": 4.880546075085325e-06, |
| "loss": 1.2013, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5187713310580204, |
| "grad_norm": 0.171875, |
| "learning_rate": 4.84641638225256e-06, |
| "loss": 1.1444, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5221843003412969, |
| "grad_norm": 0.193359375, |
| "learning_rate": 4.812286689419795e-06, |
| "loss": 1.2329, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5255972696245734, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.778156996587031e-06, |
| "loss": 1.2572, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5290102389078498, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 4.744027303754267e-06, |
| "loss": 1.2378, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5324232081911263, |
| "grad_norm": 0.349609375, |
| "learning_rate": 4.709897610921502e-06, |
| "loss": 1.192, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5358361774744027, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 4.675767918088738e-06, |
| "loss": 1.2053, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5392491467576792, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 4.641638225255973e-06, |
| "loss": 1.2316, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5426621160409556, |
| "grad_norm": 0.16796875, |
| "learning_rate": 4.6075085324232085e-06, |
| "loss": 1.2648, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5460750853242321, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 4.573378839590444e-06, |
| "loss": 1.1857, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5494880546075085, |
| "grad_norm": 0.15625, |
| "learning_rate": 4.539249146757679e-06, |
| "loss": 1.2094, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.552901023890785, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 4.505119453924915e-06, |
| "loss": 1.2234, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5563139931740614, |
| "grad_norm": 0.16015625, |
| "learning_rate": 4.47098976109215e-06, |
| "loss": 1.209, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5597269624573379, |
| "grad_norm": 0.185546875, |
| "learning_rate": 4.436860068259386e-06, |
| "loss": 1.234, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5631399317406144, |
| "grad_norm": 0.23828125, |
| "learning_rate": 4.402730375426622e-06, |
| "loss": 1.1981, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5665529010238908, |
| "grad_norm": 0.3125, |
| "learning_rate": 4.368600682593857e-06, |
| "loss": 1.2415, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5699658703071673, |
| "grad_norm": 0.1640625, |
| "learning_rate": 4.3344709897610924e-06, |
| "loss": 1.2153, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5733788395904437, |
| "grad_norm": 0.197265625, |
| "learning_rate": 4.300341296928328e-06, |
| "loss": 1.2421, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5767918088737202, |
| "grad_norm": 0.359375, |
| "learning_rate": 4.266211604095564e-06, |
| "loss": 1.2309, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5802047781569966, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.232081911262799e-06, |
| "loss": 1.178, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5836177474402731, |
| "grad_norm": 0.228515625, |
| "learning_rate": 4.197952218430034e-06, |
| "loss": 1.2564, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5870307167235495, |
| "grad_norm": 0.17578125, |
| "learning_rate": 4.163822525597269e-06, |
| "loss": 1.2054, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.590443686006826, |
| "grad_norm": 0.16015625, |
| "learning_rate": 4.1296928327645055e-06, |
| "loss": 1.1599, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5938566552901023, |
| "grad_norm": 0.2109375, |
| "learning_rate": 4.095563139931741e-06, |
| "loss": 1.25, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5972696245733788, |
| "grad_norm": 0.224609375, |
| "learning_rate": 4.061433447098976e-06, |
| "loss": 1.2541, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6006825938566553, |
| "grad_norm": 0.1640625, |
| "learning_rate": 4.027303754266212e-06, |
| "loss": 1.1981, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.6040955631399317, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 3.993174061433447e-06, |
| "loss": 1.2048, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.6075085324232082, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 3.959044368600683e-06, |
| "loss": 1.2193, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6109215017064846, |
| "grad_norm": 0.302734375, |
| "learning_rate": 3.924914675767919e-06, |
| "loss": 1.2185, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.6143344709897611, |
| "grad_norm": 0.31640625, |
| "learning_rate": 3.890784982935154e-06, |
| "loss": 1.2896, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6177474402730375, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 3.8566552901023894e-06, |
| "loss": 1.1668, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.621160409556314, |
| "grad_norm": 0.158203125, |
| "learning_rate": 3.822525597269625e-06, |
| "loss": 1.2119, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.6245733788395904, |
| "grad_norm": 0.19921875, |
| "learning_rate": 3.78839590443686e-06, |
| "loss": 1.2454, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6279863481228669, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 3.7542662116040956e-06, |
| "loss": 1.2037, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6313993174061433, |
| "grad_norm": 0.212890625, |
| "learning_rate": 3.7201365187713314e-06, |
| "loss": 1.1692, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6348122866894198, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 3.6860068259385667e-06, |
| "loss": 1.2523, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6382252559726962, |
| "grad_norm": 0.16015625, |
| "learning_rate": 3.651877133105802e-06, |
| "loss": 1.2013, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6416382252559727, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.617747440273038e-06, |
| "loss": 1.1724, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6450511945392492, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.5836177474402733e-06, |
| "loss": 1.192, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6484641638225256, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.5494880546075087e-06, |
| "loss": 1.1876, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6518771331058021, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 3.5153583617747445e-06, |
| "loss": 1.2077, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6552901023890785, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 3.48122866894198e-06, |
| "loss": 1.1763, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.658703071672355, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.4470989761092157e-06, |
| "loss": 1.1838, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6621160409556314, |
| "grad_norm": 0.25390625, |
| "learning_rate": 3.412969283276451e-06, |
| "loss": 1.1777, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6655290102389079, |
| "grad_norm": 0.251953125, |
| "learning_rate": 3.378839590443686e-06, |
| "loss": 1.2041, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6689419795221843, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 3.3447098976109214e-06, |
| "loss": 1.2381, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6723549488054608, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.310580204778157e-06, |
| "loss": 1.1874, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6757679180887372, |
| "grad_norm": 0.20703125, |
| "learning_rate": 3.2764505119453926e-06, |
| "loss": 1.1845, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6791808873720137, |
| "grad_norm": 0.15625, |
| "learning_rate": 3.2423208191126284e-06, |
| "loss": 1.181, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6825938566552902, |
| "grad_norm": 0.185546875, |
| "learning_rate": 3.2081911262798638e-06, |
| "loss": 1.2325, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6860068259385665, |
| "grad_norm": 0.169921875, |
| "learning_rate": 3.174061433447099e-06, |
| "loss": 1.1921, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.689419795221843, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.139931740614335e-06, |
| "loss": 1.1966, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6928327645051194, |
| "grad_norm": 0.166015625, |
| "learning_rate": 3.1058020477815703e-06, |
| "loss": 1.1414, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6962457337883959, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.0716723549488057e-06, |
| "loss": 1.1206, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6996587030716723, |
| "grad_norm": 0.189453125, |
| "learning_rate": 3.0375426621160415e-06, |
| "loss": 1.147, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7030716723549488, |
| "grad_norm": 0.216796875, |
| "learning_rate": 3.003412969283277e-06, |
| "loss": 1.1238, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.7064846416382252, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 2.969283276450512e-06, |
| "loss": 1.2101, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.7098976109215017, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 2.9351535836177476e-06, |
| "loss": 1.1966, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.7133105802047781, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 2.901023890784983e-06, |
| "loss": 1.1808, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.7167235494880546, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 2.8668941979522184e-06, |
| "loss": 1.2115, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7201365187713311, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.832764505119454e-06, |
| "loss": 1.2059, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.7235494880546075, |
| "grad_norm": 0.26953125, |
| "learning_rate": 2.7986348122866896e-06, |
| "loss": 1.1821, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.726962457337884, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 2.7645051194539254e-06, |
| "loss": 1.2195, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7303754266211604, |
| "grad_norm": 0.158203125, |
| "learning_rate": 2.7303754266211608e-06, |
| "loss": 1.1515, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7337883959044369, |
| "grad_norm": 0.166015625, |
| "learning_rate": 2.696245733788396e-06, |
| "loss": 1.2382, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7372013651877133, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.662116040955632e-06, |
| "loss": 1.1684, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7406143344709898, |
| "grad_norm": 0.173828125, |
| "learning_rate": 2.6279863481228673e-06, |
| "loss": 1.1336, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7440273037542662, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.5938566552901023e-06, |
| "loss": 1.1603, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7474402730375427, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 2.559726962457338e-06, |
| "loss": 1.1898, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7508532423208191, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 2.5255972696245735e-06, |
| "loss": 1.2161, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7542662116040956, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.491467576791809e-06, |
| "loss": 1.1927, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.757679180887372, |
| "grad_norm": 0.154296875, |
| "learning_rate": 2.4573378839590446e-06, |
| "loss": 1.147, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7610921501706485, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 2.42320819112628e-06, |
| "loss": 1.1513, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.764505119453925, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 2.3890784982935154e-06, |
| "loss": 1.1733, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7679180887372014, |
| "grad_norm": 0.291015625, |
| "learning_rate": 2.354948805460751e-06, |
| "loss": 1.1957, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7713310580204779, |
| "grad_norm": 0.169921875, |
| "learning_rate": 2.3208191126279866e-06, |
| "loss": 1.1909, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7747440273037542, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.286689419795222e-06, |
| "loss": 1.1856, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7781569965870307, |
| "grad_norm": 0.17578125, |
| "learning_rate": 2.2525597269624573e-06, |
| "loss": 1.146, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7815699658703071, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.218430034129693e-06, |
| "loss": 1.1223, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7849829351535836, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 2.1843003412969285e-06, |
| "loss": 1.195, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.78839590443686, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 2.150170648464164e-06, |
| "loss": 1.1762, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7918088737201365, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 2.1160409556313997e-06, |
| "loss": 1.2074, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7952218430034129, |
| "grad_norm": 0.166015625, |
| "learning_rate": 2.0819112627986347e-06, |
| "loss": 1.2056, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7986348122866894, |
| "grad_norm": 0.22265625, |
| "learning_rate": 2.0477815699658705e-06, |
| "loss": 1.1656, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.8020477815699659, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 2.013651877133106e-06, |
| "loss": 1.2169, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8054607508532423, |
| "grad_norm": 0.16796875, |
| "learning_rate": 1.9795221843003416e-06, |
| "loss": 1.1713, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.8088737201365188, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.945392491467577e-06, |
| "loss": 1.187, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.8122866894197952, |
| "grad_norm": 0.248046875, |
| "learning_rate": 1.9112627986348124e-06, |
| "loss": 1.2144, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.8156996587030717, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.8771331058020478e-06, |
| "loss": 1.2708, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.8191126279863481, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.8430034129692834e-06, |
| "loss": 1.2218, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8225255972696246, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.808873720136519e-06, |
| "loss": 1.1734, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.825938566552901, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.7747440273037543e-06, |
| "loss": 1.2534, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.8293515358361775, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 1.74061433447099e-06, |
| "loss": 1.1887, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8327645051194539, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 1.7064846416382255e-06, |
| "loss": 1.1736, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8361774744027304, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.6723549488054607e-06, |
| "loss": 1.1802, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8395904436860068, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 1.6382252559726963e-06, |
| "loss": 1.1557, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.8430034129692833, |
| "grad_norm": 0.1875, |
| "learning_rate": 1.6040955631399319e-06, |
| "loss": 1.162, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.8464163822525598, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.5699658703071675e-06, |
| "loss": 1.1774, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.8498293515358362, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.5358361774744028e-06, |
| "loss": 1.1862, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.8532423208191127, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.5017064846416384e-06, |
| "loss": 1.183, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.856655290102389, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 1.4675767918088738e-06, |
| "loss": 1.1744, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.8600682593856656, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.4334470989761092e-06, |
| "loss": 1.1734, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.863481228668942, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.3993174061433448e-06, |
| "loss": 1.1626, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8668941979522184, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.3651877133105804e-06, |
| "loss": 1.2057, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.8703071672354948, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.331058020477816e-06, |
| "loss": 1.1584, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8737201365187713, |
| "grad_norm": 0.275390625, |
| "learning_rate": 1.2969283276450511e-06, |
| "loss": 1.142, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8771331058020477, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 1.2627986348122867e-06, |
| "loss": 1.1179, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8805460750853242, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.2286689419795223e-06, |
| "loss": 1.1862, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8839590443686007, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.1945392491467577e-06, |
| "loss": 1.1537, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.8873720136518771, |
| "grad_norm": 0.166015625, |
| "learning_rate": 1.1604095563139933e-06, |
| "loss": 1.1028, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8907849829351536, |
| "grad_norm": 0.251953125, |
| "learning_rate": 1.1262798634812287e-06, |
| "loss": 1.1959, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.89419795221843, |
| "grad_norm": 0.15625, |
| "learning_rate": 1.0921501706484643e-06, |
| "loss": 1.197, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8976109215017065, |
| "grad_norm": 0.16796875, |
| "learning_rate": 1.0580204778156999e-06, |
| "loss": 1.1256, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.9010238907849829, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.0238907849829352e-06, |
| "loss": 1.2366, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.9044368600682594, |
| "grad_norm": 0.201171875, |
| "learning_rate": 9.897610921501708e-07, |
| "loss": 1.192, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9078498293515358, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 9.556313993174062e-07, |
| "loss": 1.1641, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.9112627986348123, |
| "grad_norm": 0.18359375, |
| "learning_rate": 9.215017064846417e-07, |
| "loss": 1.2233, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.9146757679180887, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 8.873720136518772e-07, |
| "loss": 1.1396, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.9180887372013652, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 8.532423208191128e-07, |
| "loss": 1.2493, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.9215017064846417, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 8.191126279863481e-07, |
| "loss": 1.1951, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9249146757679181, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 7.849829351535837e-07, |
| "loss": 1.1482, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9283276450511946, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 7.508532423208192e-07, |
| "loss": 1.1803, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.931740614334471, |
| "grad_norm": 0.259765625, |
| "learning_rate": 7.167235494880546e-07, |
| "loss": 1.1707, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.9351535836177475, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 6.825938566552902e-07, |
| "loss": 1.1746, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.9385665529010239, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.484641638225256e-07, |
| "loss": 1.2075, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.9419795221843004, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 6.143344709897612e-07, |
| "loss": 1.1349, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9453924914675768, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.802047781569966e-07, |
| "loss": 1.1804, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.9488054607508533, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.460750853242321e-07, |
| "loss": 1.1798, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.9522184300341296, |
| "grad_norm": 0.162109375, |
| "learning_rate": 5.119453924914676e-07, |
| "loss": 1.1949, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.9556313993174061, |
| "grad_norm": 0.23046875, |
| "learning_rate": 4.778156996587031e-07, |
| "loss": 1.2302, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9590443686006825, |
| "grad_norm": 0.1640625, |
| "learning_rate": 4.436860068259386e-07, |
| "loss": 1.1692, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.962457337883959, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 4.0955631399317407e-07, |
| "loss": 1.2129, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9658703071672355, |
| "grad_norm": 0.326171875, |
| "learning_rate": 3.754266211604096e-07, |
| "loss": 1.1722, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.9692832764505119, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.412969283276451e-07, |
| "loss": 1.1918, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.9726962457337884, |
| "grad_norm": 0.244140625, |
| "learning_rate": 3.071672354948806e-07, |
| "loss": 1.2091, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9761092150170648, |
| "grad_norm": 0.228515625, |
| "learning_rate": 2.7303754266211607e-07, |
| "loss": 1.1193, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.9795221843003413, |
| "grad_norm": 0.17578125, |
| "learning_rate": 2.3890784982935155e-07, |
| "loss": 1.1617, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.9829351535836177, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 2.0477815699658704e-07, |
| "loss": 1.1884, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9863481228668942, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 1.7064846416382255e-07, |
| "loss": 1.1583, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.9897610921501706, |
| "grad_norm": 0.16796875, |
| "learning_rate": 1.3651877133105803e-07, |
| "loss": 1.1518, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9931740614334471, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.0238907849829352e-07, |
| "loss": 1.1679, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9965870307167235, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.825938566552902e-08, |
| "loss": 1.1069, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 3.412969283276451e-08, |
| "loss": 1.1428, |
| "step": 293 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 293, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.379910643320095e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|