| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 116, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.017241379310344827, |
| "grad_norm": 16.797964096069336, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 3.4811, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.034482758620689655, |
| "grad_norm": 16.552379608154297, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 3.4256, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.05172413793103448, |
| "grad_norm": 16.94522476196289, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 3.4624, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.06896551724137931, |
| "grad_norm": 16.51656723022461, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 3.4428, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.08620689655172414, |
| "grad_norm": 16.90593910217285, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 3.4053, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.10344827586206896, |
| "grad_norm": 16.83192253112793, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 3.506, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.1206896551724138, |
| "grad_norm": 16.592622756958008, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 3.4454, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 16.566585540771484, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 3.4268, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.15517241379310345, |
| "grad_norm": 16.46024513244629, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 3.4186, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.1724137931034483, |
| "grad_norm": 16.710294723510742, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 3.4258, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1896551724137931, |
| "grad_norm": 16.718793869018555, |
| "learning_rate": 5.5e-07, |
| "loss": 3.3919, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.20689655172413793, |
| "grad_norm": 15.603546142578125, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 3.3223, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.22413793103448276, |
| "grad_norm": 16.184322357177734, |
| "learning_rate": 6.5e-07, |
| "loss": 3.3516, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.2413793103448276, |
| "grad_norm": 15.28188419342041, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 3.2181, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.25862068965517243, |
| "grad_norm": 14.98234748840332, |
| "learning_rate": 7.5e-07, |
| "loss": 3.1837, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 15.273055076599121, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 3.2103, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.29310344827586204, |
| "grad_norm": 14.799996376037598, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 3.1195, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.3103448275862069, |
| "grad_norm": 13.851456642150879, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 3.0261, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.3275862068965517, |
| "grad_norm": 12.985479354858398, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 2.8995, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.3448275862068966, |
| "grad_norm": 12.569958686828613, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.8289, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3620689655172414, |
| "grad_norm": 11.687292098999023, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 2.7955, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.3793103448275862, |
| "grad_norm": 10.375764846801758, |
| "learning_rate": 1.1e-06, |
| "loss": 2.6422, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.39655172413793105, |
| "grad_norm": 9.314571380615234, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 2.5636, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 8.794600486755371, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 2.4439, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.43103448275862066, |
| "grad_norm": 8.352209091186523, |
| "learning_rate": 1.25e-06, |
| "loss": 2.351, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.4482758620689655, |
| "grad_norm": 8.14919662475586, |
| "learning_rate": 1.3e-06, |
| "loss": 2.2486, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.46551724137931033, |
| "grad_norm": 8.511932373046875, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 2.201, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.4827586206896552, |
| "grad_norm": 8.55715274810791, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 2.0727, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 8.575194358825684, |
| "learning_rate": 1.45e-06, |
| "loss": 1.9386, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 8.735200881958008, |
| "learning_rate": 1.5e-06, |
| "loss": 1.8335, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5344827586206896, |
| "grad_norm": 8.932766914367676, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.7265, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 9.010940551757812, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.5886, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.5689655172413793, |
| "grad_norm": 9.089744567871094, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.4379, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.5862068965517241, |
| "grad_norm": 9.299127578735352, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.2766, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.603448275862069, |
| "grad_norm": 9.707971572875977, |
| "learning_rate": 1.75e-06, |
| "loss": 1.1075, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.6206896551724138, |
| "grad_norm": 9.807348251342773, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.9345, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.6379310344827587, |
| "grad_norm": 9.576728820800781, |
| "learning_rate": 1.85e-06, |
| "loss": 0.7481, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.6551724137931034, |
| "grad_norm": 8.874764442443848, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.5921, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.6724137931034483, |
| "grad_norm": 7.2561354637146, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.4217, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 4.964897155761719, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.3012, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.7068965517241379, |
| "grad_norm": 3.965514659881592, |
| "learning_rate": 2.05e-06, |
| "loss": 0.2359, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.7241379310344828, |
| "grad_norm": 3.0960378646850586, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.1738, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.7413793103448276, |
| "grad_norm": 2.1212306022644043, |
| "learning_rate": 2.15e-06, |
| "loss": 0.1364, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.7586206896551724, |
| "grad_norm": 1.155745506286621, |
| "learning_rate": 2.2e-06, |
| "loss": 0.1012, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.7758620689655172, |
| "grad_norm": 0.7050064206123352, |
| "learning_rate": 2.25e-06, |
| "loss": 0.095, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.7931034482758621, |
| "grad_norm": 0.43830573558807373, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.0801, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.8103448275862069, |
| "grad_norm": 0.3802882432937622, |
| "learning_rate": 2.35e-06, |
| "loss": 0.0827, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.8275862068965517, |
| "grad_norm": 0.3097267746925354, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.0762, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.8448275862068966, |
| "grad_norm": 0.2734082341194153, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.0749, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.8620689655172413, |
| "grad_norm": 0.2831459641456604, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0762, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.8793103448275862, |
| "grad_norm": 0.2537994086742401, |
| "learning_rate": 2.55e-06, |
| "loss": 0.072, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.896551724137931, |
| "grad_norm": 0.3006448745727539, |
| "learning_rate": 2.6e-06, |
| "loss": 0.073, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.9137931034482759, |
| "grad_norm": 0.24919214844703674, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.0699, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.9310344827586207, |
| "grad_norm": 0.23921510577201843, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.0647, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.9482758620689655, |
| "grad_norm": 0.1967734843492508, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.0711, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.9655172413793104, |
| "grad_norm": 0.1832527369260788, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.0686, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.9827586206896551, |
| "grad_norm": 0.16995869576931, |
| "learning_rate": 2.85e-06, |
| "loss": 0.0644, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1822829246520996, |
| "learning_rate": 2.9e-06, |
| "loss": 0.0669, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.0172413793103448, |
| "grad_norm": 0.17660750448703766, |
| "learning_rate": 2.95e-06, |
| "loss": 0.0668, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.0344827586206897, |
| "grad_norm": 0.12004642933607101, |
| "learning_rate": 3e-06, |
| "loss": 0.0588, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.0517241379310345, |
| "grad_norm": 0.15699979662895203, |
| "learning_rate": 3.05e-06, |
| "loss": 0.0631, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.0689655172413792, |
| "grad_norm": 0.15650232136249542, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.0596, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.0862068965517242, |
| "grad_norm": 0.1188632994890213, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.0614, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.103448275862069, |
| "grad_norm": 0.11891665309667587, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.0636, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.1206896551724137, |
| "grad_norm": 0.12215171754360199, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.0567, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.1379310344827587, |
| "grad_norm": 0.10582094639539719, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.0588, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.1551724137931034, |
| "grad_norm": 0.11556132137775421, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 0.0628, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.1724137931034484, |
| "grad_norm": 0.12296544015407562, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.0623, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.1896551724137931, |
| "grad_norm": 0.1281358152627945, |
| "learning_rate": 3.45e-06, |
| "loss": 0.0626, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.206896551724138, |
| "grad_norm": 0.1277759075164795, |
| "learning_rate": 3.5e-06, |
| "loss": 0.0592, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.2241379310344827, |
| "grad_norm": 0.09552627056837082, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.0595, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.2413793103448276, |
| "grad_norm": 0.11053085327148438, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.0546, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.2586206896551724, |
| "grad_norm": 0.09386970102787018, |
| "learning_rate": 3.65e-06, |
| "loss": 0.0556, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.2758620689655173, |
| "grad_norm": 0.0934402197599411, |
| "learning_rate": 3.7e-06, |
| "loss": 0.0563, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.293103448275862, |
| "grad_norm": 0.09964020550251007, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.0569, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.3103448275862069, |
| "grad_norm": 0.08830665796995163, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.0578, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.3275862068965516, |
| "grad_norm": 0.13536307215690613, |
| "learning_rate": 3.85e-06, |
| "loss": 0.0569, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.3448275862068966, |
| "grad_norm": 0.10773950815200806, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.0581, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.3620689655172413, |
| "grad_norm": 0.09468439221382141, |
| "learning_rate": 3.95e-06, |
| "loss": 0.0549, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.3793103448275863, |
| "grad_norm": 0.08775551617145538, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0594, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.396551724137931, |
| "grad_norm": 0.12008150666952133, |
| "learning_rate": 4.05e-06, |
| "loss": 0.057, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.4137931034482758, |
| "grad_norm": 0.12070683389902115, |
| "learning_rate": 4.1e-06, |
| "loss": 0.0549, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.4310344827586206, |
| "grad_norm": 0.1037198081612587, |
| "learning_rate": 4.15e-06, |
| "loss": 0.0554, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.4482758620689655, |
| "grad_norm": 0.14529870450496674, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.0549, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.4655172413793103, |
| "grad_norm": 0.0954233855009079, |
| "learning_rate": 4.25e-06, |
| "loss": 0.0556, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.4827586206896552, |
| "grad_norm": 0.08504101634025574, |
| "learning_rate": 4.3e-06, |
| "loss": 0.0505, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.15293122828006744, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.0577, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.5172413793103448, |
| "grad_norm": 0.10908783227205276, |
| "learning_rate": 4.4e-06, |
| "loss": 0.0556, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.5344827586206895, |
| "grad_norm": 0.12018983066082001, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.0554, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.5517241379310345, |
| "grad_norm": 0.1018645316362381, |
| "learning_rate": 4.5e-06, |
| "loss": 0.0548, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.5689655172413794, |
| "grad_norm": 0.09623338282108307, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.0566, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.5862068965517242, |
| "grad_norm": 0.09007120132446289, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.0552, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.603448275862069, |
| "grad_norm": 0.07549538463354111, |
| "learning_rate": 4.65e-06, |
| "loss": 0.056, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.6206896551724137, |
| "grad_norm": 0.14191967248916626, |
| "learning_rate": 4.7e-06, |
| "loss": 0.0547, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.6379310344827587, |
| "grad_norm": 0.13249421119689941, |
| "learning_rate": 4.75e-06, |
| "loss": 0.0529, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.6551724137931034, |
| "grad_norm": 0.09079012274742126, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.0537, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.6724137931034484, |
| "grad_norm": 0.08319538831710815, |
| "learning_rate": 4.85e-06, |
| "loss": 0.0523, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.6896551724137931, |
| "grad_norm": 0.09330669790506363, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.0515, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.706896551724138, |
| "grad_norm": 0.1553690880537033, |
| "learning_rate": 4.95e-06, |
| "loss": 0.0539, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.7241379310344827, |
| "grad_norm": 0.10644665360450745, |
| "learning_rate": 5e-06, |
| "loss": 0.0537, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.7413793103448276, |
| "grad_norm": 0.10171601176261902, |
| "learning_rate": 4.999799414013322e-06, |
| "loss": 0.0526, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.7586206896551724, |
| "grad_norm": 0.08432668447494507, |
| "learning_rate": 4.999197688241076e-06, |
| "loss": 0.0525, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.7758620689655173, |
| "grad_norm": 0.08552516251802444, |
| "learning_rate": 4.998194919241471e-06, |
| "loss": 0.0528, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.793103448275862, |
| "grad_norm": 0.10473164916038513, |
| "learning_rate": 4.996791267927632e-06, |
| "loss": 0.0509, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.8103448275862069, |
| "grad_norm": 0.08742501586675644, |
| "learning_rate": 4.994986959541788e-06, |
| "loss": 0.0527, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.8275862068965516, |
| "grad_norm": 0.07423796504735947, |
| "learning_rate": 4.9927822836191185e-06, |
| "loss": 0.0532, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.8448275862068966, |
| "grad_norm": 0.0921708270907402, |
| "learning_rate": 4.990177593941303e-06, |
| "loss": 0.0545, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.8620689655172413, |
| "grad_norm": 0.08664074540138245, |
| "learning_rate": 4.987173308479738e-06, |
| "loss": 0.0505, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.8793103448275863, |
| "grad_norm": 0.09824781864881516, |
| "learning_rate": 4.9837699093284765e-06, |
| "loss": 0.053, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.896551724137931, |
| "grad_norm": 0.0782749354839325, |
| "learning_rate": 4.9799679426268575e-06, |
| "loss": 0.0525, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.9137931034482758, |
| "grad_norm": 0.09107070416212082, |
| "learning_rate": 4.975768018471877e-06, |
| "loss": 0.0536, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.9310344827586206, |
| "grad_norm": 0.07815398275852203, |
| "learning_rate": 4.971170810820279e-06, |
| "loss": 0.0486, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.9482758620689655, |
| "grad_norm": 0.08175615966320038, |
| "learning_rate": 4.966177057380409e-06, |
| "loss": 0.0545, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.9655172413793105, |
| "grad_norm": 0.08906937390565872, |
| "learning_rate": 4.960787559493836e-06, |
| "loss": 0.0478, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.9827586206896552, |
| "grad_norm": 0.08419270813465118, |
| "learning_rate": 4.955003182006761e-06, |
| "loss": 0.0528, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.10093575716018677, |
| "learning_rate": 4.948824853131237e-06, |
| "loss": 0.049, |
| "step": 116 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 348, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 58, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.5960715144989245e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|