| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9333333333333333, |
| "eval_steps": 1, |
| "global_step": 66, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02962962962962963, |
| "eval_loss": 3.58097505569458, |
| "eval_runtime": 2.4045, |
| "eval_samples_per_second": 246.616, |
| "eval_steps_per_second": 31.191, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.05925925925925926, |
| "grad_norm": 239.4490699071725, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 3.6289, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.05925925925925926, |
| "eval_loss": 2.9033193588256836, |
| "eval_runtime": 2.3627, |
| "eval_samples_per_second": 250.989, |
| "eval_steps_per_second": 31.744, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "eval_loss": 1.3921175003051758, |
| "eval_runtime": 2.3745, |
| "eval_samples_per_second": 249.737, |
| "eval_steps_per_second": 31.586, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.11851851851851852, |
| "grad_norm": 221.48026445665957, |
| "learning_rate": 9.993784606094612e-06, |
| "loss": 2.1197, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.11851851851851852, |
| "eval_loss": 0.4444686472415924, |
| "eval_runtime": 2.3779, |
| "eval_samples_per_second": 249.376, |
| "eval_steps_per_second": 31.54, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "eval_loss": 0.24378979206085205, |
| "eval_runtime": 2.3891, |
| "eval_samples_per_second": 248.216, |
| "eval_steps_per_second": 31.393, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 17.0706644609287, |
| "learning_rate": 9.944154131125643e-06, |
| "loss": 0.3612, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "eval_loss": 0.12096767872571945, |
| "eval_runtime": 2.3926, |
| "eval_samples_per_second": 247.851, |
| "eval_steps_per_second": 31.347, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.2074074074074074, |
| "eval_loss": 0.06132328137755394, |
| "eval_runtime": 2.3873, |
| "eval_samples_per_second": 248.4, |
| "eval_steps_per_second": 31.416, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.23703703703703705, |
| "grad_norm": 3.6831128580560013, |
| "learning_rate": 9.84538643114539e-06, |
| "loss": 0.0928, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.23703703703703705, |
| "eval_loss": 0.11507756263017654, |
| "eval_runtime": 2.395, |
| "eval_samples_per_second": 247.601, |
| "eval_steps_per_second": 31.315, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "eval_loss": 0.06397408992052078, |
| "eval_runtime": 2.4131, |
| "eval_samples_per_second": 245.739, |
| "eval_steps_per_second": 31.08, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 2.690253307848938, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.0827, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "eval_loss": 0.07617466151714325, |
| "eval_runtime": 2.3892, |
| "eval_samples_per_second": 248.196, |
| "eval_steps_per_second": 31.391, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.32592592592592595, |
| "eval_loss": 0.0631294772028923, |
| "eval_runtime": 2.4391, |
| "eval_samples_per_second": 243.127, |
| "eval_steps_per_second": 30.75, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 4.896892192143718, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.0682, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "eval_loss": 0.05761503055691719, |
| "eval_runtime": 2.391, |
| "eval_samples_per_second": 248.014, |
| "eval_steps_per_second": 31.368, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.3851851851851852, |
| "eval_loss": 0.05635310336947441, |
| "eval_runtime": 2.3916, |
| "eval_samples_per_second": 247.954, |
| "eval_steps_per_second": 31.36, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.4148148148148148, |
| "grad_norm": 1.9584606890246514, |
| "learning_rate": 9.266454408160779e-06, |
| "loss": 0.0509, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.4148148148148148, |
| "eval_loss": 0.054581720381975174, |
| "eval_runtime": 2.3981, |
| "eval_samples_per_second": 247.284, |
| "eval_steps_per_second": 31.275, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "eval_loss": 0.055863503366708755, |
| "eval_runtime": 2.4091, |
| "eval_samples_per_second": 246.147, |
| "eval_steps_per_second": 31.132, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4740740740740741, |
| "grad_norm": 1.6996185012450227, |
| "learning_rate": 8.985662536114614e-06, |
| "loss": 0.0579, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.4740740740740741, |
| "eval_loss": 0.05389578640460968, |
| "eval_runtime": 2.393, |
| "eval_samples_per_second": 247.805, |
| "eval_steps_per_second": 31.341, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.5037037037037037, |
| "eval_loss": 0.051116086542606354, |
| "eval_runtime": 2.4064, |
| "eval_samples_per_second": 246.431, |
| "eval_steps_per_second": 31.168, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.8959360156191395, |
| "learning_rate": 8.665259359149132e-06, |
| "loss": 0.0509, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "eval_loss": 0.05351101979613304, |
| "eval_runtime": 2.3864, |
| "eval_samples_per_second": 248.489, |
| "eval_steps_per_second": 31.428, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.562962962962963, |
| "eval_loss": 0.05162196233868599, |
| "eval_runtime": 2.4122, |
| "eval_samples_per_second": 245.838, |
| "eval_steps_per_second": 31.093, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.6692476053580021, |
| "learning_rate": 8.308429187984298e-06, |
| "loss": 0.0495, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "eval_loss": 0.050382427871227264, |
| "eval_runtime": 2.3971, |
| "eval_samples_per_second": 247.382, |
| "eval_steps_per_second": 31.288, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "eval_loss": 0.055619001388549805, |
| "eval_runtime": 2.3923, |
| "eval_samples_per_second": 247.876, |
| "eval_steps_per_second": 31.35, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.6518518518518519, |
| "grad_norm": 3.625080531722897, |
| "learning_rate": 7.918718361173951e-06, |
| "loss": 0.0509, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6518518518518519, |
| "eval_loss": 0.0558856837451458, |
| "eval_runtime": 2.407, |
| "eval_samples_per_second": 246.367, |
| "eval_steps_per_second": 31.159, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6814814814814815, |
| "eval_loss": 0.0540882870554924, |
| "eval_runtime": 2.3978, |
| "eval_samples_per_second": 247.315, |
| "eval_steps_per_second": 31.279, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.4952683921693086, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.0995, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "eval_loss": 0.04953569918870926, |
| "eval_runtime": 2.4024, |
| "eval_samples_per_second": 246.84, |
| "eval_steps_per_second": 31.219, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "eval_loss": 0.050019968301057816, |
| "eval_runtime": 2.4016, |
| "eval_samples_per_second": 246.919, |
| "eval_steps_per_second": 31.229, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7703703703703704, |
| "grad_norm": 1.1153495773080158, |
| "learning_rate": 7.056435515653059e-06, |
| "loss": 0.0473, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.7703703703703704, |
| "eval_loss": 0.050220031291246414, |
| "eval_runtime": 2.4024, |
| "eval_samples_per_second": 246.841, |
| "eval_steps_per_second": 31.219, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_loss": 0.05034106969833374, |
| "eval_runtime": 2.4013, |
| "eval_samples_per_second": 246.948, |
| "eval_steps_per_second": 31.233, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.8296296296296296, |
| "grad_norm": 1.1745530155874857, |
| "learning_rate": 6.592433251258423e-06, |
| "loss": 0.0486, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8296296296296296, |
| "eval_loss": 0.049442000687122345, |
| "eval_runtime": 2.3999, |
| "eval_samples_per_second": 247.095, |
| "eval_steps_per_second": 31.252, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8592592592592593, |
| "eval_loss": 0.04920043796300888, |
| "eval_runtime": 2.4029, |
| "eval_samples_per_second": 246.787, |
| "eval_steps_per_second": 31.212, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 2.909790494873015, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.0502, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_loss": 0.04876376688480377, |
| "eval_runtime": 2.399, |
| "eval_samples_per_second": 247.185, |
| "eval_steps_per_second": 31.263, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.9185185185185185, |
| "eval_loss": 0.04928644001483917, |
| "eval_runtime": 2.3863, |
| "eval_samples_per_second": 248.507, |
| "eval_steps_per_second": 31.43, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.9481481481481482, |
| "grad_norm": 1.047992100256246, |
| "learning_rate": 5.621718523237427e-06, |
| "loss": 0.071, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.9481481481481482, |
| "eval_loss": 0.048252660781145096, |
| "eval_runtime": 2.3851, |
| "eval_samples_per_second": 248.628, |
| "eval_steps_per_second": 31.445, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "eval_loss": 0.047693394124507904, |
| "eval_runtime": 2.4138, |
| "eval_samples_per_second": 245.673, |
| "eval_steps_per_second": 31.072, |
| "step": 33 |
| }, |
| { |
| "epoch": 1.0074074074074073, |
| "grad_norm": 0.2985545158890502, |
| "learning_rate": 5.1246534586903655e-06, |
| "loss": 0.0467, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.0074074074074073, |
| "eval_loss": 0.048503223806619644, |
| "eval_runtime": 2.3947, |
| "eval_samples_per_second": 247.632, |
| "eval_steps_per_second": 31.319, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.0148148148148148, |
| "eval_loss": 0.04916958510875702, |
| "eval_runtime": 2.3733, |
| "eval_samples_per_second": 249.862, |
| "eval_steps_per_second": 31.601, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.0444444444444445, |
| "grad_norm": 1.2895588815868697, |
| "learning_rate": 4.626349532067879e-06, |
| "loss": 0.0439, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.0444444444444445, |
| "eval_loss": 0.048909012228250504, |
| "eval_runtime": 2.384, |
| "eval_samples_per_second": 248.737, |
| "eval_steps_per_second": 31.459, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.074074074074074, |
| "eval_loss": 0.0482967235147953, |
| "eval_runtime": 2.3943, |
| "eval_samples_per_second": 247.672, |
| "eval_steps_per_second": 31.324, |
| "step": 37 |
| }, |
| { |
| "epoch": 1.1037037037037036, |
| "grad_norm": 0.6487111314817995, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.0407, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.1037037037037036, |
| "eval_loss": 0.047575678676366806, |
| "eval_runtime": 2.3994, |
| "eval_samples_per_second": 247.142, |
| "eval_steps_per_second": 31.257, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.1333333333333333, |
| "eval_loss": 0.04676678404211998, |
| "eval_runtime": 2.4002, |
| "eval_samples_per_second": 247.067, |
| "eval_steps_per_second": 31.248, |
| "step": 39 |
| }, |
| { |
| "epoch": 1.162962962962963, |
| "grad_norm": 0.18657842568363442, |
| "learning_rate": 3.6457976592849753e-06, |
| "loss": 0.0464, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.162962962962963, |
| "eval_loss": 0.04638924077153206, |
| "eval_runtime": 2.3905, |
| "eval_samples_per_second": 248.065, |
| "eval_steps_per_second": 31.374, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1925925925925926, |
| "eval_loss": 0.045983266085386276, |
| "eval_runtime": 2.4177, |
| "eval_samples_per_second": 245.277, |
| "eval_steps_per_second": 31.022, |
| "step": 41 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 0.8997059752014409, |
| "learning_rate": 3.173294878168025e-06, |
| "loss": 0.0434, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "eval_loss": 0.04601927101612091, |
| "eval_runtime": 2.3945, |
| "eval_samples_per_second": 247.646, |
| "eval_steps_per_second": 31.321, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.2518518518518518, |
| "eval_loss": 0.046453364193439484, |
| "eval_runtime": 2.3957, |
| "eval_samples_per_second": 247.53, |
| "eval_steps_per_second": 31.306, |
| "step": 43 |
| }, |
| { |
| "epoch": 1.2814814814814814, |
| "grad_norm": 1.1550252113522264, |
| "learning_rate": 2.718946713234185e-06, |
| "loss": 0.0455, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.2814814814814814, |
| "eval_loss": 0.04625436291098595, |
| "eval_runtime": 2.3882, |
| "eval_samples_per_second": 248.304, |
| "eval_steps_per_second": 31.404, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.3111111111111111, |
| "eval_loss": 0.04611556977033615, |
| "eval_runtime": 2.3961, |
| "eval_samples_per_second": 247.485, |
| "eval_steps_per_second": 31.301, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.3407407407407408, |
| "grad_norm": 0.35168502595914414, |
| "learning_rate": 2.2872686806712037e-06, |
| "loss": 0.048, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.3407407407407408, |
| "eval_loss": 0.04597063362598419, |
| "eval_runtime": 2.3985, |
| "eval_samples_per_second": 247.238, |
| "eval_steps_per_second": 31.27, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.3703703703703702, |
| "eval_loss": 0.045863181352615356, |
| "eval_runtime": 2.3895, |
| "eval_samples_per_second": 248.164, |
| "eval_steps_per_second": 31.387, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.1552601397592931, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.0446, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_loss": 0.04581974819302559, |
| "eval_runtime": 2.3913, |
| "eval_samples_per_second": 247.98, |
| "eval_steps_per_second": 31.363, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.4296296296296296, |
| "eval_loss": 0.04564787447452545, |
| "eval_runtime": 2.3985, |
| "eval_samples_per_second": 247.241, |
| "eval_steps_per_second": 31.27, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.4592592592592593, |
| "grad_norm": 0.43359469093350184, |
| "learning_rate": 1.5088159095696365e-06, |
| "loss": 0.0481, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.4592592592592593, |
| "eval_loss": 0.045681748539209366, |
| "eval_runtime": 2.3985, |
| "eval_samples_per_second": 247.236, |
| "eval_steps_per_second": 31.269, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.488888888888889, |
| "eval_loss": 0.045599259436130524, |
| "eval_runtime": 2.3934, |
| "eval_samples_per_second": 247.767, |
| "eval_steps_per_second": 31.336, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.5185185185185186, |
| "grad_norm": 0.4559013831402292, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.0432, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.5185185185185186, |
| "eval_loss": 0.04557538405060768, |
| "eval_runtime": 2.3883, |
| "eval_samples_per_second": 248.299, |
| "eval_steps_per_second": 31.404, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.5481481481481483, |
| "eval_loss": 0.045645877718925476, |
| "eval_runtime": 2.4, |
| "eval_samples_per_second": 247.085, |
| "eval_steps_per_second": 31.25, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.5777777777777777, |
| "grad_norm": 0.49502628271352395, |
| "learning_rate": 8.688061284200266e-07, |
| "loss": 0.0416, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.5777777777777777, |
| "eval_loss": 0.04559370502829552, |
| "eval_runtime": 2.4044, |
| "eval_samples_per_second": 246.635, |
| "eval_steps_per_second": 31.193, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.6074074074074074, |
| "eval_loss": 0.04555842652916908, |
| "eval_runtime": 2.3868, |
| "eval_samples_per_second": 248.445, |
| "eval_steps_per_second": 31.422, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.637037037037037, |
| "grad_norm": 0.25465223888040067, |
| "learning_rate": 6.088921331488568e-07, |
| "loss": 0.0424, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.637037037037037, |
| "eval_loss": 0.0455278605222702, |
| "eval_runtime": 2.3974, |
| "eval_samples_per_second": 247.347, |
| "eval_steps_per_second": 31.283, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "eval_loss": 0.045483075082302094, |
| "eval_runtime": 2.4, |
| "eval_samples_per_second": 247.08, |
| "eval_steps_per_second": 31.25, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.6962962962962962, |
| "grad_norm": 0.17076926516750576, |
| "learning_rate": 3.9261894064796136e-07, |
| "loss": 0.044, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.6962962962962962, |
| "eval_loss": 0.04561697319149971, |
| "eval_runtime": 2.4215, |
| "eval_samples_per_second": 244.886, |
| "eval_steps_per_second": 30.972, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.7259259259259259, |
| "eval_loss": 0.04549773409962654, |
| "eval_runtime": 2.3978, |
| "eval_samples_per_second": 247.312, |
| "eval_steps_per_second": 31.279, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.7555555555555555, |
| "grad_norm": 0.23795645448648295, |
| "learning_rate": 2.2213597106929608e-07, |
| "loss": 0.0422, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.7555555555555555, |
| "eval_loss": 0.04546647146344185, |
| "eval_runtime": 2.3889, |
| "eval_samples_per_second": 248.236, |
| "eval_steps_per_second": 31.396, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.7851851851851852, |
| "eval_loss": 0.04548794403672218, |
| "eval_runtime": 2.3987, |
| "eval_samples_per_second": 247.22, |
| "eval_steps_per_second": 31.267, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.8148148148148149, |
| "grad_norm": 0.3806583076277453, |
| "learning_rate": 9.913756075728088e-08, |
| "loss": 0.0419, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.8148148148148149, |
| "eval_loss": 0.04549010843038559, |
| "eval_runtime": 2.4157, |
| "eval_samples_per_second": 245.482, |
| "eval_steps_per_second": 31.047, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.8444444444444446, |
| "eval_loss": 0.04554362595081329, |
| "eval_runtime": 2.4258, |
| "eval_samples_per_second": 244.457, |
| "eval_steps_per_second": 30.918, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.8740740740740742, |
| "grad_norm": 0.1543376857763108, |
| "learning_rate": 2.4846123172992953e-08, |
| "loss": 0.0431, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.8740740740740742, |
| "eval_loss": 0.04551076889038086, |
| "eval_runtime": 2.4228, |
| "eval_samples_per_second": 244.759, |
| "eval_steps_per_second": 30.956, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.9037037037037037, |
| "eval_loss": 0.04560859501361847, |
| "eval_runtime": 2.403, |
| "eval_samples_per_second": 246.776, |
| "eval_steps_per_second": 31.211, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 0.180948589263062, |
| "learning_rate": 0.0, |
| "loss": 0.0396, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "eval_loss": 0.04552510008215904, |
| "eval_runtime": 2.3896, |
| "eval_samples_per_second": 248.162, |
| "eval_steps_per_second": 31.386, |
| "step": 66 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 66, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 34469729796096.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|