{
  "best_metric": 0.6138996138996139,
  "best_model_checkpoint": "./Validated_cracks_raw_dataset_359_relabeled2_outputs/checkpoint-432",
  "epoch": 30.0,
  "eval_steps": 500,
  "global_step": 3240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.46296296296296297,
      "grad_norm": 11.11386489868164,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.9464,
      "step": 50
    },
    {
      "epoch": 0.9259259259259259,
      "grad_norm": 22.558269500732422,
      "learning_rate": 5.864197530864199e-06,
      "loss": 0.9468,
      "step": 100
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6023166023166023,
      "eval_loss": 0.9880222678184509,
      "eval_runtime": 1.7676,
      "eval_samples_per_second": 146.524,
      "eval_steps_per_second": 18.669,
      "step": 108
    },
    {
      "epoch": 1.3888888888888888,
      "grad_norm": 23.05500030517578,
      "learning_rate": 8.950617283950618e-06,
      "loss": 0.9061,
      "step": 150
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 20.498937606811523,
      "learning_rate": 9.997164110237345e-06,
      "loss": 0.9275,
      "step": 200
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.583011583011583,
      "eval_loss": 1.0344593524932861,
      "eval_runtime": 1.7521,
      "eval_samples_per_second": 147.825,
      "eval_steps_per_second": 18.835,
      "step": 216
    },
    {
      "epoch": 2.314814814814815,
      "grad_norm": 10.24534797668457,
      "learning_rate": 9.98206922906478e-06,
      "loss": 0.9077,
      "step": 250
    },
    {
      "epoch": 2.7777777777777777,
      "grad_norm": 62.0069580078125,
      "learning_rate": 9.954002016824226e-06,
      "loss": 0.9377,
      "step": 300
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5675675675675675,
      "eval_loss": 1.023147463798523,
      "eval_runtime": 1.7707,
      "eval_samples_per_second": 146.27,
      "eval_steps_per_second": 18.637,
      "step": 324
    },
    {
      "epoch": 3.240740740740741,
      "grad_norm": 19.093095779418945,
      "learning_rate": 9.91398066213228e-06,
      "loss": 0.8912,
      "step": 350
    },
    {
      "epoch": 3.7037037037037037,
      "grad_norm": 14.229964256286621,
      "learning_rate": 9.860476209695013e-06,
      "loss": 0.8461,
      "step": 400
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6138996138996139,
      "eval_loss": 0.9997061491012573,
      "eval_runtime": 1.7439,
      "eval_samples_per_second": 148.519,
      "eval_steps_per_second": 18.923,
      "step": 432
    },
    {
      "epoch": 4.166666666666667,
      "grad_norm": 25.24408721923828,
      "learning_rate": 9.794316030562418e-06,
      "loss": 0.8663,
      "step": 450
    },
    {
      "epoch": 4.62962962962963,
      "grad_norm": 27.978118896484375,
      "learning_rate": 9.715672392864017e-06,
      "loss": 0.8429,
      "step": 500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.5868725868725869,
      "eval_loss": 0.9872773885726929,
      "eval_runtime": 1.7504,
      "eval_samples_per_second": 147.962,
      "eval_steps_per_second": 18.852,
      "step": 540
    },
    {
      "epoch": 5.092592592592593,
      "grad_norm": 22.3253173828125,
      "learning_rate": 9.62475006920687e-06,
      "loss": 0.8246,
      "step": 550
    },
    {
      "epoch": 5.555555555555555,
      "grad_norm": 29.907615661621094,
      "learning_rate": 9.521785803487888e-06,
      "loss": 0.781,
      "step": 600
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6061776061776062,
      "eval_loss": 0.9970757365226746,
      "eval_runtime": 1.7424,
      "eval_samples_per_second": 148.643,
      "eval_steps_per_second": 18.939,
      "step": 648
    },
    {
      "epoch": 6.018518518518518,
      "grad_norm": 23.293292999267578,
      "learning_rate": 9.407047694459149e-06,
      "loss": 0.8409,
      "step": 650
    },
    {
      "epoch": 6.481481481481482,
      "grad_norm": 21.52410888671875,
      "learning_rate": 9.280834497651334e-06,
      "loss": 0.8137,
      "step": 700
    },
    {
      "epoch": 6.944444444444445,
      "grad_norm": 34.57286071777344,
      "learning_rate": 9.143474847472932e-06,
      "loss": 0.7621,
      "step": 750
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.583011583011583,
      "eval_loss": 1.0092536211013794,
      "eval_runtime": 1.7382,
      "eval_samples_per_second": 149.004,
      "eval_steps_per_second": 18.985,
      "step": 756
    },
    {
      "epoch": 7.407407407407407,
      "grad_norm": 15.154962539672852,
      "learning_rate": 8.9953264015107e-06,
      "loss": 0.7191,
      "step": 800
    },
    {
      "epoch": 7.87037037037037,
      "grad_norm": 40.10356140136719,
      "learning_rate": 8.836774909259442e-06,
      "loss": 0.7649,
      "step": 850
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5675675675675675,
      "eval_loss": 1.0584689378738403,
      "eval_runtime": 1.7294,
      "eval_samples_per_second": 149.767,
      "eval_steps_per_second": 19.082,
      "step": 864
    },
    {
      "epoch": 8.333333333333334,
      "grad_norm": 65.0152587890625,
      "learning_rate": 8.668233207706014e-06,
      "loss": 0.7822,
      "step": 900
    },
    {
      "epoch": 8.796296296296296,
      "grad_norm": 27.71040916442871,
      "learning_rate": 8.490140146382767e-06,
      "loss": 0.7912,
      "step": 950
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.5752895752895753,
      "eval_loss": 1.0433698892593384,
      "eval_runtime": 1.7507,
      "eval_samples_per_second": 147.94,
      "eval_steps_per_second": 18.85,
      "step": 972
    },
    {
      "epoch": 9.25925925925926,
      "grad_norm": 35.2958984375,
      "learning_rate": 8.302959444689442e-06,
      "loss": 0.7821,
      "step": 1000
    },
    {
      "epoch": 9.722222222222221,
      "grad_norm": 47.18446731567383,
      "learning_rate": 8.107178484458825e-06,
      "loss": 0.7529,
      "step": 1050
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.61003861003861,
      "eval_loss": 1.022578477859497,
      "eval_runtime": 1.747,
      "eval_samples_per_second": 148.256,
      "eval_steps_per_second": 18.89,
      "step": 1080
    },
    {
      "epoch": 10.185185185185185,
      "grad_norm": 18.027130126953125,
      "learning_rate": 7.903307040910035e-06,
      "loss": 0.7478,
      "step": 1100
    },
    {
      "epoch": 10.648148148148149,
      "grad_norm": 17.87278175354004,
      "learning_rate": 7.691875955293851e-06,
      "loss": 0.6924,
      "step": 1150
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.5868725868725869,
      "eval_loss": 1.0567034482955933,
      "eval_runtime": 1.7505,
      "eval_samples_per_second": 147.961,
      "eval_steps_per_second": 18.852,
      "step": 1188
    },
    {
      "epoch": 11.11111111111111,
      "grad_norm": 30.35451316833496,
      "learning_rate": 7.473435752686213e-06,
      "loss": 0.7193,
      "step": 1200
    },
    {
      "epoch": 11.574074074074074,
      "grad_norm": 27.861392974853516,
      "learning_rate": 7.248555208528913e-06,
      "loss": 0.7015,
      "step": 1250
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.583011583011583,
      "eval_loss": 1.0600991249084473,
      "eval_runtime": 1.7432,
      "eval_samples_per_second": 148.579,
      "eval_steps_per_second": 18.931,
      "step": 1296
    },
    {
      "epoch": 12.037037037037036,
      "grad_norm": 16.875736236572266,
      "learning_rate": 7.017819867649898e-06,
      "loss": 0.7261,
      "step": 1300
    },
    {
      "epoch": 12.5,
      "grad_norm": 29.472204208374023,
      "learning_rate": 6.781830519619392e-06,
      "loss": 0.7286,
      "step": 1350
    },
    {
      "epoch": 12.962962962962964,
      "grad_norm": 79.9528579711914,
      "learning_rate": 6.541201634411704e-06,
      "loss": 0.7094,
      "step": 1400
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.5637065637065637,
      "eval_loss": 1.0830413103103638,
      "eval_runtime": 1.7485,
      "eval_samples_per_second": 148.126,
      "eval_steps_per_second": 18.873,
      "step": 1404
    },
    {
      "epoch": 13.425925925925926,
      "grad_norm": 32.787437438964844,
      "learning_rate": 6.296559762445928e-06,
      "loss": 0.6587,
      "step": 1450
    },
    {
      "epoch": 13.88888888888889,
      "grad_norm": 21.805397033691406,
      "learning_rate": 6.048541903171552e-06,
      "loss": 0.749,
      "step": 1500
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.5907335907335908,
      "eval_loss": 1.0383802652359009,
      "eval_runtime": 1.7396,
      "eval_samples_per_second": 148.885,
      "eval_steps_per_second": 18.97,
      "step": 1512
    },
    {
      "epoch": 14.351851851851851,
      "grad_norm": 14.21861457824707,
      "learning_rate": 5.79779384644684e-06,
      "loss": 0.6565,
      "step": 1550
    },
    {
      "epoch": 14.814814814814815,
      "grad_norm": 111.92208862304688,
      "learning_rate": 5.550041104970398e-06,
      "loss": 0.7433,
      "step": 1600
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.5675675675675675,
      "eval_loss": 1.0580743551254272,
      "eval_runtime": 1.737,
      "eval_samples_per_second": 149.108,
      "eval_steps_per_second": 18.998,
      "step": 1620
    },
    {
      "epoch": 15.277777777777779,
      "grad_norm": 27.13836669921875,
      "learning_rate": 5.2958186601645375e-06,
      "loss": 0.7729,
      "step": 1650
    },
    {
      "epoch": 15.74074074074074,
      "grad_norm": 41.231163024902344,
      "learning_rate": 5.040825961590755e-06,
      "loss": 0.7595,
      "step": 1700
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5714285714285714,
      "eval_loss": 1.0641090869903564,
      "eval_runtime": 1.7544,
      "eval_samples_per_second": 147.629,
      "eval_steps_per_second": 18.81,
      "step": 1728
    },
    {
      "epoch": 16.203703703703702,
      "grad_norm": 18.411924362182617,
      "learning_rate": 4.785726960220769e-06,
      "loss": 0.7803,
      "step": 1750
    },
    {
      "epoch": 16.666666666666668,
      "grad_norm": 34.5395393371582,
      "learning_rate": 4.531185883817934e-06,
      "loss": 0.8132,
      "step": 1800
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.5714285714285714,
      "eval_loss": 1.0863285064697266,
      "eval_runtime": 1.7323,
      "eval_samples_per_second": 149.51,
      "eval_steps_per_second": 19.05,
      "step": 1836
    },
    {
      "epoch": 17.12962962962963,
      "grad_norm": 30.300302505493164,
      "learning_rate": 4.277865507418416e-06,
      "loss": 0.8507,
      "step": 1850
    },
    {
      "epoch": 17.59259259259259,
      "grad_norm": 19.417051315307617,
      "learning_rate": 4.026425427595017e-06,
      "loss": 0.7951,
      "step": 1900
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.5868725868725869,
      "eval_loss": 1.0517489910125732,
      "eval_runtime": 1.7405,
      "eval_samples_per_second": 148.809,
      "eval_steps_per_second": 18.96,
      "step": 1944
    },
    {
      "epoch": 18.055555555555557,
      "grad_norm": 20.044008255004883,
      "learning_rate": 3.777520344997052e-06,
      "loss": 0.8131,
      "step": 1950
    },
    {
      "epoch": 18.51851851851852,
      "grad_norm": 29.19565200805664,
      "learning_rate": 3.5317983596382976e-06,
      "loss": 0.7714,
      "step": 2000
    },
    {
      "epoch": 18.98148148148148,
      "grad_norm": 32.99634552001953,
      "learning_rate": 3.289899283371657e-06,
      "loss": 0.7861,
      "step": 2050
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.5907335907335908,
      "eval_loss": 1.0631417036056519,
      "eval_runtime": 1.739,
      "eval_samples_per_second": 148.94,
      "eval_steps_per_second": 18.977,
      "step": 2052
    },
    {
      "epoch": 19.444444444444443,
      "grad_norm": 20.90323829650879,
      "learning_rate": 3.052452973944603e-06,
      "loss": 0.7129,
      "step": 2100
    },
    {
      "epoch": 19.90740740740741,
      "grad_norm": 40.24602508544922,
      "learning_rate": 2.8200776949731435e-06,
      "loss": 0.7334,
      "step": 2150
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.5907335907335908,
      "eval_loss": 1.0507447719573975,
      "eval_runtime": 1.893,
      "eval_samples_per_second": 136.818,
      "eval_steps_per_second": 17.432,
      "step": 2160
    },
    {
      "epoch": 20.37037037037037,
      "grad_norm": 19.65435028076172,
      "learning_rate": 2.5933785061046333e-06,
      "loss": 0.7563,
      "step": 2200
    },
    {
      "epoch": 20.833333333333332,
      "grad_norm": 96.42213439941406,
      "learning_rate": 2.372945687561132e-06,
      "loss": 0.6807,
      "step": 2250
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.5791505791505791,
      "eval_loss": 1.0659432411193848,
      "eval_runtime": 1.7425,
      "eval_samples_per_second": 148.637,
      "eval_steps_per_second": 18.938,
      "step": 2268
    },
    {
      "epoch": 21.296296296296298,
      "grad_norm": 30.792505264282227,
      "learning_rate": 2.159353203165484e-06,
      "loss": 0.7706,
      "step": 2300
    },
    {
      "epoch": 21.75925925925926,
      "grad_norm": 62.397857666015625,
      "learning_rate": 1.9531572058521074e-06,
      "loss": 0.6905,
      "step": 2350
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.5752895752895753,
      "eval_loss": 1.0680582523345947,
      "eval_runtime": 1.7542,
      "eval_samples_per_second": 147.645,
      "eval_steps_per_second": 18.812,
      "step": 2376
    },
    {
      "epoch": 22.22222222222222,
      "grad_norm": 17.79286766052246,
      "learning_rate": 1.754894589553836e-06,
      "loss": 0.763,
      "step": 2400
    },
    {
      "epoch": 22.685185185185187,
      "grad_norm": 20.265037536621094,
      "learning_rate": 1.5650815912354285e-06,
      "loss": 0.7242,
      "step": 2450
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.5907335907335908,
      "eval_loss": 1.0529667139053345,
      "eval_runtime": 1.7502,
      "eval_samples_per_second": 147.984,
      "eval_steps_per_second": 18.855,
      "step": 2484
    },
    {
      "epoch": 23.14814814814815,
      "grad_norm": 43.985389709472656,
      "learning_rate": 1.3842124467137664e-06,
      "loss": 0.7003,
      "step": 2500
    },
    {
      "epoch": 23.61111111111111,
      "grad_norm": 58.851200103759766,
      "learning_rate": 1.212758103764733e-06,
      "loss": 0.7342,
      "step": 2550
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.5637065637065637,
      "eval_loss": 1.0659518241882324,
      "eval_runtime": 1.7466,
      "eval_samples_per_second": 148.291,
      "eval_steps_per_second": 18.894,
      "step": 2592
    },
    {
      "epoch": 24.074074074074073,
      "grad_norm": 34.4920654296875,
      "learning_rate": 1.0511649958675745e-06,
      "loss": 0.7262,
      "step": 2600
    },
    {
      "epoch": 24.537037037037038,
      "grad_norm": 29.049121856689453,
      "learning_rate": 8.998538797796879e-07,
      "loss": 0.6924,
      "step": 2650
    },
    {
      "epoch": 25.0,
      "grad_norm": 54.39902877807617,
      "learning_rate": 7.592187399685524e-07,
      "loss": 0.7736,
      "step": 2700
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.5791505791505791,
      "eval_loss": 1.070913314819336,
      "eval_runtime": 1.7438,
      "eval_samples_per_second": 148.528,
      "eval_steps_per_second": 18.924,
      "step": 2700
    },
    {
      "epoch": 25.462962962962962,
      "grad_norm": 41.184539794921875,
      "learning_rate": 6.296257627534552e-07,
      "loss": 0.6561,
      "step": 2750
    },
    {
      "epoch": 25.925925925925927,
      "grad_norm": 57.05064010620117,
      "learning_rate": 5.114123828281398e-07,
      "loss": 0.7798,
      "step": 2800
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.5714285714285714,
      "eval_loss": 1.0627210140228271,
      "eval_runtime": 1.737,
      "eval_samples_per_second": 149.108,
      "eval_steps_per_second": 18.998,
      "step": 2808
    },
    {
      "epoch": 26.38888888888889,
      "grad_norm": 29.81525993347168,
      "learning_rate": 4.048864046470502e-07,
      "loss": 0.7423,
      "step": 2850
    },
    {
      "epoch": 26.85185185185185,
      "grad_norm": 36.73592758178711,
      "learning_rate": 3.1032520096290817e-07,
      "loss": 0.71,
      "step": 2900
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.5868725868725869,
      "eval_loss": 1.060496211051941,
      "eval_runtime": 1.7352,
      "eval_samples_per_second": 149.263,
      "eval_steps_per_second": 19.018,
      "step": 2916
    },
    {
      "epoch": 27.314814814814813,
      "grad_norm": 63.34288024902344,
      "learning_rate": 2.2797499060246253e-07,
      "loss": 0.6777,
      "step": 2950
    },
    {
      "epoch": 27.77777777777778,
      "grad_norm": 25.37461280822754,
      "learning_rate": 1.5805019736097105e-07,
      "loss": 0.706,
      "step": 3000
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.5791505791505791,
      "eval_loss": 1.0605989694595337,
      "eval_runtime": 1.7369,
      "eval_samples_per_second": 149.116,
      "eval_steps_per_second": 18.999,
      "step": 3024
    },
    {
      "epoch": 28.24074074074074,
      "grad_norm": 42.660560607910156,
      "learning_rate": 1.007328916846817e-07,
      "loss": 0.7275,
      "step": 3050
    },
    {
      "epoch": 28.703703703703702,
      "grad_norm": 31.769351959228516,
      "learning_rate": 5.617231659511446e-08,
      "loss": 0.7421,
      "step": 3100
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.5752895752895753,
      "eval_loss": 1.0606402158737183,
      "eval_runtime": 1.7325,
      "eval_samples_per_second": 149.494,
      "eval_steps_per_second": 19.048,
      "step": 3132
    },
    {
      "epoch": 29.166666666666668,
      "grad_norm": 23.639081954956055,
      "learning_rate": 2.448449908949879e-08,
      "loss": 0.6976,
      "step": 3150
    },
    {
      "epoch": 29.62962962962963,
      "grad_norm": 30.702539443969727,
      "learning_rate": 5.751948029234511e-09,
      "loss": 0.654,
      "step": 3200
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.5752895752895753,
      "eval_loss": 1.060823917388916,
      "eval_runtime": 3.0177,
      "eval_samples_per_second": 85.826,
      "eval_steps_per_second": 10.935,
      "step": 3240
    },
    {
      "epoch": 30.0,
      "step": 3240,
      "total_flos": 2.0405460235862016e+18,
      "train_loss": 0.7678916913491708,
      "train_runtime": 680.1267,
      "train_samples_per_second": 37.934,
      "train_steps_per_second": 4.764
    }
  ],
  "logging_steps": 50,
  "max_steps": 3240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0405460235862016e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}