| { | |
| "best_metric": 0.6332046332046332, | |
| "best_model_checkpoint": "./Validated_cracks_22122025_outputs/checkpoint-1620", | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 3240, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.46296296296296297, | |
| "grad_norm": 10.873927116394043, | |
| "learning_rate": 2.901234567901235e-06, | |
| "loss": 1.415, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 10.678427696228027, | |
| "learning_rate": 5.9876543209876546e-06, | |
| "loss": 1.3724, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.3359073359073359, | |
| "eval_loss": 1.3528215885162354, | |
| "eval_runtime": 1.7714, | |
| "eval_samples_per_second": 146.211, | |
| "eval_steps_per_second": 18.629, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 15.727510452270508, | |
| "learning_rate": 9.074074074074075e-06, | |
| "loss": 1.3621, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 18.13157844543457, | |
| "learning_rate": 9.996809987196146e-06, | |
| "loss": 1.3528, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.42084942084942084, | |
| "eval_loss": 1.2848700284957886, | |
| "eval_runtime": 1.764, | |
| "eval_samples_per_second": 146.821, | |
| "eval_steps_per_second": 18.707, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 2.314814814814815, | |
| "grad_norm": 11.158724784851074, | |
| "learning_rate": 9.981195232868493e-06, | |
| "loss": 1.2858, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 27.68938446044922, | |
| "learning_rate": 9.952610423187516e-06, | |
| "loss": 1.2839, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.46332046332046334, | |
| "eval_loss": 1.2019163370132446, | |
| "eval_runtime": 1.7428, | |
| "eval_samples_per_second": 148.61, | |
| "eval_steps_per_second": 18.935, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 3.240740740740741, | |
| "grad_norm": 16.059484481811523, | |
| "learning_rate": 9.91208532978737e-06, | |
| "loss": 1.2499, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.7037037037037037, | |
| "grad_norm": 10.013123512268066, | |
| "learning_rate": 9.85807175279907e-06, | |
| "loss": 1.2608, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5212355212355212, | |
| "eval_loss": 1.1559516191482544, | |
| "eval_runtime": 1.7474, | |
| "eval_samples_per_second": 148.218, | |
| "eval_steps_per_second": 18.885, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 24.753084182739258, | |
| "learning_rate": 9.791408709849578e-06, | |
| "loss": 1.2172, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.62962962962963, | |
| "grad_norm": 12.23658275604248, | |
| "learning_rate": 9.712269778427157e-06, | |
| "loss": 1.2178, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.555984555984556, | |
| "eval_loss": 1.0907317399978638, | |
| "eval_runtime": 1.747, | |
| "eval_samples_per_second": 148.252, | |
| "eval_steps_per_second": 18.889, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.092592592592593, | |
| "grad_norm": 15.365643501281738, | |
| "learning_rate": 9.620861020786583e-06, | |
| "loss": 1.2052, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.555555555555555, | |
| "grad_norm": 12.787246704101562, | |
| "learning_rate": 9.517420447403444e-06, | |
| "loss": 1.1624, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.528957528957529, | |
| "eval_loss": 1.0542538166046143, | |
| "eval_runtime": 1.7382, | |
| "eval_samples_per_second": 149.005, | |
| "eval_steps_per_second": 18.985, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 6.018518518518518, | |
| "grad_norm": 13.462636947631836, | |
| "learning_rate": 9.404634840109069e-06, | |
| "loss": 1.166, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.481481481481482, | |
| "grad_norm": 15.236122131347656, | |
| "learning_rate": 9.278195395448948e-06, | |
| "loss": 1.1335, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.944444444444445, | |
| "grad_norm": 19.687719345092773, | |
| "learning_rate": 9.140616369122732e-06, | |
| "loss": 1.1108, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.5598455598455598, | |
| "eval_loss": 1.0452436208724976, | |
| "eval_runtime": 1.7531, | |
| "eval_samples_per_second": 147.737, | |
| "eval_steps_per_second": 18.824, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 7.407407407407407, | |
| "grad_norm": 16.47871971130371, | |
| "learning_rate": 8.992255989929632e-06, | |
| "loss": 1.0708, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.87037037037037, | |
| "grad_norm": 19.686279296875, | |
| "learning_rate": 8.833500559197024e-06, | |
| "loss": 1.1028, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5598455598455598, | |
| "eval_loss": 1.0568522214889526, | |
| "eval_runtime": 1.7528, | |
| "eval_samples_per_second": 147.764, | |
| "eval_steps_per_second": 18.827, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 8.333333333333334, | |
| "grad_norm": 37.79273986816406, | |
| "learning_rate": 8.664763444927562e-06, | |
| "loss": 1.0619, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.796296296296296, | |
| "grad_norm": 16.420129776000977, | |
| "learning_rate": 8.486484005469977e-06, | |
| "loss": 1.1023, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.5675675675675675, | |
| "eval_loss": 1.057991862297058, | |
| "eval_runtime": 1.7353, | |
| "eval_samples_per_second": 149.25, | |
| "eval_steps_per_second": 19.016, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 9.25925925925926, | |
| "grad_norm": 25.430509567260742, | |
| "learning_rate": 8.299126445516126e-06, | |
| "loss": 1.1072, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.722222222222221, | |
| "grad_norm": 35.4229850769043, | |
| "learning_rate": 8.103178607403005e-06, | |
| "loss": 1.0572, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6138996138996139, | |
| "eval_loss": 1.0031245946884155, | |
| "eval_runtime": 1.743, | |
| "eval_samples_per_second": 148.597, | |
| "eval_steps_per_second": 18.933, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 10.185185185185185, | |
| "grad_norm": 14.466697692871094, | |
| "learning_rate": 7.899150700867014e-06, | |
| "loss": 1.0489, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 10.648148148148149, | |
| "grad_norm": 21.01825714111328, | |
| "learning_rate": 7.687573974557857e-06, | |
| "loss": 0.9874, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.583011583011583, | |
| "eval_loss": 1.0340404510498047, | |
| "eval_runtime": 1.7486, | |
| "eval_samples_per_second": 148.117, | |
| "eval_steps_per_second": 18.872, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 11.11111111111111, | |
| "grad_norm": 16.48180389404297, | |
| "learning_rate": 7.4689993327712765e-06, | |
| "loss": 1.0212, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 11.574074074074074, | |
| "grad_norm": 18.663524627685547, | |
| "learning_rate": 7.243995901002312e-06, | |
| "loss": 1.0132, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.61003861003861, | |
| "eval_loss": 1.004916787147522, | |
| "eval_runtime": 1.7428, | |
| "eval_samples_per_second": 148.608, | |
| "eval_steps_per_second": 18.935, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 12.037037037037036, | |
| "grad_norm": 24.026588439941406, | |
| "learning_rate": 7.013149544054148e-06, | |
| "loss": 1.0111, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 20.475515365600586, | |
| "learning_rate": 6.777061340561082e-06, | |
| "loss": 1.005, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 12.962962962962964, | |
| "grad_norm": 49.801326751708984, | |
| "learning_rate": 6.5363460178976524e-06, | |
| "loss": 0.9898, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.6216216216216216, | |
| "eval_loss": 0.9874952435493469, | |
| "eval_runtime": 1.7492, | |
| "eval_samples_per_second": 148.065, | |
| "eval_steps_per_second": 18.865, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 13.425925925925926, | |
| "grad_norm": 38.209232330322266, | |
| "learning_rate": 6.291630351549136e-06, | |
| "loss": 0.9332, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 13.88888888888889, | |
| "grad_norm": 16.34885025024414, | |
| "learning_rate": 6.043551533111121e-06, | |
| "loss": 1.0182, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.61003861003861, | |
| "eval_loss": 0.9667864441871643, | |
| "eval_runtime": 1.7502, | |
| "eval_samples_per_second": 147.984, | |
| "eval_steps_per_second": 18.855, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 14.351851851851851, | |
| "grad_norm": 21.03826904296875, | |
| "learning_rate": 5.792755511167572e-06, | |
| "loss": 0.9279, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 14.814814814814815, | |
| "grad_norm": 34.984375, | |
| "learning_rate": 5.544968491028696e-06, | |
| "loss": 0.9889, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.6332046332046332, | |
| "eval_loss": 0.9692270755767822, | |
| "eval_runtime": 1.7309, | |
| "eval_samples_per_second": 149.63, | |
| "eval_steps_per_second": 19.065, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 15.277777777777779, | |
| "grad_norm": 16.456146240234375, | |
| "learning_rate": 5.290724144552379e-06, | |
| "loss": 0.9673, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 15.74074074074074, | |
| "grad_norm": 27.009721755981445, | |
| "learning_rate": 5.035722809427533e-06, | |
| "loss": 0.9446, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6332046332046332, | |
| "eval_loss": 0.9777135848999023, | |
| "eval_runtime": 1.7383, | |
| "eval_samples_per_second": 148.995, | |
| "eval_steps_per_second": 18.984, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 16.203703703703702, | |
| "grad_norm": 12.319013595581055, | |
| "learning_rate": 4.780628459113764e-06, | |
| "loss": 0.9024, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 16.666666666666668, | |
| "grad_norm": 20.682802200317383, | |
| "learning_rate": 4.526105309263983e-06, | |
| "loss": 0.9519, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.5984555984555985, | |
| "eval_loss": 1.0030242204666138, | |
| "eval_runtime": 1.7421, | |
| "eval_samples_per_second": 148.671, | |
| "eval_steps_per_second": 18.943, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 17.12962962962963, | |
| "grad_norm": 24.291671752929688, | |
| "learning_rate": 4.272816088237135e-06, | |
| "loss": 1.003, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 17.59259259259259, | |
| "grad_norm": 13.384744644165039, | |
| "learning_rate": 4.021420311483538e-06, | |
| "loss": 0.9458, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.5984555984555985, | |
| "eval_loss": 0.97477126121521, | |
| "eval_runtime": 1.7465, | |
| "eval_samples_per_second": 148.299, | |
| "eval_steps_per_second": 18.895, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 18.055555555555557, | |
| "grad_norm": 15.239733695983887, | |
| "learning_rate": 3.7725725642960047e-06, | |
| "loss": 0.9798, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 18.51851851851852, | |
| "grad_norm": 24.09659767150879, | |
| "learning_rate": 3.526920797398148e-06, | |
| "loss": 0.9346, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 18.98148148148148, | |
| "grad_norm": 23.59765625, | |
| "learning_rate": 3.2851046398077705e-06, | |
| "loss": 0.9347, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.9744410514831543, | |
| "eval_runtime": 1.7456, | |
| "eval_samples_per_second": 148.373, | |
| "eval_steps_per_second": 18.905, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 19.444444444444443, | |
| "grad_norm": 22.252914428710938, | |
| "learning_rate": 3.0477537333683815e-06, | |
| "loss": 0.8767, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 19.90740740740741, | |
| "grad_norm": 32.37730026245117, | |
| "learning_rate": 2.815486093285317e-06, | |
| "loss": 0.8863, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.6293436293436293, | |
| "eval_loss": 0.9656945466995239, | |
| "eval_runtime": 1.7376, | |
| "eval_samples_per_second": 149.058, | |
| "eval_steps_per_second": 18.992, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 20.37037037037037, | |
| "grad_norm": 18.95844841003418, | |
| "learning_rate": 2.5889064989353797e-06, | |
| "loss": 0.9081, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 20.833333333333332, | |
| "grad_norm": 84.69548034667969, | |
| "learning_rate": 2.3686049191399614e-06, | |
| "loss": 0.8507, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.6254826254826255, | |
| "eval_loss": 0.9784243702888489, | |
| "eval_runtime": 1.7492, | |
| "eval_samples_per_second": 148.065, | |
| "eval_steps_per_second": 18.865, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 21.296296296296298, | |
| "grad_norm": 34.8240852355957, | |
| "learning_rate": 2.155154976001948e-06, | |
| "loss": 0.9308, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 21.75925925925926, | |
| "grad_norm": 39.87207794189453, | |
| "learning_rate": 1.949112451306282e-06, | |
| "loss": 0.8712, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6254826254826255, | |
| "eval_loss": 0.9790379405021667, | |
| "eval_runtime": 1.7458, | |
| "eval_samples_per_second": 148.355, | |
| "eval_steps_per_second": 18.902, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 22.22222222222222, | |
| "grad_norm": 15.800605773925781, | |
| "learning_rate": 1.7510138393732029e-06, | |
| "loss": 0.8973, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 22.685185185185187, | |
| "grad_norm": 13.362822532653809, | |
| "learning_rate": 1.5613749501322705e-06, | |
| "loss": 0.8857, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.9682068824768066, | |
| "eval_runtime": 1.7485, | |
| "eval_samples_per_second": 148.127, | |
| "eval_steps_per_second": 18.873, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 23.14814814814815, | |
| "grad_norm": 26.928611755371094, | |
| "learning_rate": 1.3806895660544805e-06, | |
| "loss": 0.8446, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 23.61111111111111, | |
| "grad_norm": 52.092063903808594, | |
| "learning_rate": 1.2094281564395628e-06, | |
| "loss": 0.8848, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6254826254826255, | |
| "eval_loss": 0.9722690582275391, | |
| "eval_runtime": 1.7534, | |
| "eval_samples_per_second": 147.714, | |
| "eval_steps_per_second": 18.821, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 24.074074074074073, | |
| "grad_norm": 22.29905128479004, | |
| "learning_rate": 1.0480366524062041e-06, | |
| "loss": 0.8778, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 24.537037037037038, | |
| "grad_norm": 23.949947357177734, | |
| "learning_rate": 8.969352857748842e-07, | |
| "loss": 0.8672, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 31.42759895324707, | |
| "learning_rate": 7.565174948666382e-07, | |
| "loss": 0.904, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.975362241268158, | |
| "eval_runtime": 1.7452, | |
| "eval_samples_per_second": 148.407, | |
| "eval_steps_per_second": 18.909, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 25.462962962962962, | |
| "grad_norm": 20.711061477661133, | |
| "learning_rate": 6.271489000668418e-07, | |
| "loss": 0.7932, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 25.925925925925927, | |
| "grad_norm": 22.23466682434082, | |
| "learning_rate": 5.091663518214407e-07, | |
| "loss": 0.943, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.6216216216216216, | |
| "eval_loss": 0.9710479974746704, | |
| "eval_runtime": 1.7435, | |
| "eval_samples_per_second": 148.554, | |
| "eval_steps_per_second": 18.928, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 26.38888888888889, | |
| "grad_norm": 18.16496467590332, | |
| "learning_rate": 4.0287705354446147e-07, | |
| "loss": 0.9108, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 26.85185185185185, | |
| "grad_norm": 50.363128662109375, | |
| "learning_rate": 3.085577617205765e-07, | |
| "loss": 0.862, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.9716824889183044, | |
| "eval_runtime": 1.7298, | |
| "eval_samples_per_second": 149.728, | |
| "eval_steps_per_second": 19.077, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 27.314814814814813, | |
| "grad_norm": 69.7729721069336, | |
| "learning_rate": 2.2645406528550407e-07, | |
| "loss": 0.8412, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 27.77777777777778, | |
| "grad_norm": 20.13194465637207, | |
| "learning_rate": 1.5677974616058856e-07, | |
| "loss": 0.864, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.9705125689506531, | |
| "eval_runtime": 1.7418, | |
| "eval_samples_per_second": 148.699, | |
| "eval_steps_per_second": 18.946, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 28.24074074074074, | |
| "grad_norm": 29.41867446899414, | |
| "learning_rate": 9.971622260661007e-08, | |
| "loss": 0.8656, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 28.703703703703702, | |
| "grad_norm": 19.62738609313965, | |
| "learning_rate": 5.541207684621908e-08, | |
| "loss": 0.8879, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.97031569480896, | |
| "eval_runtime": 1.7373, | |
| "eval_samples_per_second": 149.084, | |
| "eval_steps_per_second": 18.995, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 29.166666666666668, | |
| "grad_norm": 18.350698471069336, | |
| "learning_rate": 2.398266818496864e-08, | |
| "loss": 0.8594, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 29.62962962962963, | |
| "grad_norm": 28.054824829101562, | |
| "learning_rate": 5.509832638314061e-09, | |
| "loss": 0.8099, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.6177606177606177, | |
| "eval_loss": 0.97026127576828, | |
| "eval_runtime": 1.8767, | |
| "eval_samples_per_second": 138.009, | |
| "eval_steps_per_second": 17.584, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 3240, | |
| "total_flos": 2.0405460235862016e+18, | |
| "train_loss": 1.012899116233543, | |
| "train_runtime": 640.9857, | |
| "train_samples_per_second": 40.251, | |
| "train_steps_per_second": 5.055 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 3240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.0405460235862016e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |