| { | |
| "best_metric": 0.602112676056338, | |
| "best_model_checkpoint": "./Validated_cracks_raw_dataset_359_outputs/checkpoint-1620", | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 3240, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.46296296296296297, | |
| "grad_norm": 10.873927116394043, | |
| "learning_rate": 2.901234567901235e-06, | |
| "loss": 1.415, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 10.678427696228027, | |
| "learning_rate": 5.9876543209876546e-06, | |
| "loss": 1.3724, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.31338028169014087, | |
| "eval_loss": 1.368980884552002, | |
| "eval_runtime": 1.9715, | |
| "eval_samples_per_second": 144.049, | |
| "eval_steps_per_second": 18.26, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 15.727510452270508, | |
| "learning_rate": 9.074074074074075e-06, | |
| "loss": 1.3621, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 18.13157844543457, | |
| "learning_rate": 9.996809987196146e-06, | |
| "loss": 1.3528, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.4014084507042254, | |
| "eval_loss": 1.3014895915985107, | |
| "eval_runtime": 1.9678, | |
| "eval_samples_per_second": 144.321, | |
| "eval_steps_per_second": 18.294, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 2.314814814814815, | |
| "grad_norm": 11.158724784851074, | |
| "learning_rate": 9.981195232868493e-06, | |
| "loss": 1.2858, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 27.68938446044922, | |
| "learning_rate": 9.952610423187516e-06, | |
| "loss": 1.2839, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.43309859154929575, | |
| "eval_loss": 1.2375338077545166, | |
| "eval_runtime": 1.9547, | |
| "eval_samples_per_second": 145.29, | |
| "eval_steps_per_second": 18.417, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 3.240740740740741, | |
| "grad_norm": 16.059484481811523, | |
| "learning_rate": 9.91208532978737e-06, | |
| "loss": 1.2499, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.7037037037037037, | |
| "grad_norm": 10.013123512268066, | |
| "learning_rate": 9.85807175279907e-06, | |
| "loss": 1.2608, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.49295774647887325, | |
| "eval_loss": 1.197479486465454, | |
| "eval_runtime": 1.947, | |
| "eval_samples_per_second": 145.866, | |
| "eval_steps_per_second": 18.49, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 24.753084182739258, | |
| "learning_rate": 9.791408709849578e-06, | |
| "loss": 1.2172, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.62962962962963, | |
| "grad_norm": 12.23658275604248, | |
| "learning_rate": 9.712269778427157e-06, | |
| "loss": 1.2178, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5246478873239436, | |
| "eval_loss": 1.1367548704147339, | |
| "eval_runtime": 1.9388, | |
| "eval_samples_per_second": 146.485, | |
| "eval_steps_per_second": 18.568, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.092592592592593, | |
| "grad_norm": 15.365643501281738, | |
| "learning_rate": 9.620861020786583e-06, | |
| "loss": 1.2052, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.555555555555555, | |
| "grad_norm": 12.787246704101562, | |
| "learning_rate": 9.517420447403444e-06, | |
| "loss": 1.1624, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.4964788732394366, | |
| "eval_loss": 1.1164129972457886, | |
| "eval_runtime": 1.936, | |
| "eval_samples_per_second": 146.695, | |
| "eval_steps_per_second": 18.595, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 6.018518518518518, | |
| "grad_norm": 13.462636947631836, | |
| "learning_rate": 9.404634840109069e-06, | |
| "loss": 1.166, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.481481481481482, | |
| "grad_norm": 15.236122131347656, | |
| "learning_rate": 9.278195395448948e-06, | |
| "loss": 1.1335, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.944444444444445, | |
| "grad_norm": 19.687719345092773, | |
| "learning_rate": 9.140616369122732e-06, | |
| "loss": 1.1108, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.528169014084507, | |
| "eval_loss": 1.106345772743225, | |
| "eval_runtime": 1.9599, | |
| "eval_samples_per_second": 144.907, | |
| "eval_steps_per_second": 18.368, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 7.407407407407407, | |
| "grad_norm": 16.47871971130371, | |
| "learning_rate": 8.992255989929632e-06, | |
| "loss": 1.0708, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.87037037037037, | |
| "grad_norm": 19.686279296875, | |
| "learning_rate": 8.833500559197024e-06, | |
| "loss": 1.1028, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5316901408450704, | |
| "eval_loss": 1.1173583269119263, | |
| "eval_runtime": 1.9473, | |
| "eval_samples_per_second": 145.843, | |
| "eval_steps_per_second": 18.487, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 8.333333333333334, | |
| "grad_norm": 37.79273986816406, | |
| "learning_rate": 8.664763444927562e-06, | |
| "loss": 1.0619, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.796296296296296, | |
| "grad_norm": 16.420129776000977, | |
| "learning_rate": 8.486484005469977e-06, | |
| "loss": 1.1023, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.545774647887324, | |
| "eval_loss": 1.1122897863388062, | |
| "eval_runtime": 1.9381, | |
| "eval_samples_per_second": 146.538, | |
| "eval_steps_per_second": 18.575, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 9.25925925925926, | |
| "grad_norm": 25.430509567260742, | |
| "learning_rate": 8.299126445516126e-06, | |
| "loss": 1.1072, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.722222222222221, | |
| "grad_norm": 35.4229850769043, | |
| "learning_rate": 8.103178607403005e-06, | |
| "loss": 1.0572, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.573943661971831, | |
| "eval_loss": 1.0754574537277222, | |
| "eval_runtime": 1.967, | |
| "eval_samples_per_second": 144.38, | |
| "eval_steps_per_second": 18.302, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 10.185185185185185, | |
| "grad_norm": 14.466697692871094, | |
| "learning_rate": 7.899150700867014e-06, | |
| "loss": 1.0489, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 10.648148148148149, | |
| "grad_norm": 21.01825714111328, | |
| "learning_rate": 7.687573974557857e-06, | |
| "loss": 0.9874, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5598591549295775, | |
| "eval_loss": 1.095212459564209, | |
| "eval_runtime": 1.9552, | |
| "eval_samples_per_second": 145.255, | |
| "eval_steps_per_second": 18.413, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 11.11111111111111, | |
| "grad_norm": 16.48180389404297, | |
| "learning_rate": 7.4689993327712765e-06, | |
| "loss": 1.0212, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 11.574074074074074, | |
| "grad_norm": 18.663524627685547, | |
| "learning_rate": 7.243995901002312e-06, | |
| "loss": 1.0132, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.5774647887323944, | |
| "eval_loss": 1.0767173767089844, | |
| "eval_runtime": 1.9611, | |
| "eval_samples_per_second": 144.815, | |
| "eval_steps_per_second": 18.357, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 12.037037037037036, | |
| "grad_norm": 24.026588439941406, | |
| "learning_rate": 7.013149544054148e-06, | |
| "loss": 1.0111, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 20.475515365600586, | |
| "learning_rate": 6.777061340561082e-06, | |
| "loss": 1.005, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 12.962962962962964, | |
| "grad_norm": 49.801326751708984, | |
| "learning_rate": 6.5363460178976524e-06, | |
| "loss": 0.9898, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.061557412147522, | |
| "eval_runtime": 1.9611, | |
| "eval_samples_per_second": 144.818, | |
| "eval_steps_per_second": 18.357, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 13.425925925925926, | |
| "grad_norm": 38.209232330322266, | |
| "learning_rate": 6.291630351549136e-06, | |
| "loss": 0.9332, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 13.88888888888889, | |
| "grad_norm": 16.34885025024414, | |
| "learning_rate": 6.043551533111121e-06, | |
| "loss": 1.0182, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5809859154929577, | |
| "eval_loss": 1.0419806241989136, | |
| "eval_runtime": 1.9564, | |
| "eval_samples_per_second": 145.165, | |
| "eval_steps_per_second": 18.401, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 14.351851851851851, | |
| "grad_norm": 21.03826904296875, | |
| "learning_rate": 5.792755511167572e-06, | |
| "loss": 0.9279, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 14.814814814814815, | |
| "grad_norm": 34.984375, | |
| "learning_rate": 5.544968491028696e-06, | |
| "loss": 0.9889, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.602112676056338, | |
| "eval_loss": 1.0440722703933716, | |
| "eval_runtime": 1.9414, | |
| "eval_samples_per_second": 146.284, | |
| "eval_steps_per_second": 18.543, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 15.277777777777779, | |
| "grad_norm": 16.456146240234375, | |
| "learning_rate": 5.290724144552379e-06, | |
| "loss": 0.9673, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 15.74074074074074, | |
| "grad_norm": 27.009721755981445, | |
| "learning_rate": 5.035722809427533e-06, | |
| "loss": 0.9446, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.602112676056338, | |
| "eval_loss": 1.0512064695358276, | |
| "eval_runtime": 2.126, | |
| "eval_samples_per_second": 133.583, | |
| "eval_steps_per_second": 16.933, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 16.203703703703702, | |
| "grad_norm": 12.319013595581055, | |
| "learning_rate": 4.780628459113764e-06, | |
| "loss": 0.9024, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 16.666666666666668, | |
| "grad_norm": 20.682802200317383, | |
| "learning_rate": 4.526105309263983e-06, | |
| "loss": 0.9519, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.5704225352112676, | |
| "eval_loss": 1.0736767053604126, | |
| "eval_runtime": 2.1743, | |
| "eval_samples_per_second": 130.614, | |
| "eval_steps_per_second": 16.557, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 17.12962962962963, | |
| "grad_norm": 24.291671752929688, | |
| "learning_rate": 4.272816088237135e-06, | |
| "loss": 1.003, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 17.59259259259259, | |
| "grad_norm": 13.384744644165039, | |
| "learning_rate": 4.021420311483538e-06, | |
| "loss": 0.9458, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.5669014084507042, | |
| "eval_loss": 1.0471343994140625, | |
| "eval_runtime": 1.9519, | |
| "eval_samples_per_second": 145.497, | |
| "eval_steps_per_second": 18.443, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 18.055555555555557, | |
| "grad_norm": 15.239733695983887, | |
| "learning_rate": 3.7725725642960047e-06, | |
| "loss": 0.9798, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 18.51851851851852, | |
| "grad_norm": 24.09659767150879, | |
| "learning_rate": 3.526920797398148e-06, | |
| "loss": 0.9346, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 18.98148148148148, | |
| "grad_norm": 23.59765625, | |
| "learning_rate": 3.2851046398077705e-06, | |
| "loss": 0.9347, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.5845070422535211, | |
| "eval_loss": 1.051329255104065, | |
| "eval_runtime": 1.9643, | |
| "eval_samples_per_second": 144.583, | |
| "eval_steps_per_second": 18.327, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 19.444444444444443, | |
| "grad_norm": 22.252914428710938, | |
| "learning_rate": 3.0477537333683815e-06, | |
| "loss": 0.8767, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 19.90740740740741, | |
| "grad_norm": 32.37730026245117, | |
| "learning_rate": 2.815486093285317e-06, | |
| "loss": 0.8863, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.5950704225352113, | |
| "eval_loss": 1.0428193807601929, | |
| "eval_runtime": 1.9503, | |
| "eval_samples_per_second": 145.616, | |
| "eval_steps_per_second": 18.458, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 20.37037037037037, | |
| "grad_norm": 18.95844841003418, | |
| "learning_rate": 2.5889064989353797e-06, | |
| "loss": 0.9081, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 20.833333333333332, | |
| "grad_norm": 84.69548034667969, | |
| "learning_rate": 2.3686049191399614e-06, | |
| "loss": 0.8507, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.5950704225352113, | |
| "eval_loss": 1.0526705980300903, | |
| "eval_runtime": 1.9472, | |
| "eval_samples_per_second": 145.852, | |
| "eval_steps_per_second": 18.488, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 21.296296296296298, | |
| "grad_norm": 34.8240852355957, | |
| "learning_rate": 2.155154976001948e-06, | |
| "loss": 0.9308, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 21.75925925925926, | |
| "grad_norm": 39.87207794189453, | |
| "learning_rate": 1.949112451306282e-06, | |
| "loss": 0.8712, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.5915492957746479, | |
| "eval_loss": 1.056043267250061, | |
| "eval_runtime": 1.9559, | |
| "eval_samples_per_second": 145.204, | |
| "eval_steps_per_second": 18.406, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 22.22222222222222, | |
| "grad_norm": 15.800605773925781, | |
| "learning_rate": 1.7510138393732029e-06, | |
| "loss": 0.8973, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 22.685185185185187, | |
| "grad_norm": 13.362822532653809, | |
| "learning_rate": 1.5613749501322705e-06, | |
| "loss": 0.8857, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.0447120666503906, | |
| "eval_runtime": 1.9485, | |
| "eval_samples_per_second": 145.752, | |
| "eval_steps_per_second": 18.476, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 23.14814814814815, | |
| "grad_norm": 26.928611755371094, | |
| "learning_rate": 1.3806895660544805e-06, | |
| "loss": 0.8446, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 23.61111111111111, | |
| "grad_norm": 52.092063903808594, | |
| "learning_rate": 1.2094281564395628e-06, | |
| "loss": 0.8848, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.5950704225352113, | |
| "eval_loss": 1.051204800605774, | |
| "eval_runtime": 1.9468, | |
| "eval_samples_per_second": 145.879, | |
| "eval_steps_per_second": 18.492, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 24.074074074074073, | |
| "grad_norm": 22.29905128479004, | |
| "learning_rate": 1.0480366524062041e-06, | |
| "loss": 0.8778, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 24.537037037037038, | |
| "grad_norm": 23.949947357177734, | |
| "learning_rate": 8.969352857748842e-07, | |
| "loss": 0.8672, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 31.42759895324707, | |
| "learning_rate": 7.565174948666382e-07, | |
| "loss": 0.904, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.5845070422535211, | |
| "eval_loss": 1.0513179302215576, | |
| "eval_runtime": 1.9542, | |
| "eval_samples_per_second": 145.331, | |
| "eval_steps_per_second": 18.422, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 25.462962962962962, | |
| "grad_norm": 20.711061477661133, | |
| "learning_rate": 6.271489000668418e-07, | |
| "loss": 0.7932, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 25.925925925925927, | |
| "grad_norm": 22.23466682434082, | |
| "learning_rate": 5.091663518214407e-07, | |
| "loss": 0.943, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.04801344871521, | |
| "eval_runtime": 1.9702, | |
| "eval_samples_per_second": 144.148, | |
| "eval_steps_per_second": 18.272, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 26.38888888888889, | |
| "grad_norm": 18.16496467590332, | |
| "learning_rate": 4.0287705354446147e-07, | |
| "loss": 0.9108, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 26.85185185185185, | |
| "grad_norm": 50.363128662109375, | |
| "learning_rate": 3.085577617205765e-07, | |
| "loss": 0.862, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.047555685043335, | |
| "eval_runtime": 1.9557, | |
| "eval_samples_per_second": 145.219, | |
| "eval_steps_per_second": 18.408, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 27.314814814814813, | |
| "grad_norm": 69.7729721069336, | |
| "learning_rate": 2.2645406528550407e-07, | |
| "loss": 0.8412, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 27.77777777777778, | |
| "grad_norm": 20.13194465637207, | |
| "learning_rate": 1.5677974616058856e-07, | |
| "loss": 0.864, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.046897292137146, | |
| "eval_runtime": 1.954, | |
| "eval_samples_per_second": 145.344, | |
| "eval_steps_per_second": 18.424, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 28.24074074074074, | |
| "grad_norm": 29.41867446899414, | |
| "learning_rate": 9.971622260661007e-08, | |
| "loss": 0.8656, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 28.703703703703702, | |
| "grad_norm": 19.62738609313965, | |
| "learning_rate": 5.541207684621908e-08, | |
| "loss": 0.8879, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.04678475856781, | |
| "eval_runtime": 1.9435, | |
| "eval_samples_per_second": 146.127, | |
| "eval_steps_per_second": 18.523, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 29.166666666666668, | |
| "grad_norm": 18.350698471069336, | |
| "learning_rate": 2.398266818496864e-08, | |
| "loss": 0.8594, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 29.62962962962963, | |
| "grad_norm": 28.054824829101562, | |
| "learning_rate": 5.509832638314061e-09, | |
| "loss": 0.8099, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.5880281690140845, | |
| "eval_loss": 1.0467545986175537, | |
| "eval_runtime": 3.2936, | |
| "eval_samples_per_second": 86.227, | |
| "eval_steps_per_second": 10.93, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 3240, | |
| "total_flos": 2.0405460235862016e+18, | |
| "train_loss": 1.012899116233543, | |
| "train_runtime": 686.1726, | |
| "train_samples_per_second": 37.6, | |
| "train_steps_per_second": 4.722 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 3240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.0405460235862016e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |