{ "best_metric": 0.602112676056338, "best_model_checkpoint": "./Validated_cracks_raw_dataset_359_outputs/checkpoint-1620", "epoch": 30.0, "eval_steps": 500, "global_step": 3240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.46296296296296297, "grad_norm": 10.873927116394043, "learning_rate": 2.901234567901235e-06, "loss": 1.415, "step": 50 }, { "epoch": 0.9259259259259259, "grad_norm": 10.678427696228027, "learning_rate": 5.9876543209876546e-06, "loss": 1.3724, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.31338028169014087, "eval_loss": 1.368980884552002, "eval_runtime": 1.9715, "eval_samples_per_second": 144.049, "eval_steps_per_second": 18.26, "step": 108 }, { "epoch": 1.3888888888888888, "grad_norm": 15.727510452270508, "learning_rate": 9.074074074074075e-06, "loss": 1.3621, "step": 150 }, { "epoch": 1.8518518518518519, "grad_norm": 18.13157844543457, "learning_rate": 9.996809987196146e-06, "loss": 1.3528, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.4014084507042254, "eval_loss": 1.3014895915985107, "eval_runtime": 1.9678, "eval_samples_per_second": 144.321, "eval_steps_per_second": 18.294, "step": 216 }, { "epoch": 2.314814814814815, "grad_norm": 11.158724784851074, "learning_rate": 9.981195232868493e-06, "loss": 1.2858, "step": 250 }, { "epoch": 2.7777777777777777, "grad_norm": 27.68938446044922, "learning_rate": 9.952610423187516e-06, "loss": 1.2839, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.43309859154929575, "eval_loss": 1.2375338077545166, "eval_runtime": 1.9547, "eval_samples_per_second": 145.29, "eval_steps_per_second": 18.417, "step": 324 }, { "epoch": 3.240740740740741, "grad_norm": 16.059484481811523, "learning_rate": 9.91208532978737e-06, "loss": 1.2499, "step": 350 }, { "epoch": 3.7037037037037037, "grad_norm": 10.013123512268066, "learning_rate": 9.85807175279907e-06, "loss": 1.2608, "step": 400 }, { "epoch": 4.0, "eval_accuracy": 0.49295774647887325, "eval_loss": 1.197479486465454, "eval_runtime": 1.947, "eval_samples_per_second": 145.866, "eval_steps_per_second": 18.49, "step": 432 }, { "epoch": 4.166666666666667, "grad_norm": 24.753084182739258, "learning_rate": 9.791408709849578e-06, "loss": 1.2172, "step": 450 }, { "epoch": 4.62962962962963, "grad_norm": 12.23658275604248, "learning_rate": 9.712269778427157e-06, "loss": 1.2178, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.5246478873239436, "eval_loss": 1.1367548704147339, "eval_runtime": 1.9388, "eval_samples_per_second": 146.485, "eval_steps_per_second": 18.568, "step": 540 }, { "epoch": 5.092592592592593, "grad_norm": 15.365643501281738, "learning_rate": 9.620861020786583e-06, "loss": 1.2052, "step": 550 }, { "epoch": 5.555555555555555, "grad_norm": 12.787246704101562, "learning_rate": 9.517420447403444e-06, "loss": 1.1624, "step": 600 }, { "epoch": 6.0, "eval_accuracy": 0.4964788732394366, "eval_loss": 1.1164129972457886, "eval_runtime": 1.936, "eval_samples_per_second": 146.695, "eval_steps_per_second": 18.595, "step": 648 }, { "epoch": 6.018518518518518, "grad_norm": 13.462636947631836, "learning_rate": 9.404634840109069e-06, "loss": 1.166, "step": 650 }, { "epoch": 6.481481481481482, "grad_norm": 15.236122131347656, "learning_rate": 9.278195395448948e-06, "loss": 1.1335, "step": 700 }, { "epoch": 6.944444444444445, "grad_norm": 19.687719345092773, "learning_rate": 9.140616369122732e-06, "loss": 1.1108, "step": 750 }, { "epoch": 7.0, "eval_accuracy": 0.528169014084507, "eval_loss": 1.106345772743225, "eval_runtime": 1.9599, "eval_samples_per_second": 144.907, "eval_steps_per_second": 18.368, "step": 756 }, { "epoch": 7.407407407407407, "grad_norm": 16.47871971130371, "learning_rate": 8.992255989929632e-06, "loss": 1.0708, "step": 800 }, { "epoch": 7.87037037037037, "grad_norm": 19.686279296875, "learning_rate": 8.833500559197024e-06, "loss": 1.1028, "step": 850 }, { "epoch": 8.0, "eval_accuracy": 0.5316901408450704, "eval_loss": 1.1173583269119263, "eval_runtime": 1.9473, "eval_samples_per_second": 145.843, "eval_steps_per_second": 18.487, "step": 864 }, { "epoch": 8.333333333333334, "grad_norm": 37.79273986816406, "learning_rate": 8.664763444927562e-06, "loss": 1.0619, "step": 900 }, { "epoch": 8.796296296296296, "grad_norm": 16.420129776000977, "learning_rate": 8.486484005469977e-06, "loss": 1.1023, "step": 950 }, { "epoch": 9.0, "eval_accuracy": 0.545774647887324, "eval_loss": 1.1122897863388062, "eval_runtime": 1.9381, "eval_samples_per_second": 146.538, "eval_steps_per_second": 18.575, "step": 972 }, { "epoch": 9.25925925925926, "grad_norm": 25.430509567260742, "learning_rate": 8.299126445516126e-06, "loss": 1.1072, "step": 1000 }, { "epoch": 9.722222222222221, "grad_norm": 35.4229850769043, "learning_rate": 8.103178607403005e-06, "loss": 1.0572, "step": 1050 }, { "epoch": 10.0, "eval_accuracy": 0.573943661971831, "eval_loss": 1.0754574537277222, "eval_runtime": 1.967, "eval_samples_per_second": 144.38, "eval_steps_per_second": 18.302, "step": 1080 }, { "epoch": 10.185185185185185, "grad_norm": 14.466697692871094, "learning_rate": 7.899150700867014e-06, "loss": 1.0489, "step": 1100 }, { "epoch": 10.648148148148149, "grad_norm": 21.01825714111328, "learning_rate": 7.687573974557857e-06, "loss": 0.9874, "step": 1150 }, { "epoch": 11.0, "eval_accuracy": 0.5598591549295775, "eval_loss": 1.095212459564209, "eval_runtime": 1.9552, "eval_samples_per_second": 145.255, "eval_steps_per_second": 18.413, "step": 1188 }, { "epoch": 11.11111111111111, "grad_norm": 16.48180389404297, "learning_rate": 7.4689993327712765e-06, "loss": 1.0212, "step": 1200 }, { "epoch": 11.574074074074074, "grad_norm": 18.663524627685547, "learning_rate": 7.243995901002312e-06, "loss": 1.0132, "step": 1250 }, { "epoch": 12.0, "eval_accuracy": 0.5774647887323944, "eval_loss": 1.0767173767089844, "eval_runtime": 1.9611, "eval_samples_per_second": 144.815, "eval_steps_per_second": 18.357, "step": 1296 }, { "epoch": 12.037037037037036, "grad_norm": 24.026588439941406, "learning_rate": 7.013149544054148e-06, "loss": 1.0111, "step": 1300 }, { "epoch": 12.5, "grad_norm": 20.475515365600586, "learning_rate": 6.777061340561082e-06, "loss": 1.005, "step": 1350 }, { "epoch": 12.962962962962964, "grad_norm": 49.801326751708984, "learning_rate": 6.5363460178976524e-06, "loss": 0.9898, "step": 1400 }, { "epoch": 13.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.061557412147522, "eval_runtime": 1.9611, "eval_samples_per_second": 144.818, "eval_steps_per_second": 18.357, "step": 1404 }, { "epoch": 13.425925925925926, "grad_norm": 38.209232330322266, "learning_rate": 6.291630351549136e-06, "loss": 0.9332, "step": 1450 }, { "epoch": 13.88888888888889, "grad_norm": 16.34885025024414, "learning_rate": 6.043551533111121e-06, "loss": 1.0182, "step": 1500 }, { "epoch": 14.0, "eval_accuracy": 0.5809859154929577, "eval_loss": 1.0419806241989136, "eval_runtime": 1.9564, "eval_samples_per_second": 145.165, "eval_steps_per_second": 18.401, "step": 1512 }, { "epoch": 14.351851851851851, "grad_norm": 21.03826904296875, "learning_rate": 5.792755511167572e-06, "loss": 0.9279, "step": 1550 }, { "epoch": 14.814814814814815, "grad_norm": 34.984375, "learning_rate": 5.544968491028696e-06, "loss": 0.9889, "step": 1600 }, { "epoch": 15.0, "eval_accuracy": 0.602112676056338, "eval_loss": 1.0440722703933716, "eval_runtime": 1.9414, "eval_samples_per_second": 146.284, "eval_steps_per_second": 18.543, "step": 1620 }, { "epoch": 15.277777777777779, "grad_norm": 16.456146240234375, "learning_rate": 5.290724144552379e-06, "loss": 0.9673, "step": 1650 }, { "epoch": 15.74074074074074, "grad_norm": 27.009721755981445, "learning_rate": 5.035722809427533e-06, "loss": 0.9446, "step": 1700 }, { "epoch": 16.0, "eval_accuracy": 0.602112676056338, "eval_loss": 1.0512064695358276, "eval_runtime": 2.126, "eval_samples_per_second": 133.583, "eval_steps_per_second": 16.933, "step": 1728 }, { "epoch": 16.203703703703702, "grad_norm": 12.319013595581055, "learning_rate": 4.780628459113764e-06, "loss": 0.9024, "step": 1750 }, { "epoch": 16.666666666666668, "grad_norm": 20.682802200317383, "learning_rate": 4.526105309263983e-06, "loss": 0.9519, "step": 1800 }, { "epoch": 17.0, "eval_accuracy": 0.5704225352112676, "eval_loss": 1.0736767053604126, "eval_runtime": 2.1743, "eval_samples_per_second": 130.614, "eval_steps_per_second": 16.557, "step": 1836 }, { "epoch": 17.12962962962963, "grad_norm": 24.291671752929688, "learning_rate": 4.272816088237135e-06, "loss": 1.003, "step": 1850 }, { "epoch": 17.59259259259259, "grad_norm": 13.384744644165039, "learning_rate": 4.021420311483538e-06, "loss": 0.9458, "step": 1900 }, { "epoch": 18.0, "eval_accuracy": 0.5669014084507042, "eval_loss": 1.0471343994140625, "eval_runtime": 1.9519, "eval_samples_per_second": 145.497, "eval_steps_per_second": 18.443, "step": 1944 }, { "epoch": 18.055555555555557, "grad_norm": 15.239733695983887, "learning_rate": 3.7725725642960047e-06, "loss": 0.9798, "step": 1950 }, { "epoch": 18.51851851851852, "grad_norm": 24.09659767150879, "learning_rate": 3.526920797398148e-06, "loss": 0.9346, "step": 2000 }, { "epoch": 18.98148148148148, "grad_norm": 23.59765625, "learning_rate": 3.2851046398077705e-06, "loss": 0.9347, "step": 2050 }, { "epoch": 19.0, "eval_accuracy": 0.5845070422535211, "eval_loss": 1.051329255104065, "eval_runtime": 1.9643, "eval_samples_per_second": 144.583, "eval_steps_per_second": 18.327, "step": 2052 }, { "epoch": 19.444444444444443, "grad_norm": 22.252914428710938, "learning_rate": 3.0477537333683815e-06, "loss": 0.8767, "step": 2100 }, { "epoch": 19.90740740740741, "grad_norm": 32.37730026245117, "learning_rate": 2.815486093285317e-06, "loss": 0.8863, "step": 2150 }, { "epoch": 20.0, "eval_accuracy": 0.5950704225352113, "eval_loss": 1.0428193807601929, "eval_runtime": 1.9503, "eval_samples_per_second": 145.616, "eval_steps_per_second": 18.458, "step": 2160 }, { "epoch": 20.37037037037037, "grad_norm": 18.95844841003418, "learning_rate": 2.5889064989353797e-06, "loss": 0.9081, "step": 2200 }, { "epoch": 20.833333333333332, "grad_norm": 84.69548034667969, "learning_rate": 2.3686049191399614e-06, "loss": 0.8507, "step": 2250 }, { "epoch": 21.0, "eval_accuracy": 0.5950704225352113, "eval_loss": 1.0526705980300903, "eval_runtime": 1.9472, "eval_samples_per_second": 145.852, "eval_steps_per_second": 18.488, "step": 2268 }, { "epoch": 21.296296296296298, "grad_norm": 34.8240852355957, "learning_rate": 2.155154976001948e-06, "loss": 0.9308, "step": 2300 }, { "epoch": 21.75925925925926, "grad_norm": 39.87207794189453, "learning_rate": 1.949112451306282e-06, "loss": 0.8712, "step": 2350 }, { "epoch": 22.0, "eval_accuracy": 0.5915492957746479, "eval_loss": 1.056043267250061, "eval_runtime": 1.9559, "eval_samples_per_second": 145.204, "eval_steps_per_second": 18.406, "step": 2376 }, { "epoch": 22.22222222222222, "grad_norm": 15.800605773925781, "learning_rate": 1.7510138393732029e-06, "loss": 0.8973, "step": 2400 }, { "epoch": 22.685185185185187, "grad_norm": 13.362822532653809, "learning_rate": 1.5613749501322705e-06, "loss": 0.8857, "step": 2450 }, { "epoch": 23.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.0447120666503906, "eval_runtime": 1.9485, "eval_samples_per_second": 145.752, "eval_steps_per_second": 18.476, "step": 2484 }, { "epoch": 23.14814814814815, "grad_norm": 26.928611755371094, "learning_rate": 1.3806895660544805e-06, "loss": 0.8446, "step": 2500 }, { "epoch": 23.61111111111111, "grad_norm": 52.092063903808594, "learning_rate": 1.2094281564395628e-06, "loss": 0.8848, "step": 2550 }, { "epoch": 24.0, "eval_accuracy": 0.5950704225352113, "eval_loss": 1.051204800605774, "eval_runtime": 1.9468, "eval_samples_per_second": 145.879, "eval_steps_per_second": 18.492, "step": 2592 }, { "epoch": 24.074074074074073, "grad_norm": 22.29905128479004, "learning_rate": 1.0480366524062041e-06, "loss": 0.8778, "step": 2600 }, { "epoch": 24.537037037037038, "grad_norm": 23.949947357177734, "learning_rate": 8.969352857748842e-07, "loss": 0.8672, "step": 2650 }, { "epoch": 25.0, "grad_norm": 31.42759895324707, "learning_rate": 7.565174948666382e-07, "loss": 0.904, "step": 2700 }, { "epoch": 25.0, "eval_accuracy": 0.5845070422535211, "eval_loss": 1.0513179302215576, "eval_runtime": 1.9542, "eval_samples_per_second": 145.331, "eval_steps_per_second": 18.422, "step": 2700 }, { "epoch": 25.462962962962962, "grad_norm": 20.711061477661133, "learning_rate": 6.271489000668418e-07, "loss": 0.7932, "step": 2750 }, { "epoch": 25.925925925925927, "grad_norm": 22.23466682434082, "learning_rate": 5.091663518214407e-07, "loss": 0.943, "step": 2800 }, { "epoch": 26.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.04801344871521, "eval_runtime": 1.9702, "eval_samples_per_second": 144.148, "eval_steps_per_second": 18.272, "step": 2808 }, { "epoch": 26.38888888888889, "grad_norm": 18.16496467590332, "learning_rate": 4.0287705354446147e-07, "loss": 0.9108, "step": 2850 }, { "epoch": 26.85185185185185, "grad_norm": 50.363128662109375, "learning_rate": 3.085577617205765e-07, "loss": 0.862, "step": 2900 }, { "epoch": 27.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.047555685043335, "eval_runtime": 1.9557, "eval_samples_per_second": 145.219, "eval_steps_per_second": 18.408, "step": 2916 }, { "epoch": 27.314814814814813, "grad_norm": 69.7729721069336, "learning_rate": 2.2645406528550407e-07, "loss": 0.8412, "step": 2950 }, { "epoch": 27.77777777777778, "grad_norm": 20.13194465637207, "learning_rate": 1.5677974616058856e-07, "loss": 0.864, "step": 3000 }, { "epoch": 28.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.046897292137146, "eval_runtime": 1.954, "eval_samples_per_second": 145.344, "eval_steps_per_second": 18.424, "step": 3024 }, { "epoch": 28.24074074074074, "grad_norm": 29.41867446899414, "learning_rate": 9.971622260661007e-08, "loss": 0.8656, "step": 3050 }, { "epoch": 28.703703703703702, "grad_norm": 19.62738609313965, "learning_rate": 5.541207684621908e-08, "loss": 0.8879, "step": 3100 }, { "epoch": 29.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.04678475856781, "eval_runtime": 1.9435, "eval_samples_per_second": 146.127, "eval_steps_per_second": 18.523, "step": 3132 }, { "epoch": 29.166666666666668, "grad_norm": 18.350698471069336, "learning_rate": 2.398266818496864e-08, "loss": 0.8594, "step": 3150 }, { "epoch": 29.62962962962963, "grad_norm": 28.054824829101562, "learning_rate": 5.509832638314061e-09, "loss": 0.8099, "step": 3200 }, { "epoch": 30.0, "eval_accuracy": 0.5880281690140845, "eval_loss": 1.0467545986175537, "eval_runtime": 3.2936, "eval_samples_per_second": 86.227, "eval_steps_per_second": 10.93, "step": 3240 }, { "epoch": 30.0, "step": 3240, "total_flos": 2.0405460235862016e+18, "train_loss": 1.012899116233543, "train_runtime": 686.1726, "train_samples_per_second": 37.6, "train_steps_per_second": 4.722 } ], "logging_steps": 50, "max_steps": 3240, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0405460235862016e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }