| { | |
| "best_metric": 0.5330188679245284, | |
| "best_model_checkpoint": "./Validated_cracks_raw_dataset_266_outputs/checkpoint-1280", | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 2400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 12.7417573928833, | |
| "learning_rate": 3.916666666666667e-06, | |
| "loss": 1.3904, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.35377358490566035, | |
| "eval_loss": 1.347093939781189, | |
| "eval_runtime": 1.5501, | |
| "eval_samples_per_second": 136.765, | |
| "eval_steps_per_second": 17.418, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 13.116891860961914, | |
| "learning_rate": 8.083333333333334e-06, | |
| "loss": 1.3764, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": null, | |
| "learning_rate": 9.996791734463078e-06, | |
| "loss": 1.3264, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.33962264150943394, | |
| "eval_loss": 1.2973852157592773, | |
| "eval_runtime": 1.5451, | |
| "eval_samples_per_second": 137.208, | |
| "eval_steps_per_second": 17.475, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 11.807487487792969, | |
| "learning_rate": 9.972609476841368e-06, | |
| "loss": 1.2875, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.3490566037735849, | |
| "eval_loss": 1.2867087125778198, | |
| "eval_runtime": 1.5667, | |
| "eval_samples_per_second": 135.313, | |
| "eval_steps_per_second": 17.233, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 13.677336692810059, | |
| "learning_rate": 9.92483424862726e-06, | |
| "loss": 1.2754, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 11.059372901916504, | |
| "learning_rate": 9.8536927234736e-06, | |
| "loss": 1.2415, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.37735849056603776, | |
| "eval_loss": 1.257009744644165, | |
| "eval_runtime": 1.5386, | |
| "eval_samples_per_second": 137.789, | |
| "eval_steps_per_second": 17.549, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 4.375, | |
| "grad_norm": 9.970629692077637, | |
| "learning_rate": 9.759522438425813e-06, | |
| "loss": 1.2347, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 36.81943130493164, | |
| "learning_rate": 9.642770192448537e-06, | |
| "loss": 1.2209, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.4339622641509434, | |
| "eval_loss": 1.214514136314392, | |
| "eval_runtime": 1.5466, | |
| "eval_samples_per_second": 137.075, | |
| "eval_steps_per_second": 17.458, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.625, | |
| "grad_norm": 25.951169967651367, | |
| "learning_rate": 9.50398992654976e-06, | |
| "loss": 1.1699, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.41509433962264153, | |
| "eval_loss": 1.2138594388961792, | |
| "eval_runtime": 1.5575, | |
| "eval_samples_per_second": 136.117, | |
| "eval_steps_per_second": 17.336, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 33.16694259643555, | |
| "learning_rate": 9.343840095560373e-06, | |
| "loss": 1.1707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.875, | |
| "grad_norm": 15.973888397216797, | |
| "learning_rate": 9.163080544038953e-06, | |
| "loss": 1.1315, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.46226415094339623, | |
| "eval_loss": 1.1762322187423706, | |
| "eval_runtime": 1.548, | |
| "eval_samples_per_second": 136.955, | |
| "eval_steps_per_second": 17.442, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 16.8402042388916, | |
| "learning_rate": 8.962568901124326e-06, | |
| "loss": 1.1565, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.45754716981132076, | |
| "eval_loss": 1.1620187759399414, | |
| "eval_runtime": 1.5613, | |
| "eval_samples_per_second": 135.788, | |
| "eval_steps_per_second": 17.294, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 8.125, | |
| "grad_norm": 27.61142921447754, | |
| "learning_rate": 8.743256511440829e-06, | |
| "loss": 1.1218, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 14.339193344116211, | |
| "learning_rate": 8.511092281712174e-06, | |
| "loss": 1.1111, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.4811320754716981, | |
| "eval_loss": 1.1559137105941772, | |
| "eval_runtime": 1.5476, | |
| "eval_samples_per_second": 136.986, | |
| "eval_steps_per_second": 17.446, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 9.375, | |
| "grad_norm": 22.12592124938965, | |
| "learning_rate": 8.257705467351144e-06, | |
| "loss": 1.0349, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 30.31277847290039, | |
| "learning_rate": 7.988862191016204e-06, | |
| "loss": 1.117, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.125497817993164, | |
| "eval_runtime": 1.5617, | |
| "eval_samples_per_second": 135.746, | |
| "eval_steps_per_second": 17.288, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 10.625, | |
| "grad_norm": 53.451171875, | |
| "learning_rate": 7.705838002605665e-06, | |
| "loss": 1.0174, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5047169811320755, | |
| "eval_loss": 1.1186039447784424, | |
| "eval_runtime": 1.5425, | |
| "eval_samples_per_second": 137.44, | |
| "eval_steps_per_second": 17.504, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "grad_norm": 18.92786979675293, | |
| "learning_rate": 7.409975734566998e-06, | |
| "loss": 1.0209, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 11.875, | |
| "grad_norm": 25.6451358795166, | |
| "learning_rate": 7.102679130713538e-06, | |
| "loss": 1.0569, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1092461347579956, | |
| "eval_runtime": 1.5482, | |
| "eval_samples_per_second": 136.93, | |
| "eval_steps_per_second": 17.439, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 16.849384307861328, | |
| "learning_rate": 6.785406186042e-06, | |
| "loss": 1.0089, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1156014204025269, | |
| "eval_runtime": 1.5482, | |
| "eval_samples_per_second": 136.935, | |
| "eval_steps_per_second": 17.44, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 13.125, | |
| "grad_norm": 29.936918258666992, | |
| "learning_rate": 6.45966222915063e-06, | |
| "loss": 1.0412, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "grad_norm": 46.736331939697266, | |
| "learning_rate": 6.126992780079032e-06, | |
| "loss": 1.0413, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.49528301886792453, | |
| "eval_loss": 1.108467698097229, | |
| "eval_runtime": 1.5451, | |
| "eval_samples_per_second": 137.209, | |
| "eval_steps_per_second": 17.475, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 14.375, | |
| "grad_norm": 35.94117736816406, | |
| "learning_rate": 5.788976217456275e-06, | |
| "loss": 1.019, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 23.572219848632812, | |
| "learning_rate": 5.447216289748596e-06, | |
| "loss": 0.9958, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.115731954574585, | |
| "eval_runtime": 1.5428, | |
| "eval_samples_per_second": 137.41, | |
| "eval_steps_per_second": 17.5, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 15.625, | |
| "grad_norm": 19.54017448425293, | |
| "learning_rate": 5.103334506137773e-06, | |
| "loss": 0.9969, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.5330188679245284, | |
| "eval_loss": 1.1048997640609741, | |
| "eval_runtime": 1.5622, | |
| "eval_samples_per_second": 135.705, | |
| "eval_steps_per_second": 17.283, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "grad_norm": 33.59697341918945, | |
| "learning_rate": 4.758962443132227e-06, | |
| "loss": 0.9203, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 16.875, | |
| "grad_norm": 22.073335647583008, | |
| "learning_rate": 4.415734003412873e-06, | |
| "loss": 0.9918, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.5047169811320755, | |
| "eval_loss": 1.1045137643814087, | |
| "eval_runtime": 1.5338, | |
| "eval_samples_per_second": 138.214, | |
| "eval_steps_per_second": 17.603, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 39.07722473144531, | |
| "learning_rate": 4.075277663642208e-06, | |
| "loss": 0.9798, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.5141509433962265, | |
| "eval_loss": 1.0932115316390991, | |
| "eval_runtime": 1.5496, | |
| "eval_samples_per_second": 136.805, | |
| "eval_steps_per_second": 17.423, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 18.125, | |
| "grad_norm": 34.9410285949707, | |
| "learning_rate": 3.739208748017647e-06, | |
| "loss": 0.9567, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 18.75, | |
| "grad_norm": 13.545350074768066, | |
| "learning_rate": 3.409121764227809e-06, | |
| "loss": 0.9232, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.5047169811320755, | |
| "eval_loss": 1.0961326360702515, | |
| "eval_runtime": 1.5504, | |
| "eval_samples_per_second": 136.736, | |
| "eval_steps_per_second": 17.414, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 19.375, | |
| "grad_norm": 21.65803337097168, | |
| "learning_rate": 3.0865828381745515e-06, | |
| "loss": 0.9919, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 24.88987159729004, | |
| "learning_rate": 2.7731222833547842e-06, | |
| "loss": 0.8817, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.49056603773584906, | |
| "eval_loss": 1.1115567684173584, | |
| "eval_runtime": 1.544, | |
| "eval_samples_per_second": 137.306, | |
| "eval_steps_per_second": 17.487, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 20.625, | |
| "grad_norm": 33.98754119873047, | |
| "learning_rate": 2.470227340157316e-06, | |
| "loss": 0.9587, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.1000138521194458, | |
| "eval_runtime": 1.5478, | |
| "eval_samples_per_second": 136.971, | |
| "eval_steps_per_second": 17.444, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "grad_norm": 18.886362075805664, | |
| "learning_rate": 2.179335119523745e-06, | |
| "loss": 0.8872, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 21.875, | |
| "grad_norm": 45.27411651611328, | |
| "learning_rate": 1.901825784452777e-06, | |
| "loss": 0.964, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.5094339622641509, | |
| "eval_loss": 1.097833275794983, | |
| "eval_runtime": 1.5565, | |
| "eval_samples_per_second": 136.207, | |
| "eval_steps_per_second": 17.347, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "grad_norm": 46.455753326416016, | |
| "learning_rate": 1.6390160016989487e-06, | |
| "loss": 0.8906, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.0974699258804321, | |
| "eval_runtime": 1.5424, | |
| "eval_samples_per_second": 137.448, | |
| "eval_steps_per_second": 17.505, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 23.125, | |
| "grad_norm": 40.78911590576172, | |
| "learning_rate": 1.3921526947346902e-06, | |
| "loss": 0.945, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 23.75, | |
| "grad_norm": 23.577985763549805, | |
| "learning_rate": 1.162407127615357e-06, | |
| "loss": 0.896, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.5141509433962265, | |
| "eval_loss": 1.099574089050293, | |
| "eval_runtime": 1.549, | |
| "eval_samples_per_second": 136.865, | |
| "eval_steps_per_second": 17.431, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 24.375, | |
| "grad_norm": 33.863609313964844, | |
| "learning_rate": 9.508693478168346e-07, | |
| "loss": 0.891, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 27.020963668823242, | |
| "learning_rate": 7.585430144121319e-07, | |
| "loss": 0.9156, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.0951563119888306, | |
| "eval_runtime": 1.5411, | |
| "eval_samples_per_second": 137.565, | |
| "eval_steps_per_second": 17.52, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 25.625, | |
| "grad_norm": 15.748483657836914, | |
| "learning_rate": 5.863406361251472e-07, | |
| "loss": 0.8797, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.0959974527359009, | |
| "eval_runtime": 1.5455, | |
| "eval_samples_per_second": 137.175, | |
| "eval_steps_per_second": 17.47, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 26.25, | |
| "grad_norm": 25.159223556518555, | |
| "learning_rate": 4.350792418550509e-07, | |
| "loss": 0.9148, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 26.875, | |
| "grad_norm": 18.654874801635742, | |
| "learning_rate": 3.0547650421285216e-07, | |
| "loss": 0.8781, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.0947679281234741, | |
| "eval_runtime": 1.5435, | |
| "eval_samples_per_second": 137.346, | |
| "eval_steps_per_second": 17.492, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 27.5, | |
| "grad_norm": 37.69408416748047, | |
| "learning_rate": 1.9814733446237356e-07, | |
| "loss": 0.8698, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.5235849056603774, | |
| "eval_loss": 1.0946481227874756, | |
| "eval_runtime": 1.5439, | |
| "eval_samples_per_second": 137.312, | |
| "eval_steps_per_second": 17.488, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 28.125, | |
| "grad_norm": 18.448745727539062, | |
| "learning_rate": 1.1360096502120387e-07, | |
| "loss": 0.915, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 28.75, | |
| "grad_norm": 25.56351661682129, | |
| "learning_rate": 5.223853336398632e-08, | |
| "loss": 0.9, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.0949124097824097, | |
| "eval_runtime": 1.5384, | |
| "eval_samples_per_second": 137.806, | |
| "eval_steps_per_second": 17.551, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 29.375, | |
| "grad_norm": 20.619009017944336, | |
| "learning_rate": 1.4351178791384702e-08, | |
| "loss": 0.9145, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 16.861053466796875, | |
| "learning_rate": 1.1866109479674593e-10, | |
| "loss": 0.8853, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.5283018867924528, | |
| "eval_loss": 1.0948740243911743, | |
| "eval_runtime": 2.7457, | |
| "eval_samples_per_second": 77.212, | |
| "eval_steps_per_second": 9.834, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 2400, | |
| "total_flos": 1.5090549662800282e+18, | |
| "train_loss": 1.0357336870829263, | |
| "train_runtime": 587.3439, | |
| "train_samples_per_second": 32.485, | |
| "train_steps_per_second": 4.086 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5090549662800282e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |