| { | |
| "best_metric": 1.3909834623336792, | |
| "best_model_checkpoint": "finetuning/output/bart-base-finetuned_xe_ey_fae/checkpoint-25000", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 25377, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.804547424833511e-06, | |
| "loss": 5.4226, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.3627901941481408, | |
| "eval_loss": 3.8137550354003906, | |
| "eval_runtime": 98.6024, | |
| "eval_samples_per_second": 171.679, | |
| "eval_steps_per_second": 21.46, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.607518619222132e-06, | |
| "loss": 4.0408, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.46300121473546585, | |
| "eval_loss": 3.057621717453003, | |
| "eval_runtime": 99.414, | |
| "eval_samples_per_second": 170.278, | |
| "eval_steps_per_second": 21.285, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.41048981361075e-06, | |
| "loss": 3.4979, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.5132904448434071, | |
| "eval_loss": 2.70158314704895, | |
| "eval_runtime": 99.9098, | |
| "eval_samples_per_second": 169.433, | |
| "eval_steps_per_second": 21.179, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.21346100799937e-06, | |
| "loss": 3.1691, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5430825323065444, | |
| "eval_loss": 2.4879872798919678, | |
| "eval_runtime": 99.733, | |
| "eval_samples_per_second": 169.733, | |
| "eval_steps_per_second": 21.217, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 9.01643220238799e-06, | |
| "loss": 2.9564, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5644360825553116, | |
| "eval_loss": 2.330946445465088, | |
| "eval_runtime": 100.0932, | |
| "eval_samples_per_second": 169.122, | |
| "eval_steps_per_second": 21.14, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 8.819797454387831e-06, | |
| "loss": 2.8078, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5792018144043999, | |
| "eval_loss": 2.232025384902954, | |
| "eval_runtime": 100.0923, | |
| "eval_samples_per_second": 169.124, | |
| "eval_steps_per_second": 21.14, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 8.622768648776452e-06, | |
| "loss": 2.6741, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.592379386392151, | |
| "eval_loss": 2.1506171226501465, | |
| "eval_runtime": 99.9507, | |
| "eval_samples_per_second": 169.364, | |
| "eval_steps_per_second": 21.17, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 8.425739843165071e-06, | |
| "loss": 2.5323, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.617633758132823, | |
| "eval_loss": 1.9845681190490723, | |
| "eval_runtime": 100.0279, | |
| "eval_samples_per_second": 169.233, | |
| "eval_steps_per_second": 21.154, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 8.22871103755369e-06, | |
| "loss": 2.3678, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.6374534268418744, | |
| "eval_loss": 1.8812607526779175, | |
| "eval_runtime": 100.101, | |
| "eval_samples_per_second": 169.109, | |
| "eval_steps_per_second": 21.139, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.03168223194231e-06, | |
| "loss": 2.25, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.6496838449438552, | |
| "eval_loss": 1.809983253479004, | |
| "eval_runtime": 100.2479, | |
| "eval_samples_per_second": 168.861, | |
| "eval_steps_per_second": 21.108, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 7.83465342633093e-06, | |
| "loss": 2.1795, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.6579494225370981, | |
| "eval_loss": 1.7632389068603516, | |
| "eval_runtime": 100.0951, | |
| "eval_samples_per_second": 169.119, | |
| "eval_steps_per_second": 21.14, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 7.63762462071955e-06, | |
| "loss": 2.1203, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.664559097259069, | |
| "eval_loss": 1.7238309383392334, | |
| "eval_runtime": 99.9087, | |
| "eval_samples_per_second": 169.435, | |
| "eval_steps_per_second": 21.179, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.440595815108169e-06, | |
| "loss": 2.0764, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.6713205569113848, | |
| "eval_loss": 1.6855953931808472, | |
| "eval_runtime": 100.047, | |
| "eval_samples_per_second": 169.201, | |
| "eval_steps_per_second": 21.15, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 7.2435670094967895e-06, | |
| "loss": 2.026, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.6759595736369565, | |
| "eval_loss": 1.6568557024002075, | |
| "eval_runtime": 99.903, | |
| "eval_samples_per_second": 169.444, | |
| "eval_steps_per_second": 21.181, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.046932261496632e-06, | |
| "loss": 1.9942, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.6803347736385223, | |
| "eval_loss": 1.6309233903884888, | |
| "eval_runtime": 100.1047, | |
| "eval_samples_per_second": 169.103, | |
| "eval_steps_per_second": 21.138, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 6.849903455885251e-06, | |
| "loss": 1.9665, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.6836478246699454, | |
| "eval_loss": 1.612231731414795, | |
| "eval_runtime": 206.2817, | |
| "eval_samples_per_second": 82.063, | |
| "eval_steps_per_second": 10.258, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 6.652874650273871e-06, | |
| "loss": 1.9395, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6866433413548132, | |
| "eval_loss": 1.5912940502166748, | |
| "eval_runtime": 206.5703, | |
| "eval_samples_per_second": 81.948, | |
| "eval_steps_per_second": 10.243, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.455845844662491e-06, | |
| "loss": 1.9155, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_accuracy": 0.6894629039599454, | |
| "eval_loss": 1.5758066177368164, | |
| "eval_runtime": 206.7537, | |
| "eval_samples_per_second": 81.875, | |
| "eval_steps_per_second": 10.234, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 6.25881703905111e-06, | |
| "loss": 1.8828, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_accuracy": 0.6918324332777558, | |
| "eval_loss": 1.5607072114944458, | |
| "eval_runtime": 203.7553, | |
| "eval_samples_per_second": 83.08, | |
| "eval_steps_per_second": 10.385, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 6.06178823343973e-06, | |
| "loss": 1.8721, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_accuracy": 0.6948063170580184, | |
| "eval_loss": 1.5421587228775024, | |
| "eval_runtime": 205.9617, | |
| "eval_samples_per_second": 82.19, | |
| "eval_steps_per_second": 10.274, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 5.8647594278283496e-06, | |
| "loss": 1.8474, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 0.6963892745418871, | |
| "eval_loss": 1.5320152044296265, | |
| "eval_runtime": 206.4027, | |
| "eval_samples_per_second": 82.014, | |
| "eval_steps_per_second": 10.252, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 5.667730622216968e-06, | |
| "loss": 1.8293, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_accuracy": 0.6978303363523796, | |
| "eval_loss": 1.5213782787322998, | |
| "eval_runtime": 206.4515, | |
| "eval_samples_per_second": 81.995, | |
| "eval_steps_per_second": 10.249, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 5.471095874216811e-06, | |
| "loss": 1.8129, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_accuracy": 0.6997515674908317, | |
| "eval_loss": 1.5102019309997559, | |
| "eval_runtime": 203.4625, | |
| "eval_samples_per_second": 83.2, | |
| "eval_steps_per_second": 10.4, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 5.274067068605431e-06, | |
| "loss": 1.8148, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_accuracy": 0.7013130680794967, | |
| "eval_loss": 1.5009928941726685, | |
| "eval_runtime": 206.7456, | |
| "eval_samples_per_second": 81.878, | |
| "eval_steps_per_second": 10.235, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 5.077038262994051e-06, | |
| "loss": 1.7903, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_accuracy": 0.7037519606361885, | |
| "eval_loss": 1.484366774559021, | |
| "eval_runtime": 207.0125, | |
| "eval_samples_per_second": 81.773, | |
| "eval_steps_per_second": 10.222, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.88000945738267e-06, | |
| "loss": 1.7815, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_accuracy": 0.7039102273054718, | |
| "eval_loss": 1.4823458194732666, | |
| "eval_runtime": 206.0669, | |
| "eval_samples_per_second": 82.148, | |
| "eval_steps_per_second": 10.269, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.68298065177129e-06, | |
| "loss": 1.7637, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 0.705173223800616, | |
| "eval_loss": 1.4746402502059937, | |
| "eval_runtime": 202.4173, | |
| "eval_samples_per_second": 83.629, | |
| "eval_steps_per_second": 10.454, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 4.485951846159909e-06, | |
| "loss": 1.7623, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_accuracy": 0.706123367116372, | |
| "eval_loss": 1.470130205154419, | |
| "eval_runtime": 205.8377, | |
| "eval_samples_per_second": 82.24, | |
| "eval_steps_per_second": 10.28, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 4.289317098159752e-06, | |
| "loss": 1.7402, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_accuracy": 0.7075649407306767, | |
| "eval_loss": 1.4597938060760498, | |
| "eval_runtime": 206.177, | |
| "eval_samples_per_second": 82.104, | |
| "eval_steps_per_second": 10.263, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 4.092288292548371e-06, | |
| "loss": 1.7376, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_accuracy": 0.7089666967285505, | |
| "eval_loss": 1.451911449432373, | |
| "eval_runtime": 206.3085, | |
| "eval_samples_per_second": 82.052, | |
| "eval_steps_per_second": 10.256, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.89525948693699e-06, | |
| "loss": 1.7287, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_accuracy": 0.7101150715078346, | |
| "eval_loss": 1.4501255750656128, | |
| "eval_runtime": 100.0594, | |
| "eval_samples_per_second": 169.18, | |
| "eval_steps_per_second": 21.147, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.6982306813256103e-06, | |
| "loss": 1.7273, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_accuracy": 0.7106747872019036, | |
| "eval_loss": 1.4408985376358032, | |
| "eval_runtime": 100.2351, | |
| "eval_samples_per_second": 168.883, | |
| "eval_steps_per_second": 21.11, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.5012018757142298e-06, | |
| "loss": 1.7119, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_accuracy": 0.7125312598082394, | |
| "eval_loss": 1.431384563446045, | |
| "eval_runtime": 100.206, | |
| "eval_samples_per_second": 168.932, | |
| "eval_steps_per_second": 21.117, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.3045671277140724e-06, | |
| "loss": 1.7098, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_accuracy": 0.712873669928985, | |
| "eval_loss": 1.4268542528152466, | |
| "eval_runtime": 99.9713, | |
| "eval_samples_per_second": 169.329, | |
| "eval_steps_per_second": 21.166, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.1075383221026915e-06, | |
| "loss": 1.6978, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_accuracy": 0.7132452679915875, | |
| "eval_loss": 1.4275221824645996, | |
| "eval_runtime": 100.0415, | |
| "eval_samples_per_second": 169.21, | |
| "eval_steps_per_second": 21.151, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 2.910509516491311e-06, | |
| "loss": 1.698, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_accuracy": 0.7139832935058783, | |
| "eval_loss": 1.421799898147583, | |
| "eval_runtime": 100.2878, | |
| "eval_samples_per_second": 168.794, | |
| "eval_steps_per_second": 21.099, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 2.713480710879931e-06, | |
| "loss": 1.6837, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_accuracy": 0.7146896815582429, | |
| "eval_loss": 1.4150662422180176, | |
| "eval_runtime": 100.1729, | |
| "eval_samples_per_second": 168.988, | |
| "eval_steps_per_second": 21.123, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.5164519052685504e-06, | |
| "loss": 1.6908, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_accuracy": 0.7148777636104067, | |
| "eval_loss": 1.413697361946106, | |
| "eval_runtime": 100.0403, | |
| "eval_samples_per_second": 169.212, | |
| "eval_steps_per_second": 21.151, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.3194230996571703e-06, | |
| "loss": 1.6902, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_accuracy": 0.7161167332062431, | |
| "eval_loss": 1.4084678888320923, | |
| "eval_runtime": 99.9514, | |
| "eval_samples_per_second": 169.362, | |
| "eval_steps_per_second": 21.17, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 2.12239429404579e-06, | |
| "loss": 1.6741, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.7153571848548731, | |
| "eval_loss": 1.4121222496032715, | |
| "eval_runtime": 99.7721, | |
| "eval_samples_per_second": 169.667, | |
| "eval_steps_per_second": 21.208, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.925759546045632e-06, | |
| "loss": 1.6823, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_accuracy": 0.7164751883355099, | |
| "eval_loss": 1.4036943912506104, | |
| "eval_runtime": 96.9786, | |
| "eval_samples_per_second": 174.554, | |
| "eval_steps_per_second": 21.819, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.7287307404342515e-06, | |
| "loss": 1.6692, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_accuracy": 0.7164227335870778, | |
| "eval_loss": 1.4038887023925781, | |
| "eval_runtime": 96.6299, | |
| "eval_samples_per_second": 175.184, | |
| "eval_steps_per_second": 21.898, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.5317019348228712e-06, | |
| "loss": 1.6669, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_accuracy": 0.7171810007042829, | |
| "eval_loss": 1.4014757871627808, | |
| "eval_runtime": 96.5289, | |
| "eval_samples_per_second": 175.367, | |
| "eval_steps_per_second": 21.921, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 1.334673129211491e-06, | |
| "loss": 1.6613, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_accuracy": 0.7179443895145537, | |
| "eval_loss": 1.3979177474975586, | |
| "eval_runtime": 96.4739, | |
| "eval_samples_per_second": 175.467, | |
| "eval_steps_per_second": 21.933, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.1376443236001104e-06, | |
| "loss": 1.664, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 0.7180417425737022, | |
| "eval_loss": 1.3960251808166504, | |
| "eval_runtime": 96.2769, | |
| "eval_samples_per_second": 175.826, | |
| "eval_steps_per_second": 21.978, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.406155179887299e-07, | |
| "loss": 1.6615, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_accuracy": 0.71719773048631, | |
| "eval_loss": 1.4012339115142822, | |
| "eval_runtime": 96.2915, | |
| "eval_samples_per_second": 175.8, | |
| "eval_steps_per_second": 21.975, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 7.435867123773496e-07, | |
| "loss": 1.6627, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_accuracy": 0.7177754487686726, | |
| "eval_loss": 1.3974287509918213, | |
| "eval_runtime": 96.2242, | |
| "eval_samples_per_second": 175.922, | |
| "eval_steps_per_second": 21.99, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 5.465579067659692e-07, | |
| "loss": 1.6489, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_accuracy": 0.7182007239397646, | |
| "eval_loss": 1.3947515487670898, | |
| "eval_runtime": 96.06, | |
| "eval_samples_per_second": 176.223, | |
| "eval_steps_per_second": 22.028, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 3.499231587658116e-07, | |
| "loss": 1.6429, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_accuracy": 0.7183795073646381, | |
| "eval_loss": 1.3920938968658447, | |
| "eval_runtime": 96.1507, | |
| "eval_samples_per_second": 176.057, | |
| "eval_steps_per_second": 22.007, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 1.528943531544312e-07, | |
| "loss": 1.6477, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_accuracy": 0.7182484820177487, | |
| "eval_loss": 1.3909834623336792, | |
| "eval_runtime": 96.1141, | |
| "eval_samples_per_second": 176.124, | |
| "eval_steps_per_second": 22.016, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 25377, | |
| "total_flos": 1.2378168378261504e+17, | |
| "train_loss": 2.057705193860182, | |
| "train_runtime": 15314.6638, | |
| "train_samples_per_second": 26.512, | |
| "train_steps_per_second": 1.657 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 25377, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.2378168378261504e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |