{ "best_metric": 90.5043271801771, "best_model_checkpoint": "./save/squad_t5_large_weighted_ce/checkpoint-87600", "epoch": 10.0, "global_step": 109500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.954337899543378e-05, "loss": 1.8164, "step": 500 }, { "epoch": 0.09, "learning_rate": 9.908675799086759e-05, "loss": 1.3696, "step": 1000 }, { "epoch": 0.14, "learning_rate": 9.863013698630137e-05, "loss": 1.2016, "step": 1500 }, { "epoch": 0.18, "learning_rate": 9.817351598173516e-05, "loss": 1.0794, "step": 2000 }, { "epoch": 0.23, "learning_rate": 9.771689497716895e-05, "loss": 1.0045, "step": 2500 }, { "epoch": 0.27, "learning_rate": 9.726027397260274e-05, "loss": 0.9416, "step": 3000 }, { "epoch": 0.32, "learning_rate": 9.680365296803654e-05, "loss": 0.8644, "step": 3500 }, { "epoch": 0.37, "learning_rate": 9.634703196347033e-05, "loss": 0.8424, "step": 4000 }, { "epoch": 0.41, "learning_rate": 9.58904109589041e-05, "loss": 0.808, "step": 4500 }, { "epoch": 0.46, "learning_rate": 9.543378995433791e-05, "loss": 0.7564, "step": 5000 }, { "epoch": 0.5, "eval_avg_block": 0.0, "eval_exact_match": 80.65279091769158, "eval_f1": 89.34215607214264, "eval_loss": 0.6778247952461243, "eval_runtime": 912.0299, "eval_samples_per_second": 11.683, "eval_steps_per_second": 0.183, "step": 5475 }, { "epoch": 0.5, "learning_rate": 9.497716894977169e-05, "loss": 0.7448, "step": 5500 }, { "epoch": 0.55, "learning_rate": 9.452054794520548e-05, "loss": 0.7018, "step": 6000 }, { "epoch": 0.59, "learning_rate": 9.406392694063927e-05, "loss": 0.688, "step": 6500 }, { "epoch": 0.64, "learning_rate": 9.360730593607307e-05, "loss": 0.6676, "step": 7000 }, { "epoch": 0.68, "learning_rate": 9.315068493150684e-05, "loss": 0.6728, "step": 7500 }, { "epoch": 0.73, "learning_rate": 9.269406392694065e-05, "loss": 0.6271, "step": 8000 }, { "epoch": 0.78, "learning_rate": 9.223744292237443e-05, "loss": 0.6221, "step": 8500 }, { "epoch": 0.82, "learning_rate": 9.178082191780822e-05, "loss": 0.6188, "step": 9000 }, { "epoch": 0.87, "learning_rate": 9.132420091324201e-05, "loss": 0.6049, "step": 9500 }, { "epoch": 0.91, "learning_rate": 9.08675799086758e-05, "loss": 0.6073, "step": 10000 }, { "epoch": 0.96, "learning_rate": 9.041095890410958e-05, "loss": 0.5808, "step": 10500 }, { "epoch": 1.0, "eval_avg_block": 0.0, "eval_exact_match": 81.17313150425733, "eval_f1": 89.63797178932128, "eval_loss": 0.5828052759170532, "eval_runtime": 910.6332, "eval_samples_per_second": 11.701, "eval_steps_per_second": 0.183, "step": 10950 }, { "epoch": 1.0, "learning_rate": 8.995433789954339e-05, "loss": 0.5682, "step": 11000 }, { "epoch": 1.05, "learning_rate": 8.949771689497717e-05, "loss": 0.4419, "step": 11500 }, { "epoch": 1.1, "learning_rate": 8.904109589041096e-05, "loss": 0.4315, "step": 12000 }, { "epoch": 1.14, "learning_rate": 8.858447488584475e-05, "loss": 0.4362, "step": 12500 }, { "epoch": 1.19, "learning_rate": 8.812785388127854e-05, "loss": 0.4269, "step": 13000 }, { "epoch": 1.23, "learning_rate": 8.767123287671233e-05, "loss": 0.4429, "step": 13500 }, { "epoch": 1.28, "learning_rate": 8.721461187214613e-05, "loss": 0.4262, "step": 14000 }, { "epoch": 1.32, "learning_rate": 8.67579908675799e-05, "loss": 0.4246, "step": 14500 }, { "epoch": 1.37, "learning_rate": 8.630136986301371e-05, "loss": 0.4423, "step": 15000 }, { "epoch": 1.42, "learning_rate": 8.584474885844749e-05, "loss": 0.4335, "step": 15500 }, { "epoch": 1.46, "learning_rate": 8.538812785388128e-05, "loss": 0.4052, "step": 16000 }, { "epoch": 1.5, "eval_avg_block": 0.0, "eval_exact_match": 81.09744560075686, "eval_f1": 89.68888250735179, "eval_loss": 0.5694797039031982, "eval_runtime": 912.5514, "eval_samples_per_second": 11.676, "eval_steps_per_second": 0.183, "step": 16425 }, { "epoch": 1.51, "learning_rate": 8.493150684931507e-05, "loss": 0.4188, "step": 16500 }, { "epoch": 1.55, "learning_rate": 8.447488584474886e-05, "loss": 0.421, "step": 17000 }, { "epoch": 1.6, "learning_rate": 8.401826484018264e-05, "loss": 0.4058, "step": 17500 }, { "epoch": 1.64, "learning_rate": 8.356164383561645e-05, "loss": 0.4133, "step": 18000 }, { "epoch": 1.69, "learning_rate": 8.310502283105023e-05, "loss": 0.4055, "step": 18500 }, { "epoch": 1.74, "learning_rate": 8.264840182648402e-05, "loss": 0.4151, "step": 19000 }, { "epoch": 1.78, "learning_rate": 8.219178082191781e-05, "loss": 0.4099, "step": 19500 }, { "epoch": 1.83, "learning_rate": 8.17351598173516e-05, "loss": 0.4065, "step": 20000 }, { "epoch": 1.87, "learning_rate": 8.127853881278538e-05, "loss": 0.4161, "step": 20500 }, { "epoch": 1.92, "learning_rate": 8.082191780821919e-05, "loss": 0.4022, "step": 21000 }, { "epoch": 1.96, "learning_rate": 8.036529680365296e-05, "loss": 0.4174, "step": 21500 }, { "epoch": 2.0, "eval_avg_block": 0.0, "eval_exact_match": 81.64616840113528, "eval_f1": 89.87366383095139, "eval_loss": 0.5433966517448425, "eval_runtime": 909.7255, "eval_samples_per_second": 11.712, "eval_steps_per_second": 0.184, "step": 21900 }, { "epoch": 2.01, "learning_rate": 7.990867579908676e-05, "loss": 0.3775, "step": 22000 }, { "epoch": 2.05, "learning_rate": 7.945205479452055e-05, "loss": 0.2915, "step": 22500 }, { "epoch": 2.1, "learning_rate": 7.899543378995434e-05, "loss": 0.2829, "step": 23000 }, { "epoch": 2.15, "learning_rate": 7.853881278538813e-05, "loss": 0.2738, "step": 23500 }, { "epoch": 2.19, "learning_rate": 7.808219178082192e-05, "loss": 0.2904, "step": 24000 }, { "epoch": 2.24, "learning_rate": 7.76255707762557e-05, "loss": 0.2788, "step": 24500 }, { "epoch": 2.28, "learning_rate": 7.716894977168951e-05, "loss": 0.2901, "step": 25000 }, { "epoch": 2.33, "learning_rate": 7.671232876712329e-05, "loss": 0.2819, "step": 25500 }, { "epoch": 2.37, "learning_rate": 7.625570776255708e-05, "loss": 0.2866, "step": 26000 }, { "epoch": 2.42, "learning_rate": 7.579908675799087e-05, "loss": 0.2993, "step": 26500 }, { "epoch": 2.47, "learning_rate": 7.534246575342466e-05, "loss": 0.2817, "step": 27000 }, { "epoch": 2.5, "eval_avg_block": 0.0, "eval_exact_match": 81.73131504257331, "eval_f1": 90.11057612224653, "eval_loss": 0.5817934274673462, "eval_runtime": 911.1532, "eval_samples_per_second": 11.694, "eval_steps_per_second": 0.183, "step": 27375 }, { "epoch": 2.51, "learning_rate": 7.488584474885844e-05, "loss": 0.2821, "step": 27500 }, { "epoch": 2.56, "learning_rate": 7.442922374429225e-05, "loss": 0.2881, "step": 28000 }, { "epoch": 2.6, "learning_rate": 7.397260273972603e-05, "loss": 0.3027, "step": 28500 }, { "epoch": 2.65, "learning_rate": 7.351598173515982e-05, "loss": 0.2849, "step": 29000 }, { "epoch": 2.69, "learning_rate": 7.305936073059361e-05, "loss": 0.2829, "step": 29500 }, { "epoch": 2.74, "learning_rate": 7.26027397260274e-05, "loss": 0.2895, "step": 30000 }, { "epoch": 2.79, "learning_rate": 7.21461187214612e-05, "loss": 0.2951, "step": 30500 }, { "epoch": 2.83, "learning_rate": 7.168949771689499e-05, "loss": 0.2771, "step": 31000 }, { "epoch": 2.88, "learning_rate": 7.123287671232876e-05, "loss": 0.297, "step": 31500 }, { "epoch": 2.92, "learning_rate": 7.077625570776256e-05, "loss": 0.29, "step": 32000 }, { "epoch": 2.97, "learning_rate": 7.031963470319635e-05, "loss": 0.2758, "step": 32500 }, { "epoch": 3.0, "eval_avg_block": 0.0, "eval_exact_match": 81.97729422894986, "eval_f1": 89.89881389854972, "eval_loss": 0.5549534559249878, "eval_runtime": 906.3902, "eval_samples_per_second": 11.755, "eval_steps_per_second": 0.184, "step": 32850 }, { "epoch": 3.01, "learning_rate": 6.986301369863014e-05, "loss": 0.2611, "step": 33000 }, { "epoch": 3.06, "learning_rate": 6.940639269406393e-05, "loss": 0.1965, "step": 33500 }, { "epoch": 3.11, "learning_rate": 6.894977168949772e-05, "loss": 0.2034, "step": 34000 }, { "epoch": 3.15, "learning_rate": 6.84931506849315e-05, "loss": 0.1989, "step": 34500 }, { "epoch": 3.2, "learning_rate": 6.803652968036531e-05, "loss": 0.1973, "step": 35000 }, { "epoch": 3.24, "learning_rate": 6.757990867579909e-05, "loss": 0.1983, "step": 35500 }, { "epoch": 3.29, "learning_rate": 6.712328767123288e-05, "loss": 0.2208, "step": 36000 }, { "epoch": 3.33, "learning_rate": 6.666666666666667e-05, "loss": 0.1901, "step": 36500 }, { "epoch": 3.38, "learning_rate": 6.621004566210046e-05, "loss": 0.2019, "step": 37000 }, { "epoch": 3.42, "learning_rate": 6.575342465753424e-05, "loss": 0.213, "step": 37500 }, { "epoch": 3.47, "learning_rate": 6.529680365296805e-05, "loss": 0.2143, "step": 38000 }, { "epoch": 3.5, "eval_avg_block": 0.0, "eval_exact_match": 82.47871333964049, "eval_f1": 90.24537185554753, "eval_loss": 0.6115335822105408, "eval_runtime": 908.6001, "eval_samples_per_second": 11.727, "eval_steps_per_second": 0.184, "step": 38325 }, { "epoch": 3.52, "learning_rate": 6.484018264840182e-05, "loss": 0.2103, "step": 38500 }, { "epoch": 3.56, "learning_rate": 6.438356164383562e-05, "loss": 0.205, "step": 39000 }, { "epoch": 3.61, "learning_rate": 6.392694063926941e-05, "loss": 0.1937, "step": 39500 }, { "epoch": 3.65, "learning_rate": 6.34703196347032e-05, "loss": 0.2139, "step": 40000 }, { "epoch": 3.7, "learning_rate": 6.301369863013699e-05, "loss": 0.2113, "step": 40500 }, { "epoch": 3.74, "learning_rate": 6.255707762557078e-05, "loss": 0.2018, "step": 41000 }, { "epoch": 3.79, "learning_rate": 6.210045662100456e-05, "loss": 0.2075, "step": 41500 }, { "epoch": 3.84, "learning_rate": 6.164383561643835e-05, "loss": 0.2113, "step": 42000 }, { "epoch": 3.88, "learning_rate": 6.118721461187215e-05, "loss": 0.2039, "step": 42500 }, { "epoch": 3.93, "learning_rate": 6.073059360730594e-05, "loss": 0.1937, "step": 43000 }, { "epoch": 3.97, "learning_rate": 6.0273972602739724e-05, "loss": 0.2084, "step": 43500 }, { "epoch": 4.0, "eval_avg_block": 0.0, "eval_exact_match": 82.4314096499527, "eval_f1": 90.31901460999805, "eval_loss": 0.6043493747711182, "eval_runtime": 909.6572, "eval_samples_per_second": 11.713, "eval_steps_per_second": 0.184, "step": 43800 }, { "epoch": 4.02, "learning_rate": 5.981735159817352e-05, "loss": 0.182, "step": 44000 }, { "epoch": 4.06, "learning_rate": 5.936073059360731e-05, "loss": 0.1508, "step": 44500 }, { "epoch": 4.11, "learning_rate": 5.89041095890411e-05, "loss": 0.14, "step": 45000 }, { "epoch": 4.16, "learning_rate": 5.8447488584474885e-05, "loss": 0.1492, "step": 45500 }, { "epoch": 4.2, "learning_rate": 5.7990867579908683e-05, "loss": 0.1541, "step": 46000 }, { "epoch": 4.25, "learning_rate": 5.753424657534247e-05, "loss": 0.1479, "step": 46500 }, { "epoch": 4.29, "learning_rate": 5.707762557077626e-05, "loss": 0.1401, "step": 47000 }, { "epoch": 4.34, "learning_rate": 5.6621004566210046e-05, "loss": 0.1558, "step": 47500 }, { "epoch": 4.38, "learning_rate": 5.616438356164384e-05, "loss": 0.1523, "step": 48000 }, { "epoch": 4.43, "learning_rate": 5.570776255707762e-05, "loss": 0.1414, "step": 48500 }, { "epoch": 4.47, "learning_rate": 5.525114155251142e-05, "loss": 0.1476, "step": 49000 }, { "epoch": 4.5, "eval_avg_block": 0.0, "eval_exact_match": 82.41248817407758, "eval_f1": 90.3531697774033, "eval_loss": 0.7035377025604248, "eval_runtime": 909.9008, "eval_samples_per_second": 11.71, "eval_steps_per_second": 0.184, "step": 49275 }, { "epoch": 4.52, "learning_rate": 5.479452054794521e-05, "loss": 0.1499, "step": 49500 }, { "epoch": 4.57, "learning_rate": 5.4337899543379e-05, "loss": 0.1567, "step": 50000 }, { "epoch": 4.61, "learning_rate": 5.3881278538812784e-05, "loss": 0.1436, "step": 50500 }, { "epoch": 4.66, "learning_rate": 5.342465753424658e-05, "loss": 0.1557, "step": 51000 }, { "epoch": 4.7, "learning_rate": 5.296803652968037e-05, "loss": 0.1491, "step": 51500 }, { "epoch": 4.75, "learning_rate": 5.251141552511416e-05, "loss": 0.1338, "step": 52000 }, { "epoch": 4.79, "learning_rate": 5.2054794520547945e-05, "loss": 0.137, "step": 52500 }, { "epoch": 4.84, "learning_rate": 5.159817351598174e-05, "loss": 0.1371, "step": 53000 }, { "epoch": 4.89, "learning_rate": 5.114155251141552e-05, "loss": 0.1561, "step": 53500 }, { "epoch": 4.93, "learning_rate": 5.068493150684932e-05, "loss": 0.1501, "step": 54000 }, { "epoch": 4.98, "learning_rate": 5.0228310502283106e-05, "loss": 0.1543, "step": 54500 }, { "epoch": 5.0, "eval_avg_block": 0.0, "eval_exact_match": 82.2421948912015, "eval_f1": 90.37064084267695, "eval_loss": 0.6705102324485779, "eval_runtime": 913.1361, "eval_samples_per_second": 11.669, "eval_steps_per_second": 0.183, "step": 54750 }, { "epoch": 5.02, "learning_rate": 4.977168949771689e-05, "loss": 0.1252, "step": 55000 }, { "epoch": 5.07, "learning_rate": 4.9315068493150684e-05, "loss": 0.1034, "step": 55500 }, { "epoch": 5.11, "learning_rate": 4.8858447488584476e-05, "loss": 0.1046, "step": 56000 }, { "epoch": 5.16, "learning_rate": 4.840182648401827e-05, "loss": 0.1137, "step": 56500 }, { "epoch": 5.21, "learning_rate": 4.794520547945205e-05, "loss": 0.1008, "step": 57000 }, { "epoch": 5.25, "learning_rate": 4.7488584474885845e-05, "loss": 0.1038, "step": 57500 }, { "epoch": 5.3, "learning_rate": 4.703196347031964e-05, "loss": 0.1057, "step": 58000 }, { "epoch": 5.34, "learning_rate": 4.657534246575342e-05, "loss": 0.1116, "step": 58500 }, { "epoch": 5.39, "learning_rate": 4.6118721461187214e-05, "loss": 0.1068, "step": 59000 }, { "epoch": 5.43, "learning_rate": 4.5662100456621006e-05, "loss": 0.1094, "step": 59500 }, { "epoch": 5.48, "learning_rate": 4.520547945205479e-05, "loss": 0.1063, "step": 60000 }, { "epoch": 5.5, "eval_avg_block": 0.0, "eval_exact_match": 82.73415326395458, "eval_f1": 90.38623521854846, "eval_loss": 0.7872011065483093, "eval_runtime": 910.5758, "eval_samples_per_second": 11.701, "eval_steps_per_second": 0.183, "step": 60225 }, { "epoch": 5.53, "learning_rate": 4.474885844748858e-05, "loss": 0.1028, "step": 60500 }, { "epoch": 5.57, "learning_rate": 4.4292237442922375e-05, "loss": 0.1056, "step": 61000 }, { "epoch": 5.62, "learning_rate": 4.383561643835617e-05, "loss": 0.1019, "step": 61500 }, { "epoch": 5.66, "learning_rate": 4.337899543378995e-05, "loss": 0.1081, "step": 62000 }, { "epoch": 5.71, "learning_rate": 4.2922374429223744e-05, "loss": 0.1095, "step": 62500 }, { "epoch": 5.75, "learning_rate": 4.2465753424657536e-05, "loss": 0.1045, "step": 63000 }, { "epoch": 5.8, "learning_rate": 4.200913242009132e-05, "loss": 0.1027, "step": 63500 }, { "epoch": 5.84, "learning_rate": 4.155251141552511e-05, "loss": 0.1056, "step": 64000 }, { "epoch": 5.89, "learning_rate": 4.1095890410958905e-05, "loss": 0.116, "step": 64500 }, { "epoch": 5.94, "learning_rate": 4.063926940639269e-05, "loss": 0.0982, "step": 65000 }, { "epoch": 5.98, "learning_rate": 4.018264840182648e-05, "loss": 0.1134, "step": 65500 }, { "epoch": 6.0, "eval_avg_block": 0.0, "eval_exact_match": 81.95837275307474, "eval_f1": 90.19705201865645, "eval_loss": 0.7851516604423523, "eval_runtime": 912.2321, "eval_samples_per_second": 11.68, "eval_steps_per_second": 0.183, "step": 65700 }, { "epoch": 6.03, "learning_rate": 3.9726027397260274e-05, "loss": 0.0826, "step": 66000 }, { "epoch": 6.07, "learning_rate": 3.9269406392694066e-05, "loss": 0.0789, "step": 66500 }, { "epoch": 6.12, "learning_rate": 3.881278538812785e-05, "loss": 0.081, "step": 67000 }, { "epoch": 6.16, "learning_rate": 3.8356164383561644e-05, "loss": 0.0797, "step": 67500 }, { "epoch": 6.21, "learning_rate": 3.7899543378995436e-05, "loss": 0.075, "step": 68000 }, { "epoch": 6.26, "learning_rate": 3.744292237442922e-05, "loss": 0.0801, "step": 68500 }, { "epoch": 6.3, "learning_rate": 3.698630136986301e-05, "loss": 0.0774, "step": 69000 }, { "epoch": 6.35, "learning_rate": 3.6529680365296805e-05, "loss": 0.0767, "step": 69500 }, { "epoch": 6.39, "learning_rate": 3.60730593607306e-05, "loss": 0.0775, "step": 70000 }, { "epoch": 6.44, "learning_rate": 3.561643835616438e-05, "loss": 0.0835, "step": 70500 }, { "epoch": 6.48, "learning_rate": 3.5159817351598174e-05, "loss": 0.0723, "step": 71000 }, { "epoch": 6.5, "eval_avg_block": 0.0, "eval_exact_match": 82.06244087038789, "eval_f1": 90.1486221845852, "eval_loss": 0.886091947555542, "eval_runtime": 911.8713, "eval_samples_per_second": 11.685, "eval_steps_per_second": 0.183, "step": 71175 }, { "epoch": 6.53, "learning_rate": 3.4703196347031966e-05, "loss": 0.0732, "step": 71500 }, { "epoch": 6.58, "learning_rate": 3.424657534246575e-05, "loss": 0.0792, "step": 72000 }, { "epoch": 6.62, "learning_rate": 3.378995433789954e-05, "loss": 0.0861, "step": 72500 }, { "epoch": 6.67, "learning_rate": 3.3333333333333335e-05, "loss": 0.0781, "step": 73000 }, { "epoch": 6.71, "learning_rate": 3.287671232876712e-05, "loss": 0.0799, "step": 73500 }, { "epoch": 6.76, "learning_rate": 3.242009132420091e-05, "loss": 0.0777, "step": 74000 }, { "epoch": 6.8, "learning_rate": 3.1963470319634704e-05, "loss": 0.0796, "step": 74500 }, { "epoch": 6.85, "learning_rate": 3.1506849315068496e-05, "loss": 0.0766, "step": 75000 }, { "epoch": 6.89, "learning_rate": 3.105022831050228e-05, "loss": 0.077, "step": 75500 }, { "epoch": 6.94, "learning_rate": 3.059360730593607e-05, "loss": 0.0786, "step": 76000 }, { "epoch": 6.99, "learning_rate": 3.0136986301369862e-05, "loss": 0.0813, "step": 76500 }, { "epoch": 7.0, "eval_avg_block": 0.0, "eval_exact_match": 82.29895931882687, "eval_f1": 90.39702429599244, "eval_loss": 0.8638301491737366, "eval_runtime": 912.084, "eval_samples_per_second": 11.682, "eval_steps_per_second": 0.183, "step": 76650 }, { "epoch": 7.03, "learning_rate": 2.9680365296803654e-05, "loss": 0.0578, "step": 77000 }, { "epoch": 7.08, "learning_rate": 2.9223744292237442e-05, "loss": 0.0605, "step": 77500 }, { "epoch": 7.12, "learning_rate": 2.8767123287671234e-05, "loss": 0.0503, "step": 78000 }, { "epoch": 7.17, "learning_rate": 2.8310502283105023e-05, "loss": 0.0538, "step": 78500 }, { "epoch": 7.21, "learning_rate": 2.785388127853881e-05, "loss": 0.0629, "step": 79000 }, { "epoch": 7.26, "learning_rate": 2.7397260273972603e-05, "loss": 0.0535, "step": 79500 }, { "epoch": 7.31, "learning_rate": 2.6940639269406392e-05, "loss": 0.0599, "step": 80000 }, { "epoch": 7.35, "learning_rate": 2.6484018264840184e-05, "loss": 0.0597, "step": 80500 }, { "epoch": 7.4, "learning_rate": 2.6027397260273973e-05, "loss": 0.0545, "step": 81000 }, { "epoch": 7.44, "learning_rate": 2.557077625570776e-05, "loss": 0.0602, "step": 81500 }, { "epoch": 7.49, "learning_rate": 2.5114155251141553e-05, "loss": 0.0557, "step": 82000 }, { "epoch": 7.5, "eval_avg_block": 0.0, "eval_exact_match": 82.2421948912015, "eval_f1": 90.29358670439699, "eval_loss": 0.9340036511421204, "eval_runtime": 908.9095, "eval_samples_per_second": 11.723, "eval_steps_per_second": 0.184, "step": 82125 }, { "epoch": 7.53, "learning_rate": 2.4657534246575342e-05, "loss": 0.06, "step": 82500 }, { "epoch": 7.58, "learning_rate": 2.4200913242009134e-05, "loss": 0.0627, "step": 83000 }, { "epoch": 7.63, "learning_rate": 2.3744292237442922e-05, "loss": 0.0575, "step": 83500 }, { "epoch": 7.67, "learning_rate": 2.328767123287671e-05, "loss": 0.0558, "step": 84000 }, { "epoch": 7.72, "learning_rate": 2.2831050228310503e-05, "loss": 0.0625, "step": 84500 }, { "epoch": 7.76, "learning_rate": 2.237442922374429e-05, "loss": 0.0614, "step": 85000 }, { "epoch": 7.81, "learning_rate": 2.1917808219178083e-05, "loss": 0.0561, "step": 85500 }, { "epoch": 7.85, "learning_rate": 2.1461187214611872e-05, "loss": 0.0584, "step": 86000 }, { "epoch": 7.9, "learning_rate": 2.100456621004566e-05, "loss": 0.0611, "step": 86500 }, { "epoch": 7.95, "learning_rate": 2.0547945205479453e-05, "loss": 0.0618, "step": 87000 }, { "epoch": 7.99, "learning_rate": 2.009132420091324e-05, "loss": 0.0615, "step": 87500 }, { "epoch": 8.0, "eval_avg_block": 0.0, "eval_exact_match": 82.76253547776727, "eval_f1": 90.5043271801771, "eval_loss": 0.9232891798019409, "eval_runtime": 909.3529, "eval_samples_per_second": 11.717, "eval_steps_per_second": 0.184, "step": 87600 }, { "epoch": 8.04, "learning_rate": 1.9634703196347033e-05, "loss": 0.0478, "step": 88000 }, { "epoch": 8.08, "learning_rate": 1.9178082191780822e-05, "loss": 0.0494, "step": 88500 }, { "epoch": 8.13, "learning_rate": 1.872146118721461e-05, "loss": 0.0473, "step": 89000 }, { "epoch": 8.17, "learning_rate": 1.8264840182648402e-05, "loss": 0.0421, "step": 89500 }, { "epoch": 8.22, "learning_rate": 1.780821917808219e-05, "loss": 0.0372, "step": 90000 }, { "epoch": 8.26, "learning_rate": 1.7351598173515983e-05, "loss": 0.0496, "step": 90500 }, { "epoch": 8.31, "learning_rate": 1.689497716894977e-05, "loss": 0.0433, "step": 91000 }, { "epoch": 8.36, "learning_rate": 1.643835616438356e-05, "loss": 0.0458, "step": 91500 }, { "epoch": 8.4, "learning_rate": 1.5981735159817352e-05, "loss": 0.0501, "step": 92000 }, { "epoch": 8.45, "learning_rate": 1.552511415525114e-05, "loss": 0.0477, "step": 92500 }, { "epoch": 8.49, "learning_rate": 1.5068493150684931e-05, "loss": 0.0448, "step": 93000 }, { "epoch": 8.5, "eval_avg_block": 0.0, "eval_exact_match": 82.55439924314096, "eval_f1": 90.46637956923067, "eval_loss": 1.0223475694656372, "eval_runtime": 911.1533, "eval_samples_per_second": 11.694, "eval_steps_per_second": 0.183, "step": 93075 }, { "epoch": 8.54, "learning_rate": 1.4611872146118721e-05, "loss": 0.0444, "step": 93500 }, { "epoch": 8.58, "learning_rate": 1.4155251141552511e-05, "loss": 0.0485, "step": 94000 }, { "epoch": 8.63, "learning_rate": 1.3698630136986302e-05, "loss": 0.0433, "step": 94500 }, { "epoch": 8.68, "learning_rate": 1.3242009132420092e-05, "loss": 0.041, "step": 95000 }, { "epoch": 8.72, "learning_rate": 1.278538812785388e-05, "loss": 0.0454, "step": 95500 }, { "epoch": 8.77, "learning_rate": 1.2328767123287671e-05, "loss": 0.0414, "step": 96000 }, { "epoch": 8.81, "learning_rate": 1.1872146118721461e-05, "loss": 0.0467, "step": 96500 }, { "epoch": 8.86, "learning_rate": 1.1415525114155251e-05, "loss": 0.0443, "step": 97000 }, { "epoch": 8.9, "learning_rate": 1.0958904109589042e-05, "loss": 0.0479, "step": 97500 }, { "epoch": 8.95, "learning_rate": 1.050228310502283e-05, "loss": 0.0433, "step": 98000 }, { "epoch": 9.0, "learning_rate": 1.004566210045662e-05, "loss": 0.0417, "step": 98500 }, { "epoch": 9.0, "eval_avg_block": 0.0, "eval_exact_match": 82.39356669820246, "eval_f1": 90.33816430966378, "eval_loss": 1.0318303108215332, "eval_runtime": 909.0606, "eval_samples_per_second": 11.721, "eval_steps_per_second": 0.184, "step": 98550 }, { "epoch": 9.04, "learning_rate": 9.589041095890411e-06, "loss": 0.036, "step": 99000 }, { "epoch": 9.09, "learning_rate": 9.132420091324201e-06, "loss": 0.0347, "step": 99500 }, { "epoch": 9.13, "learning_rate": 8.675799086757991e-06, "loss": 0.0358, "step": 100000 }, { "epoch": 9.18, "learning_rate": 8.21917808219178e-06, "loss": 0.0346, "step": 100500 }, { "epoch": 9.22, "learning_rate": 7.76255707762557e-06, "loss": 0.0358, "step": 101000 }, { "epoch": 9.27, "learning_rate": 7.305936073059361e-06, "loss": 0.0419, "step": 101500 }, { "epoch": 9.32, "learning_rate": 6.849315068493151e-06, "loss": 0.0415, "step": 102000 }, { "epoch": 9.36, "learning_rate": 6.39269406392694e-06, "loss": 0.0335, "step": 102500 }, { "epoch": 9.41, "learning_rate": 5.936073059360731e-06, "loss": 0.034, "step": 103000 }, { "epoch": 9.45, "learning_rate": 5.479452054794521e-06, "loss": 0.0406, "step": 103500 }, { "epoch": 9.5, "learning_rate": 5.02283105022831e-06, "loss": 0.0326, "step": 104000 }, { "epoch": 9.5, "eval_avg_block": 0.0, "eval_exact_match": 82.06244087038789, "eval_f1": 90.1959647507603, "eval_loss": 1.0786569118499756, "eval_runtime": 911.242, "eval_samples_per_second": 11.693, "eval_steps_per_second": 0.183, "step": 104025 }, { "epoch": 9.54, "learning_rate": 4.566210045662101e-06, "loss": 0.0362, "step": 104500 }, { "epoch": 9.59, "learning_rate": 4.10958904109589e-06, "loss": 0.0388, "step": 105000 }, { "epoch": 9.63, "learning_rate": 3.6529680365296803e-06, "loss": 0.0401, "step": 105500 }, { "epoch": 9.68, "learning_rate": 3.19634703196347e-06, "loss": 0.0396, "step": 106000 }, { "epoch": 9.73, "learning_rate": 2.7397260273972604e-06, "loss": 0.0336, "step": 106500 }, { "epoch": 9.77, "learning_rate": 2.2831050228310503e-06, "loss": 0.041, "step": 107000 }, { "epoch": 9.82, "learning_rate": 1.8264840182648401e-06, "loss": 0.0399, "step": 107500 }, { "epoch": 9.86, "learning_rate": 1.3698630136986302e-06, "loss": 0.0328, "step": 108000 }, { "epoch": 9.91, "learning_rate": 9.132420091324201e-07, "loss": 0.0373, "step": 108500 }, { "epoch": 9.95, "learning_rate": 4.5662100456621004e-07, "loss": 0.0378, "step": 109000 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.0332, "step": 109500 }, { "epoch": 10.0, "eval_avg_block": 0.0, "eval_exact_match": 82.19489120151371, "eval_f1": 90.25266366616492, "eval_loss": 1.07804274559021, "eval_runtime": 908.2606, "eval_samples_per_second": 11.731, "eval_steps_per_second": 0.184, "step": 109500 }, { "epoch": 10.0, "step": 109500, "total_flos": 1.89655978082304e+18, "train_loss": 0.2221686732322658, "train_runtime": 157184.0634, "train_samples_per_second": 5.573, "train_steps_per_second": 0.697 } ], "max_steps": 109500, "num_train_epochs": 10, "total_flos": 1.89655978082304e+18, "trial_name": null, "trial_params": null }