t5-squad / trainer_state.json
jvelja's picture
Upload 2 files
c204b7b verified
{
"best_metric": 90.5043271801771,
"best_model_checkpoint": "./save/squad_t5_large_weighted_ce/checkpoint-87600",
"epoch": 10.0,
"global_step": 109500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 9.954337899543378e-05,
"loss": 1.8164,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 9.908675799086759e-05,
"loss": 1.3696,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 9.863013698630137e-05,
"loss": 1.2016,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 9.817351598173516e-05,
"loss": 1.0794,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 9.771689497716895e-05,
"loss": 1.0045,
"step": 2500
},
{
"epoch": 0.27,
"learning_rate": 9.726027397260274e-05,
"loss": 0.9416,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 9.680365296803654e-05,
"loss": 0.8644,
"step": 3500
},
{
"epoch": 0.37,
"learning_rate": 9.634703196347033e-05,
"loss": 0.8424,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 9.58904109589041e-05,
"loss": 0.808,
"step": 4500
},
{
"epoch": 0.46,
"learning_rate": 9.543378995433791e-05,
"loss": 0.7564,
"step": 5000
},
{
"epoch": 0.5,
"eval_avg_block": 0.0,
"eval_exact_match": 80.65279091769158,
"eval_f1": 89.34215607214264,
"eval_loss": 0.6778247952461243,
"eval_runtime": 912.0299,
"eval_samples_per_second": 11.683,
"eval_steps_per_second": 0.183,
"step": 5475
},
{
"epoch": 0.5,
"learning_rate": 9.497716894977169e-05,
"loss": 0.7448,
"step": 5500
},
{
"epoch": 0.55,
"learning_rate": 9.452054794520548e-05,
"loss": 0.7018,
"step": 6000
},
{
"epoch": 0.59,
"learning_rate": 9.406392694063927e-05,
"loss": 0.688,
"step": 6500
},
{
"epoch": 0.64,
"learning_rate": 9.360730593607307e-05,
"loss": 0.6676,
"step": 7000
},
{
"epoch": 0.68,
"learning_rate": 9.315068493150684e-05,
"loss": 0.6728,
"step": 7500
},
{
"epoch": 0.73,
"learning_rate": 9.269406392694065e-05,
"loss": 0.6271,
"step": 8000
},
{
"epoch": 0.78,
"learning_rate": 9.223744292237443e-05,
"loss": 0.6221,
"step": 8500
},
{
"epoch": 0.82,
"learning_rate": 9.178082191780822e-05,
"loss": 0.6188,
"step": 9000
},
{
"epoch": 0.87,
"learning_rate": 9.132420091324201e-05,
"loss": 0.6049,
"step": 9500
},
{
"epoch": 0.91,
"learning_rate": 9.08675799086758e-05,
"loss": 0.6073,
"step": 10000
},
{
"epoch": 0.96,
"learning_rate": 9.041095890410958e-05,
"loss": 0.5808,
"step": 10500
},
{
"epoch": 1.0,
"eval_avg_block": 0.0,
"eval_exact_match": 81.17313150425733,
"eval_f1": 89.63797178932128,
"eval_loss": 0.5828052759170532,
"eval_runtime": 910.6332,
"eval_samples_per_second": 11.701,
"eval_steps_per_second": 0.183,
"step": 10950
},
{
"epoch": 1.0,
"learning_rate": 8.995433789954339e-05,
"loss": 0.5682,
"step": 11000
},
{
"epoch": 1.05,
"learning_rate": 8.949771689497717e-05,
"loss": 0.4419,
"step": 11500
},
{
"epoch": 1.1,
"learning_rate": 8.904109589041096e-05,
"loss": 0.4315,
"step": 12000
},
{
"epoch": 1.14,
"learning_rate": 8.858447488584475e-05,
"loss": 0.4362,
"step": 12500
},
{
"epoch": 1.19,
"learning_rate": 8.812785388127854e-05,
"loss": 0.4269,
"step": 13000
},
{
"epoch": 1.23,
"learning_rate": 8.767123287671233e-05,
"loss": 0.4429,
"step": 13500
},
{
"epoch": 1.28,
"learning_rate": 8.721461187214613e-05,
"loss": 0.4262,
"step": 14000
},
{
"epoch": 1.32,
"learning_rate": 8.67579908675799e-05,
"loss": 0.4246,
"step": 14500
},
{
"epoch": 1.37,
"learning_rate": 8.630136986301371e-05,
"loss": 0.4423,
"step": 15000
},
{
"epoch": 1.42,
"learning_rate": 8.584474885844749e-05,
"loss": 0.4335,
"step": 15500
},
{
"epoch": 1.46,
"learning_rate": 8.538812785388128e-05,
"loss": 0.4052,
"step": 16000
},
{
"epoch": 1.5,
"eval_avg_block": 0.0,
"eval_exact_match": 81.09744560075686,
"eval_f1": 89.68888250735179,
"eval_loss": 0.5694797039031982,
"eval_runtime": 912.5514,
"eval_samples_per_second": 11.676,
"eval_steps_per_second": 0.183,
"step": 16425
},
{
"epoch": 1.51,
"learning_rate": 8.493150684931507e-05,
"loss": 0.4188,
"step": 16500
},
{
"epoch": 1.55,
"learning_rate": 8.447488584474886e-05,
"loss": 0.421,
"step": 17000
},
{
"epoch": 1.6,
"learning_rate": 8.401826484018264e-05,
"loss": 0.4058,
"step": 17500
},
{
"epoch": 1.64,
"learning_rate": 8.356164383561645e-05,
"loss": 0.4133,
"step": 18000
},
{
"epoch": 1.69,
"learning_rate": 8.310502283105023e-05,
"loss": 0.4055,
"step": 18500
},
{
"epoch": 1.74,
"learning_rate": 8.264840182648402e-05,
"loss": 0.4151,
"step": 19000
},
{
"epoch": 1.78,
"learning_rate": 8.219178082191781e-05,
"loss": 0.4099,
"step": 19500
},
{
"epoch": 1.83,
"learning_rate": 8.17351598173516e-05,
"loss": 0.4065,
"step": 20000
},
{
"epoch": 1.87,
"learning_rate": 8.127853881278538e-05,
"loss": 0.4161,
"step": 20500
},
{
"epoch": 1.92,
"learning_rate": 8.082191780821919e-05,
"loss": 0.4022,
"step": 21000
},
{
"epoch": 1.96,
"learning_rate": 8.036529680365296e-05,
"loss": 0.4174,
"step": 21500
},
{
"epoch": 2.0,
"eval_avg_block": 0.0,
"eval_exact_match": 81.64616840113528,
"eval_f1": 89.87366383095139,
"eval_loss": 0.5433966517448425,
"eval_runtime": 909.7255,
"eval_samples_per_second": 11.712,
"eval_steps_per_second": 0.184,
"step": 21900
},
{
"epoch": 2.01,
"learning_rate": 7.990867579908676e-05,
"loss": 0.3775,
"step": 22000
},
{
"epoch": 2.05,
"learning_rate": 7.945205479452055e-05,
"loss": 0.2915,
"step": 22500
},
{
"epoch": 2.1,
"learning_rate": 7.899543378995434e-05,
"loss": 0.2829,
"step": 23000
},
{
"epoch": 2.15,
"learning_rate": 7.853881278538813e-05,
"loss": 0.2738,
"step": 23500
},
{
"epoch": 2.19,
"learning_rate": 7.808219178082192e-05,
"loss": 0.2904,
"step": 24000
},
{
"epoch": 2.24,
"learning_rate": 7.76255707762557e-05,
"loss": 0.2788,
"step": 24500
},
{
"epoch": 2.28,
"learning_rate": 7.716894977168951e-05,
"loss": 0.2901,
"step": 25000
},
{
"epoch": 2.33,
"learning_rate": 7.671232876712329e-05,
"loss": 0.2819,
"step": 25500
},
{
"epoch": 2.37,
"learning_rate": 7.625570776255708e-05,
"loss": 0.2866,
"step": 26000
},
{
"epoch": 2.42,
"learning_rate": 7.579908675799087e-05,
"loss": 0.2993,
"step": 26500
},
{
"epoch": 2.47,
"learning_rate": 7.534246575342466e-05,
"loss": 0.2817,
"step": 27000
},
{
"epoch": 2.5,
"eval_avg_block": 0.0,
"eval_exact_match": 81.73131504257331,
"eval_f1": 90.11057612224653,
"eval_loss": 0.5817934274673462,
"eval_runtime": 911.1532,
"eval_samples_per_second": 11.694,
"eval_steps_per_second": 0.183,
"step": 27375
},
{
"epoch": 2.51,
"learning_rate": 7.488584474885844e-05,
"loss": 0.2821,
"step": 27500
},
{
"epoch": 2.56,
"learning_rate": 7.442922374429225e-05,
"loss": 0.2881,
"step": 28000
},
{
"epoch": 2.6,
"learning_rate": 7.397260273972603e-05,
"loss": 0.3027,
"step": 28500
},
{
"epoch": 2.65,
"learning_rate": 7.351598173515982e-05,
"loss": 0.2849,
"step": 29000
},
{
"epoch": 2.69,
"learning_rate": 7.305936073059361e-05,
"loss": 0.2829,
"step": 29500
},
{
"epoch": 2.74,
"learning_rate": 7.26027397260274e-05,
"loss": 0.2895,
"step": 30000
},
{
"epoch": 2.79,
"learning_rate": 7.21461187214612e-05,
"loss": 0.2951,
"step": 30500
},
{
"epoch": 2.83,
"learning_rate": 7.168949771689499e-05,
"loss": 0.2771,
"step": 31000
},
{
"epoch": 2.88,
"learning_rate": 7.123287671232876e-05,
"loss": 0.297,
"step": 31500
},
{
"epoch": 2.92,
"learning_rate": 7.077625570776256e-05,
"loss": 0.29,
"step": 32000
},
{
"epoch": 2.97,
"learning_rate": 7.031963470319635e-05,
"loss": 0.2758,
"step": 32500
},
{
"epoch": 3.0,
"eval_avg_block": 0.0,
"eval_exact_match": 81.97729422894986,
"eval_f1": 89.89881389854972,
"eval_loss": 0.5549534559249878,
"eval_runtime": 906.3902,
"eval_samples_per_second": 11.755,
"eval_steps_per_second": 0.184,
"step": 32850
},
{
"epoch": 3.01,
"learning_rate": 6.986301369863014e-05,
"loss": 0.2611,
"step": 33000
},
{
"epoch": 3.06,
"learning_rate": 6.940639269406393e-05,
"loss": 0.1965,
"step": 33500
},
{
"epoch": 3.11,
"learning_rate": 6.894977168949772e-05,
"loss": 0.2034,
"step": 34000
},
{
"epoch": 3.15,
"learning_rate": 6.84931506849315e-05,
"loss": 0.1989,
"step": 34500
},
{
"epoch": 3.2,
"learning_rate": 6.803652968036531e-05,
"loss": 0.1973,
"step": 35000
},
{
"epoch": 3.24,
"learning_rate": 6.757990867579909e-05,
"loss": 0.1983,
"step": 35500
},
{
"epoch": 3.29,
"learning_rate": 6.712328767123288e-05,
"loss": 0.2208,
"step": 36000
},
{
"epoch": 3.33,
"learning_rate": 6.666666666666667e-05,
"loss": 0.1901,
"step": 36500
},
{
"epoch": 3.38,
"learning_rate": 6.621004566210046e-05,
"loss": 0.2019,
"step": 37000
},
{
"epoch": 3.42,
"learning_rate": 6.575342465753424e-05,
"loss": 0.213,
"step": 37500
},
{
"epoch": 3.47,
"learning_rate": 6.529680365296805e-05,
"loss": 0.2143,
"step": 38000
},
{
"epoch": 3.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.47871333964049,
"eval_f1": 90.24537185554753,
"eval_loss": 0.6115335822105408,
"eval_runtime": 908.6001,
"eval_samples_per_second": 11.727,
"eval_steps_per_second": 0.184,
"step": 38325
},
{
"epoch": 3.52,
"learning_rate": 6.484018264840182e-05,
"loss": 0.2103,
"step": 38500
},
{
"epoch": 3.56,
"learning_rate": 6.438356164383562e-05,
"loss": 0.205,
"step": 39000
},
{
"epoch": 3.61,
"learning_rate": 6.392694063926941e-05,
"loss": 0.1937,
"step": 39500
},
{
"epoch": 3.65,
"learning_rate": 6.34703196347032e-05,
"loss": 0.2139,
"step": 40000
},
{
"epoch": 3.7,
"learning_rate": 6.301369863013699e-05,
"loss": 0.2113,
"step": 40500
},
{
"epoch": 3.74,
"learning_rate": 6.255707762557078e-05,
"loss": 0.2018,
"step": 41000
},
{
"epoch": 3.79,
"learning_rate": 6.210045662100456e-05,
"loss": 0.2075,
"step": 41500
},
{
"epoch": 3.84,
"learning_rate": 6.164383561643835e-05,
"loss": 0.2113,
"step": 42000
},
{
"epoch": 3.88,
"learning_rate": 6.118721461187215e-05,
"loss": 0.2039,
"step": 42500
},
{
"epoch": 3.93,
"learning_rate": 6.073059360730594e-05,
"loss": 0.1937,
"step": 43000
},
{
"epoch": 3.97,
"learning_rate": 6.0273972602739724e-05,
"loss": 0.2084,
"step": 43500
},
{
"epoch": 4.0,
"eval_avg_block": 0.0,
"eval_exact_match": 82.4314096499527,
"eval_f1": 90.31901460999805,
"eval_loss": 0.6043493747711182,
"eval_runtime": 909.6572,
"eval_samples_per_second": 11.713,
"eval_steps_per_second": 0.184,
"step": 43800
},
{
"epoch": 4.02,
"learning_rate": 5.981735159817352e-05,
"loss": 0.182,
"step": 44000
},
{
"epoch": 4.06,
"learning_rate": 5.936073059360731e-05,
"loss": 0.1508,
"step": 44500
},
{
"epoch": 4.11,
"learning_rate": 5.89041095890411e-05,
"loss": 0.14,
"step": 45000
},
{
"epoch": 4.16,
"learning_rate": 5.8447488584474885e-05,
"loss": 0.1492,
"step": 45500
},
{
"epoch": 4.2,
"learning_rate": 5.7990867579908683e-05,
"loss": 0.1541,
"step": 46000
},
{
"epoch": 4.25,
"learning_rate": 5.753424657534247e-05,
"loss": 0.1479,
"step": 46500
},
{
"epoch": 4.29,
"learning_rate": 5.707762557077626e-05,
"loss": 0.1401,
"step": 47000
},
{
"epoch": 4.34,
"learning_rate": 5.6621004566210046e-05,
"loss": 0.1558,
"step": 47500
},
{
"epoch": 4.38,
"learning_rate": 5.616438356164384e-05,
"loss": 0.1523,
"step": 48000
},
{
"epoch": 4.43,
"learning_rate": 5.570776255707762e-05,
"loss": 0.1414,
"step": 48500
},
{
"epoch": 4.47,
"learning_rate": 5.525114155251142e-05,
"loss": 0.1476,
"step": 49000
},
{
"epoch": 4.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.41248817407758,
"eval_f1": 90.3531697774033,
"eval_loss": 0.7035377025604248,
"eval_runtime": 909.9008,
"eval_samples_per_second": 11.71,
"eval_steps_per_second": 0.184,
"step": 49275
},
{
"epoch": 4.52,
"learning_rate": 5.479452054794521e-05,
"loss": 0.1499,
"step": 49500
},
{
"epoch": 4.57,
"learning_rate": 5.4337899543379e-05,
"loss": 0.1567,
"step": 50000
},
{
"epoch": 4.61,
"learning_rate": 5.3881278538812784e-05,
"loss": 0.1436,
"step": 50500
},
{
"epoch": 4.66,
"learning_rate": 5.342465753424658e-05,
"loss": 0.1557,
"step": 51000
},
{
"epoch": 4.7,
"learning_rate": 5.296803652968037e-05,
"loss": 0.1491,
"step": 51500
},
{
"epoch": 4.75,
"learning_rate": 5.251141552511416e-05,
"loss": 0.1338,
"step": 52000
},
{
"epoch": 4.79,
"learning_rate": 5.2054794520547945e-05,
"loss": 0.137,
"step": 52500
},
{
"epoch": 4.84,
"learning_rate": 5.159817351598174e-05,
"loss": 0.1371,
"step": 53000
},
{
"epoch": 4.89,
"learning_rate": 5.114155251141552e-05,
"loss": 0.1561,
"step": 53500
},
{
"epoch": 4.93,
"learning_rate": 5.068493150684932e-05,
"loss": 0.1501,
"step": 54000
},
{
"epoch": 4.98,
"learning_rate": 5.0228310502283106e-05,
"loss": 0.1543,
"step": 54500
},
{
"epoch": 5.0,
"eval_avg_block": 0.0,
"eval_exact_match": 82.2421948912015,
"eval_f1": 90.37064084267695,
"eval_loss": 0.6705102324485779,
"eval_runtime": 913.1361,
"eval_samples_per_second": 11.669,
"eval_steps_per_second": 0.183,
"step": 54750
},
{
"epoch": 5.02,
"learning_rate": 4.977168949771689e-05,
"loss": 0.1252,
"step": 55000
},
{
"epoch": 5.07,
"learning_rate": 4.9315068493150684e-05,
"loss": 0.1034,
"step": 55500
},
{
"epoch": 5.11,
"learning_rate": 4.8858447488584476e-05,
"loss": 0.1046,
"step": 56000
},
{
"epoch": 5.16,
"learning_rate": 4.840182648401827e-05,
"loss": 0.1137,
"step": 56500
},
{
"epoch": 5.21,
"learning_rate": 4.794520547945205e-05,
"loss": 0.1008,
"step": 57000
},
{
"epoch": 5.25,
"learning_rate": 4.7488584474885845e-05,
"loss": 0.1038,
"step": 57500
},
{
"epoch": 5.3,
"learning_rate": 4.703196347031964e-05,
"loss": 0.1057,
"step": 58000
},
{
"epoch": 5.34,
"learning_rate": 4.657534246575342e-05,
"loss": 0.1116,
"step": 58500
},
{
"epoch": 5.39,
"learning_rate": 4.6118721461187214e-05,
"loss": 0.1068,
"step": 59000
},
{
"epoch": 5.43,
"learning_rate": 4.5662100456621006e-05,
"loss": 0.1094,
"step": 59500
},
{
"epoch": 5.48,
"learning_rate": 4.520547945205479e-05,
"loss": 0.1063,
"step": 60000
},
{
"epoch": 5.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.73415326395458,
"eval_f1": 90.38623521854846,
"eval_loss": 0.7872011065483093,
"eval_runtime": 910.5758,
"eval_samples_per_second": 11.701,
"eval_steps_per_second": 0.183,
"step": 60225
},
{
"epoch": 5.53,
"learning_rate": 4.474885844748858e-05,
"loss": 0.1028,
"step": 60500
},
{
"epoch": 5.57,
"learning_rate": 4.4292237442922375e-05,
"loss": 0.1056,
"step": 61000
},
{
"epoch": 5.62,
"learning_rate": 4.383561643835617e-05,
"loss": 0.1019,
"step": 61500
},
{
"epoch": 5.66,
"learning_rate": 4.337899543378995e-05,
"loss": 0.1081,
"step": 62000
},
{
"epoch": 5.71,
"learning_rate": 4.2922374429223744e-05,
"loss": 0.1095,
"step": 62500
},
{
"epoch": 5.75,
"learning_rate": 4.2465753424657536e-05,
"loss": 0.1045,
"step": 63000
},
{
"epoch": 5.8,
"learning_rate": 4.200913242009132e-05,
"loss": 0.1027,
"step": 63500
},
{
"epoch": 5.84,
"learning_rate": 4.155251141552511e-05,
"loss": 0.1056,
"step": 64000
},
{
"epoch": 5.89,
"learning_rate": 4.1095890410958905e-05,
"loss": 0.116,
"step": 64500
},
{
"epoch": 5.94,
"learning_rate": 4.063926940639269e-05,
"loss": 0.0982,
"step": 65000
},
{
"epoch": 5.98,
"learning_rate": 4.018264840182648e-05,
"loss": 0.1134,
"step": 65500
},
{
"epoch": 6.0,
"eval_avg_block": 0.0,
"eval_exact_match": 81.95837275307474,
"eval_f1": 90.19705201865645,
"eval_loss": 0.7851516604423523,
"eval_runtime": 912.2321,
"eval_samples_per_second": 11.68,
"eval_steps_per_second": 0.183,
"step": 65700
},
{
"epoch": 6.03,
"learning_rate": 3.9726027397260274e-05,
"loss": 0.0826,
"step": 66000
},
{
"epoch": 6.07,
"learning_rate": 3.9269406392694066e-05,
"loss": 0.0789,
"step": 66500
},
{
"epoch": 6.12,
"learning_rate": 3.881278538812785e-05,
"loss": 0.081,
"step": 67000
},
{
"epoch": 6.16,
"learning_rate": 3.8356164383561644e-05,
"loss": 0.0797,
"step": 67500
},
{
"epoch": 6.21,
"learning_rate": 3.7899543378995436e-05,
"loss": 0.075,
"step": 68000
},
{
"epoch": 6.26,
"learning_rate": 3.744292237442922e-05,
"loss": 0.0801,
"step": 68500
},
{
"epoch": 6.3,
"learning_rate": 3.698630136986301e-05,
"loss": 0.0774,
"step": 69000
},
{
"epoch": 6.35,
"learning_rate": 3.6529680365296805e-05,
"loss": 0.0767,
"step": 69500
},
{
"epoch": 6.39,
"learning_rate": 3.60730593607306e-05,
"loss": 0.0775,
"step": 70000
},
{
"epoch": 6.44,
"learning_rate": 3.561643835616438e-05,
"loss": 0.0835,
"step": 70500
},
{
"epoch": 6.48,
"learning_rate": 3.5159817351598174e-05,
"loss": 0.0723,
"step": 71000
},
{
"epoch": 6.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.06244087038789,
"eval_f1": 90.1486221845852,
"eval_loss": 0.886091947555542,
"eval_runtime": 911.8713,
"eval_samples_per_second": 11.685,
"eval_steps_per_second": 0.183,
"step": 71175
},
{
"epoch": 6.53,
"learning_rate": 3.4703196347031966e-05,
"loss": 0.0732,
"step": 71500
},
{
"epoch": 6.58,
"learning_rate": 3.424657534246575e-05,
"loss": 0.0792,
"step": 72000
},
{
"epoch": 6.62,
"learning_rate": 3.378995433789954e-05,
"loss": 0.0861,
"step": 72500
},
{
"epoch": 6.67,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0781,
"step": 73000
},
{
"epoch": 6.71,
"learning_rate": 3.287671232876712e-05,
"loss": 0.0799,
"step": 73500
},
{
"epoch": 6.76,
"learning_rate": 3.242009132420091e-05,
"loss": 0.0777,
"step": 74000
},
{
"epoch": 6.8,
"learning_rate": 3.1963470319634704e-05,
"loss": 0.0796,
"step": 74500
},
{
"epoch": 6.85,
"learning_rate": 3.1506849315068496e-05,
"loss": 0.0766,
"step": 75000
},
{
"epoch": 6.89,
"learning_rate": 3.105022831050228e-05,
"loss": 0.077,
"step": 75500
},
{
"epoch": 6.94,
"learning_rate": 3.059360730593607e-05,
"loss": 0.0786,
"step": 76000
},
{
"epoch": 6.99,
"learning_rate": 3.0136986301369862e-05,
"loss": 0.0813,
"step": 76500
},
{
"epoch": 7.0,
"eval_avg_block": 0.0,
"eval_exact_match": 82.29895931882687,
"eval_f1": 90.39702429599244,
"eval_loss": 0.8638301491737366,
"eval_runtime": 912.084,
"eval_samples_per_second": 11.682,
"eval_steps_per_second": 0.183,
"step": 76650
},
{
"epoch": 7.03,
"learning_rate": 2.9680365296803654e-05,
"loss": 0.0578,
"step": 77000
},
{
"epoch": 7.08,
"learning_rate": 2.9223744292237442e-05,
"loss": 0.0605,
"step": 77500
},
{
"epoch": 7.12,
"learning_rate": 2.8767123287671234e-05,
"loss": 0.0503,
"step": 78000
},
{
"epoch": 7.17,
"learning_rate": 2.8310502283105023e-05,
"loss": 0.0538,
"step": 78500
},
{
"epoch": 7.21,
"learning_rate": 2.785388127853881e-05,
"loss": 0.0629,
"step": 79000
},
{
"epoch": 7.26,
"learning_rate": 2.7397260273972603e-05,
"loss": 0.0535,
"step": 79500
},
{
"epoch": 7.31,
"learning_rate": 2.6940639269406392e-05,
"loss": 0.0599,
"step": 80000
},
{
"epoch": 7.35,
"learning_rate": 2.6484018264840184e-05,
"loss": 0.0597,
"step": 80500
},
{
"epoch": 7.4,
"learning_rate": 2.6027397260273973e-05,
"loss": 0.0545,
"step": 81000
},
{
"epoch": 7.44,
"learning_rate": 2.557077625570776e-05,
"loss": 0.0602,
"step": 81500
},
{
"epoch": 7.49,
"learning_rate": 2.5114155251141553e-05,
"loss": 0.0557,
"step": 82000
},
{
"epoch": 7.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.2421948912015,
"eval_f1": 90.29358670439699,
"eval_loss": 0.9340036511421204,
"eval_runtime": 908.9095,
"eval_samples_per_second": 11.723,
"eval_steps_per_second": 0.184,
"step": 82125
},
{
"epoch": 7.53,
"learning_rate": 2.4657534246575342e-05,
"loss": 0.06,
"step": 82500
},
{
"epoch": 7.58,
"learning_rate": 2.4200913242009134e-05,
"loss": 0.0627,
"step": 83000
},
{
"epoch": 7.63,
"learning_rate": 2.3744292237442922e-05,
"loss": 0.0575,
"step": 83500
},
{
"epoch": 7.67,
"learning_rate": 2.328767123287671e-05,
"loss": 0.0558,
"step": 84000
},
{
"epoch": 7.72,
"learning_rate": 2.2831050228310503e-05,
"loss": 0.0625,
"step": 84500
},
{
"epoch": 7.76,
"learning_rate": 2.237442922374429e-05,
"loss": 0.0614,
"step": 85000
},
{
"epoch": 7.81,
"learning_rate": 2.1917808219178083e-05,
"loss": 0.0561,
"step": 85500
},
{
"epoch": 7.85,
"learning_rate": 2.1461187214611872e-05,
"loss": 0.0584,
"step": 86000
},
{
"epoch": 7.9,
"learning_rate": 2.100456621004566e-05,
"loss": 0.0611,
"step": 86500
},
{
"epoch": 7.95,
"learning_rate": 2.0547945205479453e-05,
"loss": 0.0618,
"step": 87000
},
{
"epoch": 7.99,
"learning_rate": 2.009132420091324e-05,
"loss": 0.0615,
"step": 87500
},
{
"epoch": 8.0,
"eval_avg_block": 0.0,
"eval_exact_match": 82.76253547776727,
"eval_f1": 90.5043271801771,
"eval_loss": 0.9232891798019409,
"eval_runtime": 909.3529,
"eval_samples_per_second": 11.717,
"eval_steps_per_second": 0.184,
"step": 87600
},
{
"epoch": 8.04,
"learning_rate": 1.9634703196347033e-05,
"loss": 0.0478,
"step": 88000
},
{
"epoch": 8.08,
"learning_rate": 1.9178082191780822e-05,
"loss": 0.0494,
"step": 88500
},
{
"epoch": 8.13,
"learning_rate": 1.872146118721461e-05,
"loss": 0.0473,
"step": 89000
},
{
"epoch": 8.17,
"learning_rate": 1.8264840182648402e-05,
"loss": 0.0421,
"step": 89500
},
{
"epoch": 8.22,
"learning_rate": 1.780821917808219e-05,
"loss": 0.0372,
"step": 90000
},
{
"epoch": 8.26,
"learning_rate": 1.7351598173515983e-05,
"loss": 0.0496,
"step": 90500
},
{
"epoch": 8.31,
"learning_rate": 1.689497716894977e-05,
"loss": 0.0433,
"step": 91000
},
{
"epoch": 8.36,
"learning_rate": 1.643835616438356e-05,
"loss": 0.0458,
"step": 91500
},
{
"epoch": 8.4,
"learning_rate": 1.5981735159817352e-05,
"loss": 0.0501,
"step": 92000
},
{
"epoch": 8.45,
"learning_rate": 1.552511415525114e-05,
"loss": 0.0477,
"step": 92500
},
{
"epoch": 8.49,
"learning_rate": 1.5068493150684931e-05,
"loss": 0.0448,
"step": 93000
},
{
"epoch": 8.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.55439924314096,
"eval_f1": 90.46637956923067,
"eval_loss": 1.0223475694656372,
"eval_runtime": 911.1533,
"eval_samples_per_second": 11.694,
"eval_steps_per_second": 0.183,
"step": 93075
},
{
"epoch": 8.54,
"learning_rate": 1.4611872146118721e-05,
"loss": 0.0444,
"step": 93500
},
{
"epoch": 8.58,
"learning_rate": 1.4155251141552511e-05,
"loss": 0.0485,
"step": 94000
},
{
"epoch": 8.63,
"learning_rate": 1.3698630136986302e-05,
"loss": 0.0433,
"step": 94500
},
{
"epoch": 8.68,
"learning_rate": 1.3242009132420092e-05,
"loss": 0.041,
"step": 95000
},
{
"epoch": 8.72,
"learning_rate": 1.278538812785388e-05,
"loss": 0.0454,
"step": 95500
},
{
"epoch": 8.77,
"learning_rate": 1.2328767123287671e-05,
"loss": 0.0414,
"step": 96000
},
{
"epoch": 8.81,
"learning_rate": 1.1872146118721461e-05,
"loss": 0.0467,
"step": 96500
},
{
"epoch": 8.86,
"learning_rate": 1.1415525114155251e-05,
"loss": 0.0443,
"step": 97000
},
{
"epoch": 8.9,
"learning_rate": 1.0958904109589042e-05,
"loss": 0.0479,
"step": 97500
},
{
"epoch": 8.95,
"learning_rate": 1.050228310502283e-05,
"loss": 0.0433,
"step": 98000
},
{
"epoch": 9.0,
"learning_rate": 1.004566210045662e-05,
"loss": 0.0417,
"step": 98500
},
{
"epoch": 9.0,
"eval_avg_block": 0.0,
"eval_exact_match": 82.39356669820246,
"eval_f1": 90.33816430966378,
"eval_loss": 1.0318303108215332,
"eval_runtime": 909.0606,
"eval_samples_per_second": 11.721,
"eval_steps_per_second": 0.184,
"step": 98550
},
{
"epoch": 9.04,
"learning_rate": 9.589041095890411e-06,
"loss": 0.036,
"step": 99000
},
{
"epoch": 9.09,
"learning_rate": 9.132420091324201e-06,
"loss": 0.0347,
"step": 99500
},
{
"epoch": 9.13,
"learning_rate": 8.675799086757991e-06,
"loss": 0.0358,
"step": 100000
},
{
"epoch": 9.18,
"learning_rate": 8.21917808219178e-06,
"loss": 0.0346,
"step": 100500
},
{
"epoch": 9.22,
"learning_rate": 7.76255707762557e-06,
"loss": 0.0358,
"step": 101000
},
{
"epoch": 9.27,
"learning_rate": 7.305936073059361e-06,
"loss": 0.0419,
"step": 101500
},
{
"epoch": 9.32,
"learning_rate": 6.849315068493151e-06,
"loss": 0.0415,
"step": 102000
},
{
"epoch": 9.36,
"learning_rate": 6.39269406392694e-06,
"loss": 0.0335,
"step": 102500
},
{
"epoch": 9.41,
"learning_rate": 5.936073059360731e-06,
"loss": 0.034,
"step": 103000
},
{
"epoch": 9.45,
"learning_rate": 5.479452054794521e-06,
"loss": 0.0406,
"step": 103500
},
{
"epoch": 9.5,
"learning_rate": 5.02283105022831e-06,
"loss": 0.0326,
"step": 104000
},
{
"epoch": 9.5,
"eval_avg_block": 0.0,
"eval_exact_match": 82.06244087038789,
"eval_f1": 90.1959647507603,
"eval_loss": 1.0786569118499756,
"eval_runtime": 911.242,
"eval_samples_per_second": 11.693,
"eval_steps_per_second": 0.183,
"step": 104025
},
{
"epoch": 9.54,
"learning_rate": 4.566210045662101e-06,
"loss": 0.0362,
"step": 104500
},
{
"epoch": 9.59,
"learning_rate": 4.10958904109589e-06,
"loss": 0.0388,
"step": 105000
},
{
"epoch": 9.63,
"learning_rate": 3.6529680365296803e-06,
"loss": 0.0401,
"step": 105500
},
{
"epoch": 9.68,
"learning_rate": 3.19634703196347e-06,
"loss": 0.0396,
"step": 106000
},
{
"epoch": 9.73,
"learning_rate": 2.7397260273972604e-06,
"loss": 0.0336,
"step": 106500
},
{
"epoch": 9.77,
"learning_rate": 2.2831050228310503e-06,
"loss": 0.041,
"step": 107000
},
{
"epoch": 9.82,
"learning_rate": 1.8264840182648401e-06,
"loss": 0.0399,
"step": 107500
},
{
"epoch": 9.86,
"learning_rate": 1.3698630136986302e-06,
"loss": 0.0328,
"step": 108000
},
{
"epoch": 9.91,
"learning_rate": 9.132420091324201e-07,
"loss": 0.0373,
"step": 108500
},
{
"epoch": 9.95,
"learning_rate": 4.5662100456621004e-07,
"loss": 0.0378,
"step": 109000
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.0332,
"step": 109500
},
{
"epoch": 10.0,
"eval_avg_block": 0.0,
"eval_exact_match": 82.19489120151371,
"eval_f1": 90.25266366616492,
"eval_loss": 1.07804274559021,
"eval_runtime": 908.2606,
"eval_samples_per_second": 11.731,
"eval_steps_per_second": 0.184,
"step": 109500
},
{
"epoch": 10.0,
"step": 109500,
"total_flos": 1.89655978082304e+18,
"train_loss": 0.2221686732322658,
"train_runtime": 157184.0634,
"train_samples_per_second": 5.573,
"train_steps_per_second": 0.697
}
],
"max_steps": 109500,
"num_train_epochs": 10,
"total_flos": 1.89655978082304e+18,
"trial_name": null,
"trial_params": null
}