| { | |
| "best_metric": 90.5043271801771, | |
| "best_model_checkpoint": "./save/squad_t5_large_weighted_ce/checkpoint-87600", | |
| "epoch": 10.0, | |
| "global_step": 109500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.954337899543378e-05, | |
| "loss": 1.8164, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.908675799086759e-05, | |
| "loss": 1.3696, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.863013698630137e-05, | |
| "loss": 1.2016, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.817351598173516e-05, | |
| "loss": 1.0794, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.771689497716895e-05, | |
| "loss": 1.0045, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 9.726027397260274e-05, | |
| "loss": 0.9416, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 9.680365296803654e-05, | |
| "loss": 0.8644, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 9.634703196347033e-05, | |
| "loss": 0.8424, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 9.58904109589041e-05, | |
| "loss": 0.808, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 9.543378995433791e-05, | |
| "loss": 0.7564, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 80.65279091769158, | |
| "eval_f1": 89.34215607214264, | |
| "eval_loss": 0.6778247952461243, | |
| "eval_runtime": 912.0299, | |
| "eval_samples_per_second": 11.683, | |
| "eval_steps_per_second": 0.183, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 9.497716894977169e-05, | |
| "loss": 0.7448, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.452054794520548e-05, | |
| "loss": 0.7018, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 9.406392694063927e-05, | |
| "loss": 0.688, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 9.360730593607307e-05, | |
| "loss": 0.6676, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.315068493150684e-05, | |
| "loss": 0.6728, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 9.269406392694065e-05, | |
| "loss": 0.6271, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 9.223744292237443e-05, | |
| "loss": 0.6221, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.178082191780822e-05, | |
| "loss": 0.6188, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.132420091324201e-05, | |
| "loss": 0.6049, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 9.08675799086758e-05, | |
| "loss": 0.6073, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.041095890410958e-05, | |
| "loss": 0.5808, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 81.17313150425733, | |
| "eval_f1": 89.63797178932128, | |
| "eval_loss": 0.5828052759170532, | |
| "eval_runtime": 910.6332, | |
| "eval_samples_per_second": 11.701, | |
| "eval_steps_per_second": 0.183, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.995433789954339e-05, | |
| "loss": 0.5682, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 8.949771689497717e-05, | |
| "loss": 0.4419, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 8.904109589041096e-05, | |
| "loss": 0.4315, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 8.858447488584475e-05, | |
| "loss": 0.4362, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.812785388127854e-05, | |
| "loss": 0.4269, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 8.767123287671233e-05, | |
| "loss": 0.4429, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 8.721461187214613e-05, | |
| "loss": 0.4262, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 8.67579908675799e-05, | |
| "loss": 0.4246, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 8.630136986301371e-05, | |
| "loss": 0.4423, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 8.584474885844749e-05, | |
| "loss": 0.4335, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 8.538812785388128e-05, | |
| "loss": 0.4052, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 81.09744560075686, | |
| "eval_f1": 89.68888250735179, | |
| "eval_loss": 0.5694797039031982, | |
| "eval_runtime": 912.5514, | |
| "eval_samples_per_second": 11.676, | |
| "eval_steps_per_second": 0.183, | |
| "step": 16425 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 8.493150684931507e-05, | |
| "loss": 0.4188, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 8.447488584474886e-05, | |
| "loss": 0.421, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.401826484018264e-05, | |
| "loss": 0.4058, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 8.356164383561645e-05, | |
| "loss": 0.4133, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 8.310502283105023e-05, | |
| "loss": 0.4055, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 8.264840182648402e-05, | |
| "loss": 0.4151, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 8.219178082191781e-05, | |
| "loss": 0.4099, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 8.17351598173516e-05, | |
| "loss": 0.4065, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 8.127853881278538e-05, | |
| "loss": 0.4161, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 8.082191780821919e-05, | |
| "loss": 0.4022, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 8.036529680365296e-05, | |
| "loss": 0.4174, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 81.64616840113528, | |
| "eval_f1": 89.87366383095139, | |
| "eval_loss": 0.5433966517448425, | |
| "eval_runtime": 909.7255, | |
| "eval_samples_per_second": 11.712, | |
| "eval_steps_per_second": 0.184, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 7.990867579908676e-05, | |
| "loss": 0.3775, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 7.945205479452055e-05, | |
| "loss": 0.2915, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 7.899543378995434e-05, | |
| "loss": 0.2829, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 7.853881278538813e-05, | |
| "loss": 0.2738, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 7.808219178082192e-05, | |
| "loss": 0.2904, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 7.76255707762557e-05, | |
| "loss": 0.2788, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 7.716894977168951e-05, | |
| "loss": 0.2901, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.671232876712329e-05, | |
| "loss": 0.2819, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 7.625570776255708e-05, | |
| "loss": 0.2866, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 7.579908675799087e-05, | |
| "loss": 0.2993, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 7.534246575342466e-05, | |
| "loss": 0.2817, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 81.73131504257331, | |
| "eval_f1": 90.11057612224653, | |
| "eval_loss": 0.5817934274673462, | |
| "eval_runtime": 911.1532, | |
| "eval_samples_per_second": 11.694, | |
| "eval_steps_per_second": 0.183, | |
| "step": 27375 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 7.488584474885844e-05, | |
| "loss": 0.2821, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.442922374429225e-05, | |
| "loss": 0.2881, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.397260273972603e-05, | |
| "loss": 0.3027, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 7.351598173515982e-05, | |
| "loss": 0.2849, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 7.305936073059361e-05, | |
| "loss": 0.2829, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 7.26027397260274e-05, | |
| "loss": 0.2895, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 7.21461187214612e-05, | |
| "loss": 0.2951, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 7.168949771689499e-05, | |
| "loss": 0.2771, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 7.123287671232876e-05, | |
| "loss": 0.297, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 7.077625570776256e-05, | |
| "loss": 0.29, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 7.031963470319635e-05, | |
| "loss": 0.2758, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 81.97729422894986, | |
| "eval_f1": 89.89881389854972, | |
| "eval_loss": 0.5549534559249878, | |
| "eval_runtime": 906.3902, | |
| "eval_samples_per_second": 11.755, | |
| "eval_steps_per_second": 0.184, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 6.986301369863014e-05, | |
| "loss": 0.2611, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 6.940639269406393e-05, | |
| "loss": 0.1965, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 6.894977168949772e-05, | |
| "loss": 0.2034, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 6.84931506849315e-05, | |
| "loss": 0.1989, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 6.803652968036531e-05, | |
| "loss": 0.1973, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 6.757990867579909e-05, | |
| "loss": 0.1983, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 6.712328767123288e-05, | |
| "loss": 0.2208, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.1901, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 6.621004566210046e-05, | |
| "loss": 0.2019, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 6.575342465753424e-05, | |
| "loss": 0.213, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 6.529680365296805e-05, | |
| "loss": 0.2143, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.47871333964049, | |
| "eval_f1": 90.24537185554753, | |
| "eval_loss": 0.6115335822105408, | |
| "eval_runtime": 908.6001, | |
| "eval_samples_per_second": 11.727, | |
| "eval_steps_per_second": 0.184, | |
| "step": 38325 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 6.484018264840182e-05, | |
| "loss": 0.2103, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 6.438356164383562e-05, | |
| "loss": 0.205, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 6.392694063926941e-05, | |
| "loss": 0.1937, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 6.34703196347032e-05, | |
| "loss": 0.2139, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 6.301369863013699e-05, | |
| "loss": 0.2113, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 6.255707762557078e-05, | |
| "loss": 0.2018, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 6.210045662100456e-05, | |
| "loss": 0.2075, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 6.164383561643835e-05, | |
| "loss": 0.2113, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 6.118721461187215e-05, | |
| "loss": 0.2039, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 6.073059360730594e-05, | |
| "loss": 0.1937, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 6.0273972602739724e-05, | |
| "loss": 0.2084, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.4314096499527, | |
| "eval_f1": 90.31901460999805, | |
| "eval_loss": 0.6043493747711182, | |
| "eval_runtime": 909.6572, | |
| "eval_samples_per_second": 11.713, | |
| "eval_steps_per_second": 0.184, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 5.981735159817352e-05, | |
| "loss": 0.182, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 5.936073059360731e-05, | |
| "loss": 0.1508, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 5.89041095890411e-05, | |
| "loss": 0.14, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 5.8447488584474885e-05, | |
| "loss": 0.1492, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 5.7990867579908683e-05, | |
| "loss": 0.1541, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 5.753424657534247e-05, | |
| "loss": 0.1479, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 5.707762557077626e-05, | |
| "loss": 0.1401, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 5.6621004566210046e-05, | |
| "loss": 0.1558, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 5.616438356164384e-05, | |
| "loss": 0.1523, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 5.570776255707762e-05, | |
| "loss": 0.1414, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 5.525114155251142e-05, | |
| "loss": 0.1476, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.41248817407758, | |
| "eval_f1": 90.3531697774033, | |
| "eval_loss": 0.7035377025604248, | |
| "eval_runtime": 909.9008, | |
| "eval_samples_per_second": 11.71, | |
| "eval_steps_per_second": 0.184, | |
| "step": 49275 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 5.479452054794521e-05, | |
| "loss": 0.1499, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 5.4337899543379e-05, | |
| "loss": 0.1567, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 5.3881278538812784e-05, | |
| "loss": 0.1436, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 5.342465753424658e-05, | |
| "loss": 0.1557, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 5.296803652968037e-05, | |
| "loss": 0.1491, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 5.251141552511416e-05, | |
| "loss": 0.1338, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 5.2054794520547945e-05, | |
| "loss": 0.137, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 5.159817351598174e-05, | |
| "loss": 0.1371, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 5.114155251141552e-05, | |
| "loss": 0.1561, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 5.068493150684932e-05, | |
| "loss": 0.1501, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 5.0228310502283106e-05, | |
| "loss": 0.1543, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.2421948912015, | |
| "eval_f1": 90.37064084267695, | |
| "eval_loss": 0.6705102324485779, | |
| "eval_runtime": 913.1361, | |
| "eval_samples_per_second": 11.669, | |
| "eval_steps_per_second": 0.183, | |
| "step": 54750 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 4.977168949771689e-05, | |
| "loss": 0.1252, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 4.9315068493150684e-05, | |
| "loss": 0.1034, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 4.8858447488584476e-05, | |
| "loss": 0.1046, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 4.840182648401827e-05, | |
| "loss": 0.1137, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 4.794520547945205e-05, | |
| "loss": 0.1008, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 4.7488584474885845e-05, | |
| "loss": 0.1038, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 4.703196347031964e-05, | |
| "loss": 0.1057, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 4.657534246575342e-05, | |
| "loss": 0.1116, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 4.6118721461187214e-05, | |
| "loss": 0.1068, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 4.5662100456621006e-05, | |
| "loss": 0.1094, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 4.520547945205479e-05, | |
| "loss": 0.1063, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.73415326395458, | |
| "eval_f1": 90.38623521854846, | |
| "eval_loss": 0.7872011065483093, | |
| "eval_runtime": 910.5758, | |
| "eval_samples_per_second": 11.701, | |
| "eval_steps_per_second": 0.183, | |
| "step": 60225 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 4.474885844748858e-05, | |
| "loss": 0.1028, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 4.4292237442922375e-05, | |
| "loss": 0.1056, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 4.383561643835617e-05, | |
| "loss": 0.1019, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 4.337899543378995e-05, | |
| "loss": 0.1081, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 4.2922374429223744e-05, | |
| "loss": 0.1095, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 4.2465753424657536e-05, | |
| "loss": 0.1045, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 4.200913242009132e-05, | |
| "loss": 0.1027, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 4.155251141552511e-05, | |
| "loss": 0.1056, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 4.1095890410958905e-05, | |
| "loss": 0.116, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 4.063926940639269e-05, | |
| "loss": 0.0982, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 4.018264840182648e-05, | |
| "loss": 0.1134, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 81.95837275307474, | |
| "eval_f1": 90.19705201865645, | |
| "eval_loss": 0.7851516604423523, | |
| "eval_runtime": 912.2321, | |
| "eval_samples_per_second": 11.68, | |
| "eval_steps_per_second": 0.183, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 3.9726027397260274e-05, | |
| "loss": 0.0826, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 3.9269406392694066e-05, | |
| "loss": 0.0789, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 3.881278538812785e-05, | |
| "loss": 0.081, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 3.8356164383561644e-05, | |
| "loss": 0.0797, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 3.7899543378995436e-05, | |
| "loss": 0.075, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 3.744292237442922e-05, | |
| "loss": 0.0801, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 3.698630136986301e-05, | |
| "loss": 0.0774, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 3.6529680365296805e-05, | |
| "loss": 0.0767, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 3.60730593607306e-05, | |
| "loss": 0.0775, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 3.561643835616438e-05, | |
| "loss": 0.0835, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 3.5159817351598174e-05, | |
| "loss": 0.0723, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.06244087038789, | |
| "eval_f1": 90.1486221845852, | |
| "eval_loss": 0.886091947555542, | |
| "eval_runtime": 911.8713, | |
| "eval_samples_per_second": 11.685, | |
| "eval_steps_per_second": 0.183, | |
| "step": 71175 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 3.4703196347031966e-05, | |
| "loss": 0.0732, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 3.424657534246575e-05, | |
| "loss": 0.0792, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 3.378995433789954e-05, | |
| "loss": 0.0861, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0781, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 3.287671232876712e-05, | |
| "loss": 0.0799, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 3.242009132420091e-05, | |
| "loss": 0.0777, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 3.1963470319634704e-05, | |
| "loss": 0.0796, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 3.1506849315068496e-05, | |
| "loss": 0.0766, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 3.105022831050228e-05, | |
| "loss": 0.077, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 3.059360730593607e-05, | |
| "loss": 0.0786, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 3.0136986301369862e-05, | |
| "loss": 0.0813, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.29895931882687, | |
| "eval_f1": 90.39702429599244, | |
| "eval_loss": 0.8638301491737366, | |
| "eval_runtime": 912.084, | |
| "eval_samples_per_second": 11.682, | |
| "eval_steps_per_second": 0.183, | |
| "step": 76650 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 2.9680365296803654e-05, | |
| "loss": 0.0578, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 2.9223744292237442e-05, | |
| "loss": 0.0605, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 2.8767123287671234e-05, | |
| "loss": 0.0503, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 2.8310502283105023e-05, | |
| "loss": 0.0538, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 2.785388127853881e-05, | |
| "loss": 0.0629, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 2.7397260273972603e-05, | |
| "loss": 0.0535, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 2.6940639269406392e-05, | |
| "loss": 0.0599, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 2.6484018264840184e-05, | |
| "loss": 0.0597, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 2.6027397260273973e-05, | |
| "loss": 0.0545, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 2.557077625570776e-05, | |
| "loss": 0.0602, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 2.5114155251141553e-05, | |
| "loss": 0.0557, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.2421948912015, | |
| "eval_f1": 90.29358670439699, | |
| "eval_loss": 0.9340036511421204, | |
| "eval_runtime": 908.9095, | |
| "eval_samples_per_second": 11.723, | |
| "eval_steps_per_second": 0.184, | |
| "step": 82125 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 2.4657534246575342e-05, | |
| "loss": 0.06, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 2.4200913242009134e-05, | |
| "loss": 0.0627, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 2.3744292237442922e-05, | |
| "loss": 0.0575, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 2.328767123287671e-05, | |
| "loss": 0.0558, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 2.2831050228310503e-05, | |
| "loss": 0.0625, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 2.237442922374429e-05, | |
| "loss": 0.0614, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 2.1917808219178083e-05, | |
| "loss": 0.0561, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 2.1461187214611872e-05, | |
| "loss": 0.0584, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 2.100456621004566e-05, | |
| "loss": 0.0611, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 2.0547945205479453e-05, | |
| "loss": 0.0618, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 2.009132420091324e-05, | |
| "loss": 0.0615, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.76253547776727, | |
| "eval_f1": 90.5043271801771, | |
| "eval_loss": 0.9232891798019409, | |
| "eval_runtime": 909.3529, | |
| "eval_samples_per_second": 11.717, | |
| "eval_steps_per_second": 0.184, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 1.9634703196347033e-05, | |
| "loss": 0.0478, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 1.9178082191780822e-05, | |
| "loss": 0.0494, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 1.872146118721461e-05, | |
| "loss": 0.0473, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 1.8264840182648402e-05, | |
| "loss": 0.0421, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 1.780821917808219e-05, | |
| "loss": 0.0372, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 1.7351598173515983e-05, | |
| "loss": 0.0496, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 1.689497716894977e-05, | |
| "loss": 0.0433, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 1.643835616438356e-05, | |
| "loss": 0.0458, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 1.5981735159817352e-05, | |
| "loss": 0.0501, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 1.552511415525114e-05, | |
| "loss": 0.0477, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 1.5068493150684931e-05, | |
| "loss": 0.0448, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.55439924314096, | |
| "eval_f1": 90.46637956923067, | |
| "eval_loss": 1.0223475694656372, | |
| "eval_runtime": 911.1533, | |
| "eval_samples_per_second": 11.694, | |
| "eval_steps_per_second": 0.183, | |
| "step": 93075 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 1.4611872146118721e-05, | |
| "loss": 0.0444, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 1.4155251141552511e-05, | |
| "loss": 0.0485, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "learning_rate": 1.3698630136986302e-05, | |
| "loss": 0.0433, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 1.3242009132420092e-05, | |
| "loss": 0.041, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 1.278538812785388e-05, | |
| "loss": 0.0454, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 1.2328767123287671e-05, | |
| "loss": 0.0414, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 1.1872146118721461e-05, | |
| "loss": 0.0467, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 1.1415525114155251e-05, | |
| "loss": 0.0443, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 1.0958904109589042e-05, | |
| "loss": 0.0479, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 1.050228310502283e-05, | |
| "loss": 0.0433, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 1.004566210045662e-05, | |
| "loss": 0.0417, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.39356669820246, | |
| "eval_f1": 90.33816430966378, | |
| "eval_loss": 1.0318303108215332, | |
| "eval_runtime": 909.0606, | |
| "eval_samples_per_second": 11.721, | |
| "eval_steps_per_second": 0.184, | |
| "step": 98550 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 9.589041095890411e-06, | |
| "loss": 0.036, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 9.132420091324201e-06, | |
| "loss": 0.0347, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 8.675799086757991e-06, | |
| "loss": 0.0358, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 8.21917808219178e-06, | |
| "loss": 0.0346, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 7.76255707762557e-06, | |
| "loss": 0.0358, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 7.305936073059361e-06, | |
| "loss": 0.0419, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 6.849315068493151e-06, | |
| "loss": 0.0415, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 6.39269406392694e-06, | |
| "loss": 0.0335, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 5.936073059360731e-06, | |
| "loss": 0.034, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 5.479452054794521e-06, | |
| "loss": 0.0406, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 5.02283105022831e-06, | |
| "loss": 0.0326, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.06244087038789, | |
| "eval_f1": 90.1959647507603, | |
| "eval_loss": 1.0786569118499756, | |
| "eval_runtime": 911.242, | |
| "eval_samples_per_second": 11.693, | |
| "eval_steps_per_second": 0.183, | |
| "step": 104025 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 4.566210045662101e-06, | |
| "loss": 0.0362, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 4.10958904109589e-06, | |
| "loss": 0.0388, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 3.6529680365296803e-06, | |
| "loss": 0.0401, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 3.19634703196347e-06, | |
| "loss": 0.0396, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 2.7397260273972604e-06, | |
| "loss": 0.0336, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 2.2831050228310503e-06, | |
| "loss": 0.041, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 1.8264840182648401e-06, | |
| "loss": 0.0399, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 1.3698630136986302e-06, | |
| "loss": 0.0328, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 9.132420091324201e-07, | |
| "loss": 0.0373, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 4.5662100456621004e-07, | |
| "loss": 0.0378, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0332, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_avg_block": 0.0, | |
| "eval_exact_match": 82.19489120151371, | |
| "eval_f1": 90.25266366616492, | |
| "eval_loss": 1.07804274559021, | |
| "eval_runtime": 908.2606, | |
| "eval_samples_per_second": 11.731, | |
| "eval_steps_per_second": 0.184, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 109500, | |
| "total_flos": 1.89655978082304e+18, | |
| "train_loss": 0.2221686732322658, | |
| "train_runtime": 157184.0634, | |
| "train_samples_per_second": 5.573, | |
| "train_steps_per_second": 0.697 | |
| } | |
| ], | |
| "max_steps": 109500, | |
| "num_train_epochs": 10, | |
| "total_flos": 1.89655978082304e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |