| { |
| "best_global_step": 5000, |
| "best_metric": 80.46875241956705, |
| "best_model_checkpoint": "bert-soccer-qa/checkpoint-5000", |
| "epoch": 0.7768800497203232, |
| "eval_steps": 100, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007768800497203232, |
| "grad_norm": 9.272425651550293, |
| "learning_rate": 9.99925419515227e-06, |
| "loss": 1.1446, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.015537600994406464, |
| "grad_norm": 12.237289428710938, |
| "learning_rate": 9.99847731510255e-06, |
| "loss": 0.9455, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015537600994406464, |
| "eval_HasAns_exact": 69.85208252238225, |
| "eval_HasAns_f1": 77.13603651720207, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 69.85208252238225, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.13603651720207, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 69.85208252238225, |
| "eval_f1": 77.13603651720207, |
| "eval_loss": 0.8127343058586121, |
| "eval_runtime": 202.0338, |
| "eval_samples_per_second": 127.157, |
| "eval_steps_per_second": 7.949, |
| "eval_total": 25690, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.023306401491609695, |
| "grad_norm": 5.626998424530029, |
| "learning_rate": 9.997700435052828e-06, |
| "loss": 0.8129, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.031075201988812928, |
| "grad_norm": 6.87526798248291, |
| "learning_rate": 9.996923555003108e-06, |
| "loss": 0.8743, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.031075201988812928, |
| "eval_HasAns_exact": 70.21409108602569, |
| "eval_HasAns_f1": 77.38207884007946, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 70.21409108602569, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.38207884007946, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 70.21409108602569, |
| "eval_f1": 77.38207884007946, |
| "eval_loss": 0.7383215427398682, |
| "eval_runtime": 202.5157, |
| "eval_samples_per_second": 126.854, |
| "eval_steps_per_second": 7.93, |
| "eval_total": 25690, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03884400248601616, |
| "grad_norm": 11.461019515991211, |
| "learning_rate": 9.996146674953388e-06, |
| "loss": 0.7825, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04661280298321939, |
| "grad_norm": 9.630631446838379, |
| "learning_rate": 9.995369794903668e-06, |
| "loss": 0.7189, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04661280298321939, |
| "eval_HasAns_exact": 71.03931490852472, |
| "eval_HasAns_f1": 78.08897109574, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.03931490852472, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.08897109574, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.03931490852472, |
| "eval_f1": 78.08897109574, |
| "eval_loss": 0.7194859981536865, |
| "eval_runtime": 202.3679, |
| "eval_samples_per_second": 126.947, |
| "eval_steps_per_second": 7.936, |
| "eval_total": 25690, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.054381603480422626, |
| "grad_norm": 11.377346992492676, |
| "learning_rate": 9.994592914853948e-06, |
| "loss": 0.7889, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.062150403977625855, |
| "grad_norm": 7.450821399688721, |
| "learning_rate": 9.993816034804228e-06, |
| "loss": 0.7367, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.062150403977625855, |
| "eval_HasAns_exact": 71.08991825613079, |
| "eval_HasAns_f1": 78.00619214091435, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.08991825613079, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.00619214091435, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.08991825613079, |
| "eval_f1": 78.00619214091435, |
| "eval_loss": 0.683600902557373, |
| "eval_runtime": 202.4728, |
| "eval_samples_per_second": 126.881, |
| "eval_steps_per_second": 7.932, |
| "eval_total": 25690, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06991920447482909, |
| "grad_norm": 14.92029857635498, |
| "learning_rate": 9.993039154754508e-06, |
| "loss": 0.7838, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.07768800497203232, |
| "grad_norm": 9.030844688415527, |
| "learning_rate": 9.992262274704786e-06, |
| "loss": 0.6469, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07768800497203232, |
| "eval_HasAns_exact": 71.42467886337096, |
| "eval_HasAns_f1": 78.23334576784337, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.42467886337096, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.23334576784337, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.42467886337096, |
| "eval_f1": 78.23334576784337, |
| "eval_loss": 0.6646179556846619, |
| "eval_runtime": 202.6333, |
| "eval_samples_per_second": 126.781, |
| "eval_steps_per_second": 7.926, |
| "eval_total": 25690, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08545680546923555, |
| "grad_norm": 10.103166580200195, |
| "learning_rate": 9.991485394655066e-06, |
| "loss": 0.725, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.09322560596643878, |
| "grad_norm": 6.986274242401123, |
| "learning_rate": 9.990708514605346e-06, |
| "loss": 0.6657, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09322560596643878, |
| "eval_HasAns_exact": 70.2841572596341, |
| "eval_HasAns_f1": 77.1247889416155, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 70.2841572596341, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.1247889416155, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 70.2841572596341, |
| "eval_f1": 77.1247889416155, |
| "eval_loss": 0.6492825150489807, |
| "eval_runtime": 203.3854, |
| "eval_samples_per_second": 126.312, |
| "eval_steps_per_second": 7.896, |
| "eval_total": 25690, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.10099440646364201, |
| "grad_norm": 10.636602401733398, |
| "learning_rate": 9.989931634555626e-06, |
| "loss": 0.7337, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.10876320696084525, |
| "grad_norm": 8.252824783325195, |
| "learning_rate": 9.989154754505906e-06, |
| "loss": 0.662, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10876320696084525, |
| "eval_HasAns_exact": 72.22654729466718, |
| "eval_HasAns_f1": 79.0143805614001, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.22654729466718, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.0143805614001, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.22654729466718, |
| "eval_f1": 79.0143805614001, |
| "eval_loss": 0.6340453028678894, |
| "eval_runtime": 202.8216, |
| "eval_samples_per_second": 126.663, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.11653200745804848, |
| "grad_norm": 8.463785171508789, |
| "learning_rate": 9.988377874456184e-06, |
| "loss": 0.7265, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.12430080795525171, |
| "grad_norm": 9.748174667358398, |
| "learning_rate": 9.987600994406464e-06, |
| "loss": 0.6969, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12430080795525171, |
| "eval_HasAns_exact": 72.1292331646555, |
| "eval_HasAns_f1": 78.83892105701732, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.1292331646555, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.83892105701732, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.1292331646555, |
| "eval_f1": 78.83892105701732, |
| "eval_loss": 0.6085864901542664, |
| "eval_runtime": 202.3194, |
| "eval_samples_per_second": 126.977, |
| "eval_steps_per_second": 7.938, |
| "eval_total": 25690, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.13206960845245494, |
| "grad_norm": 9.786517143249512, |
| "learning_rate": 9.986824114356744e-06, |
| "loss": 0.7111, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.13983840894965818, |
| "grad_norm": 8.391840934753418, |
| "learning_rate": 9.986047234307024e-06, |
| "loss": 0.669, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13983840894965818, |
| "eval_HasAns_exact": 71.89567925262749, |
| "eval_HasAns_f1": 78.61371443349637, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.89567925262749, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.61371443349637, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.89567925262749, |
| "eval_f1": 78.61371443349637, |
| "eval_loss": 0.5937665104866028, |
| "eval_runtime": 202.8476, |
| "eval_samples_per_second": 126.647, |
| "eval_steps_per_second": 7.917, |
| "eval_total": 25690, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1476072094468614, |
| "grad_norm": 11.602773666381836, |
| "learning_rate": 9.985270354257304e-06, |
| "loss": 0.7253, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.15537600994406464, |
| "grad_norm": 9.150772094726562, |
| "learning_rate": 9.984493474207582e-06, |
| "loss": 0.6676, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.15537600994406464, |
| "eval_HasAns_exact": 72.28493577267419, |
| "eval_HasAns_f1": 78.8876551339714, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.28493577267419, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.8876551339714, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.28493577267419, |
| "eval_f1": 78.8876551339714, |
| "eval_loss": 0.5816648602485657, |
| "eval_runtime": 202.8184, |
| "eval_samples_per_second": 126.665, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.16314481044126786, |
| "grad_norm": 10.605375289916992, |
| "learning_rate": 9.983716594157864e-06, |
| "loss": 0.7131, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1709136109384711, |
| "grad_norm": 13.075970649719238, |
| "learning_rate": 9.982939714108144e-06, |
| "loss": 0.6664, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1709136109384711, |
| "eval_HasAns_exact": 71.95406773063449, |
| "eval_HasAns_f1": 78.6814885378308, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.95406773063449, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.6814885378308, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.95406773063449, |
| "eval_f1": 78.6814885378308, |
| "eval_loss": 0.5695982575416565, |
| "eval_runtime": 202.5931, |
| "eval_samples_per_second": 126.806, |
| "eval_steps_per_second": 7.927, |
| "eval_total": 25690, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.17868241143567434, |
| "grad_norm": 12.333536148071289, |
| "learning_rate": 9.982162834058422e-06, |
| "loss": 0.6016, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.18645121193287756, |
| "grad_norm": 9.748809814453125, |
| "learning_rate": 9.981385954008702e-06, |
| "loss": 0.6006, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.18645121193287756, |
| "eval_HasAns_exact": 72.06305955624757, |
| "eval_HasAns_f1": 78.75337714295185, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.06305955624757, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.75337714295185, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.06305955624757, |
| "eval_f1": 78.75337714295185, |
| "eval_loss": 0.5660755038261414, |
| "eval_runtime": 202.8834, |
| "eval_samples_per_second": 126.624, |
| "eval_steps_per_second": 7.916, |
| "eval_total": 25690, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1942200124300808, |
| "grad_norm": 9.964409828186035, |
| "learning_rate": 9.980609073958982e-06, |
| "loss": 0.6268, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.20198881292728402, |
| "grad_norm": 6.387030124664307, |
| "learning_rate": 9.979832193909262e-06, |
| "loss": 0.6111, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20198881292728402, |
| "eval_HasAns_exact": 72.65862203191904, |
| "eval_HasAns_f1": 79.23512118883761, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.65862203191904, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.23512118883761, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.65862203191904, |
| "eval_f1": 79.23512118883761, |
| "eval_loss": 0.5586764216423035, |
| "eval_runtime": 203.0862, |
| "eval_samples_per_second": 126.498, |
| "eval_steps_per_second": 7.908, |
| "eval_total": 25690, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20975761342448726, |
| "grad_norm": 9.103731155395508, |
| "learning_rate": 9.979055313859542e-06, |
| "loss": 0.566, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.2175264139216905, |
| "grad_norm": 13.135197639465332, |
| "learning_rate": 9.97827843380982e-06, |
| "loss": 0.5793, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.2175264139216905, |
| "eval_HasAns_exact": 72.39392759828728, |
| "eval_HasAns_f1": 79.00515970901382, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.39392759828728, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.00515970901382, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.39392759828728, |
| "eval_f1": 79.00515970901382, |
| "eval_loss": 0.5600055456161499, |
| "eval_runtime": 202.5804, |
| "eval_samples_per_second": 126.814, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.22529521441889372, |
| "grad_norm": 9.040102005004883, |
| "learning_rate": 9.9775015537601e-06, |
| "loss": 0.6309, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.23306401491609696, |
| "grad_norm": 11.526878356933594, |
| "learning_rate": 9.97672467371038e-06, |
| "loss": 0.6064, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.23306401491609696, |
| "eval_HasAns_exact": 72.74425846632931, |
| "eval_HasAns_f1": 79.36155045560372, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.74425846632931, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.36155045560372, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.74425846632931, |
| "eval_f1": 79.36155045560372, |
| "eval_loss": 0.5500572323799133, |
| "eval_runtime": 202.9048, |
| "eval_samples_per_second": 126.611, |
| "eval_steps_per_second": 7.915, |
| "eval_total": 25690, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.24083281541330018, |
| "grad_norm": 4.359344482421875, |
| "learning_rate": 9.97594779366066e-06, |
| "loss": 0.6823, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.24860161591050342, |
| "grad_norm": 9.813569068908691, |
| "learning_rate": 9.97517091361094e-06, |
| "loss": 0.6314, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.24860161591050342, |
| "eval_HasAns_exact": 72.27715064227326, |
| "eval_HasAns_f1": 78.8175772466993, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.27715064227326, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.8175772466993, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.27715064227326, |
| "eval_f1": 78.8175772466993, |
| "eval_loss": 0.5353918075561523, |
| "eval_runtime": 202.6516, |
| "eval_samples_per_second": 126.769, |
| "eval_steps_per_second": 7.925, |
| "eval_total": 25690, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.25637041640770664, |
| "grad_norm": 7.802361965179443, |
| "learning_rate": 9.974394033561218e-06, |
| "loss": 0.6026, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2641392169049099, |
| "grad_norm": 15.137731552124023, |
| "learning_rate": 9.973617153511498e-06, |
| "loss": 0.6741, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2641392169049099, |
| "eval_HasAns_exact": 72.1058777734527, |
| "eval_HasAns_f1": 78.67773271200794, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.1058777734527, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.67773271200794, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.1058777734527, |
| "eval_f1": 78.67773271200794, |
| "eval_loss": 0.5329614877700806, |
| "eval_runtime": 202.7192, |
| "eval_samples_per_second": 126.727, |
| "eval_steps_per_second": 7.922, |
| "eval_total": 25690, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2719080174021131, |
| "grad_norm": 11.30902099609375, |
| "learning_rate": 9.972840273461778e-06, |
| "loss": 0.6069, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.27967681789931637, |
| "grad_norm": 9.311911582946777, |
| "learning_rate": 9.972063393412058e-06, |
| "loss": 0.5912, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.27967681789931637, |
| "eval_HasAns_exact": 72.24990268586998, |
| "eval_HasAns_f1": 78.7891482246884, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.24990268586998, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.7891482246884, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.24990268586998, |
| "eval_f1": 78.7891482246884, |
| "eval_loss": 0.5291240811347961, |
| "eval_runtime": 202.8091, |
| "eval_samples_per_second": 126.671, |
| "eval_steps_per_second": 7.919, |
| "eval_total": 25690, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.28744561839651955, |
| "grad_norm": 9.725021362304688, |
| "learning_rate": 9.971286513362338e-06, |
| "loss": 0.5943, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2952144188937228, |
| "grad_norm": 8.321113586425781, |
| "learning_rate": 9.970509633312618e-06, |
| "loss": 0.584, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2952144188937228, |
| "eval_HasAns_exact": 72.56909303230829, |
| "eval_HasAns_f1": 79.12955371544794, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.56909303230829, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.12955371544794, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.56909303230829, |
| "eval_f1": 79.12955371544794, |
| "eval_loss": 0.5198299288749695, |
| "eval_runtime": 202.5651, |
| "eval_samples_per_second": 126.823, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.30298321939092604, |
| "grad_norm": 11.973769187927246, |
| "learning_rate": 9.969732753262898e-06, |
| "loss": 0.5895, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.3107520198881293, |
| "grad_norm": 9.40645980834961, |
| "learning_rate": 9.968955873213176e-06, |
| "loss": 0.64, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3107520198881293, |
| "eval_HasAns_exact": 72.79096924873491, |
| "eval_HasAns_f1": 79.28733698122942, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.79096924873491, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.28733698122942, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.79096924873491, |
| "eval_f1": 79.28733698122942, |
| "eval_loss": 0.5117060542106628, |
| "eval_runtime": 203.1079, |
| "eval_samples_per_second": 126.485, |
| "eval_steps_per_second": 7.907, |
| "eval_total": 25690, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3185208203853325, |
| "grad_norm": 14.14991569519043, |
| "learning_rate": 9.968178993163456e-06, |
| "loss": 0.5559, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.3262896208825357, |
| "grad_norm": 10.641693115234375, |
| "learning_rate": 9.967402113113736e-06, |
| "loss": 0.5361, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3262896208825357, |
| "eval_HasAns_exact": 73.13351498637603, |
| "eval_HasAns_f1": 79.60268173326928, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.13351498637603, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.60268173326928, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.13351498637603, |
| "eval_f1": 79.60268173326928, |
| "eval_loss": 0.5079160928726196, |
| "eval_runtime": 202.7995, |
| "eval_samples_per_second": 126.677, |
| "eval_steps_per_second": 7.919, |
| "eval_total": 25690, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.33405842137973896, |
| "grad_norm": 10.088223457336426, |
| "learning_rate": 9.966625233064016e-06, |
| "loss": 0.6151, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3418272218769422, |
| "grad_norm": 10.863611221313477, |
| "learning_rate": 9.965848353014296e-06, |
| "loss": 0.5935, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3418272218769422, |
| "eval_HasAns_exact": 72.9349941611522, |
| "eval_HasAns_f1": 79.49240914807372, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.9349941611522, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.49240914807372, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.9349941611522, |
| "eval_f1": 79.49240914807372, |
| "eval_loss": 0.5025383234024048, |
| "eval_runtime": 203.3956, |
| "eval_samples_per_second": 126.306, |
| "eval_steps_per_second": 7.896, |
| "eval_total": 25690, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.34959602237414544, |
| "grad_norm": 12.565064430236816, |
| "learning_rate": 9.965071472964574e-06, |
| "loss": 0.5667, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.3573648228713487, |
| "grad_norm": 5.066905975341797, |
| "learning_rate": 9.964294592914854e-06, |
| "loss": 0.5198, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3573648228713487, |
| "eval_HasAns_exact": 72.6975476839237, |
| "eval_HasAns_f1": 79.25327470362464, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.6975476839237, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.25327470362464, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.6975476839237, |
| "eval_f1": 79.25327470362464, |
| "eval_loss": 0.4996239244937897, |
| "eval_runtime": 204.0149, |
| "eval_samples_per_second": 125.922, |
| "eval_steps_per_second": 7.872, |
| "eval_total": 25690, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3651336233685519, |
| "grad_norm": 10.777655601501465, |
| "learning_rate": 9.963517712865134e-06, |
| "loss": 0.4983, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3729024238657551, |
| "grad_norm": 6.770049095153809, |
| "learning_rate": 9.962740832815414e-06, |
| "loss": 0.5474, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3729024238657551, |
| "eval_HasAns_exact": 73.29700272479565, |
| "eval_HasAns_f1": 79.7561576366954, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.29700272479565, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.7561576366954, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.29700272479565, |
| "eval_f1": 79.7561576366954, |
| "eval_loss": 0.4912045896053314, |
| "eval_runtime": 202.5387, |
| "eval_samples_per_second": 126.84, |
| "eval_steps_per_second": 7.929, |
| "eval_total": 25690, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.38067122436295836, |
| "grad_norm": 11.886155128479004, |
| "learning_rate": 9.961963952765694e-06, |
| "loss": 0.5038, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3884400248601616, |
| "grad_norm": 15.60519027709961, |
| "learning_rate": 9.961187072715973e-06, |
| "loss": 0.5655, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3884400248601616, |
| "eval_HasAns_exact": 73.46049046321527, |
| "eval_HasAns_f1": 79.95807844595133, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.46049046321527, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.95807844595133, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.46049046321527, |
| "eval_f1": 79.95807844595133, |
| "eval_loss": 0.48474493622779846, |
| "eval_runtime": 202.6752, |
| "eval_samples_per_second": 126.755, |
| "eval_steps_per_second": 7.924, |
| "eval_total": 25690, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.39620882535736485, |
| "grad_norm": 12.03936767578125, |
| "learning_rate": 9.960410192666252e-06, |
| "loss": 0.5519, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.40397762585456803, |
| "grad_norm": 9.260823249816895, |
| "learning_rate": 9.959633312616532e-06, |
| "loss": 0.5652, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.40397762585456803, |
| "eval_HasAns_exact": 73.33203581159984, |
| "eval_HasAns_f1": 79.82072091311194, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.33203581159984, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.82072091311194, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.33203581159984, |
| "eval_f1": 79.82072091311194, |
| "eval_loss": 0.47837215662002563, |
| "eval_runtime": 202.9924, |
| "eval_samples_per_second": 126.556, |
| "eval_steps_per_second": 7.912, |
| "eval_total": 25690, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.4117464263517713, |
| "grad_norm": 4.781002998352051, |
| "learning_rate": 9.958856432566812e-06, |
| "loss": 0.5185, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.4195152268489745, |
| "grad_norm": 12.118820190429688, |
| "learning_rate": 9.958079552517092e-06, |
| "loss": 0.5288, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.4195152268489745, |
| "eval_HasAns_exact": 73.48384585441806, |
| "eval_HasAns_f1": 79.92611407047283, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.48384585441806, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.92611407047283, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.48384585441806, |
| "eval_f1": 79.92611407047283, |
| "eval_loss": 0.48464033007621765, |
| "eval_runtime": 202.821, |
| "eval_samples_per_second": 126.663, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.42728402734617776, |
| "grad_norm": 10.118515968322754, |
| "learning_rate": 9.957302672467372e-06, |
| "loss": 0.5322, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.435052827843381, |
| "grad_norm": 14.269044876098633, |
| "learning_rate": 9.956525792417652e-06, |
| "loss": 0.539, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.435052827843381, |
| "eval_HasAns_exact": 73.07901907356948, |
| "eval_HasAns_f1": 79.5400837491081, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.07901907356948, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.5400837491081, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.07901907356948, |
| "eval_f1": 79.5400837491081, |
| "eval_loss": 0.47385331988334656, |
| "eval_runtime": 202.429, |
| "eval_samples_per_second": 126.909, |
| "eval_steps_per_second": 7.934, |
| "eval_total": 25690, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.4428216283405842, |
| "grad_norm": 5.349546432495117, |
| "learning_rate": 9.95574891236793e-06, |
| "loss": 0.5163, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.45059042883778744, |
| "grad_norm": 7.0492472648620605, |
| "learning_rate": 9.95497203231821e-06, |
| "loss": 0.5493, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.45059042883778744, |
| "eval_HasAns_exact": 73.3008952899961, |
| "eval_HasAns_f1": 79.69201771156926, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.3008952899961, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.69201771156926, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.3008952899961, |
| "eval_f1": 79.69201771156926, |
| "eval_loss": 0.46940287947654724, |
| "eval_runtime": 202.5768, |
| "eval_samples_per_second": 126.816, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4583592293349907, |
| "grad_norm": 11.887402534484863, |
| "learning_rate": 9.95419515226849e-06, |
| "loss": 0.5449, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.4661280298321939, |
| "grad_norm": 13.259313583374023, |
| "learning_rate": 9.95341827221877e-06, |
| "loss": 0.4785, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4661280298321939, |
| "eval_HasAns_exact": 73.94316854807317, |
| "eval_HasAns_f1": 80.19664333797313, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.94316854807317, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.19664333797313, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.94316854807317, |
| "eval_f1": 80.19664333797313, |
| "eval_loss": 0.4669385254383087, |
| "eval_runtime": 202.9252, |
| "eval_samples_per_second": 126.598, |
| "eval_steps_per_second": 7.914, |
| "eval_total": 25690, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.47389683032939717, |
| "grad_norm": 10.316826820373535, |
| "learning_rate": 9.95264139216905e-06, |
| "loss": 0.5423, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.48166563082660035, |
| "grad_norm": 12.742466926574707, |
| "learning_rate": 9.951864512119329e-06, |
| "loss": 0.4979, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.48166563082660035, |
| "eval_HasAns_exact": 73.51887894122227, |
| "eval_HasAns_f1": 79.8527980954033, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.51887894122227, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.8527980954033, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.51887894122227, |
| "eval_f1": 79.8527980954033, |
| "eval_loss": 0.4643915295600891, |
| "eval_runtime": 202.4105, |
| "eval_samples_per_second": 126.92, |
| "eval_steps_per_second": 7.934, |
| "eval_total": 25690, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.4894344313238036, |
| "grad_norm": 10.1032133102417, |
| "learning_rate": 9.951087632069609e-06, |
| "loss": 0.5123, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.49720323182100684, |
| "grad_norm": 11.031068801879883, |
| "learning_rate": 9.950310752019889e-06, |
| "loss": 0.4908, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.49720323182100684, |
| "eval_HasAns_exact": 73.5733748540288, |
| "eval_HasAns_f1": 79.90847878948638, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.5733748540288, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.90847878948638, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.5733748540288, |
| "eval_f1": 79.90847878948638, |
| "eval_loss": 0.4575786292552948, |
| "eval_runtime": 203.0942, |
| "eval_samples_per_second": 126.493, |
| "eval_steps_per_second": 7.908, |
| "eval_total": 25690, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5049720323182101, |
| "grad_norm": 8.769746780395508, |
| "learning_rate": 9.949533871970169e-06, |
| "loss": 0.5333, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.5127408328154133, |
| "grad_norm": 5.714237689971924, |
| "learning_rate": 9.948756991920449e-06, |
| "loss": 0.4845, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.5127408328154133, |
| "eval_HasAns_exact": 73.81471389645776, |
| "eval_HasAns_f1": 80.1410176783366, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.81471389645776, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.1410176783366, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.81471389645776, |
| "eval_f1": 80.1410176783366, |
| "eval_loss": 0.4491117000579834, |
| "eval_runtime": 203.2042, |
| "eval_samples_per_second": 126.425, |
| "eval_steps_per_second": 7.903, |
| "eval_total": 25690, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.5205096333126166, |
| "grad_norm": 7.215436935424805, |
| "learning_rate": 9.947980111870727e-06, |
| "loss": 0.5222, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.5282784338098198, |
| "grad_norm": 5.259133338928223, |
| "learning_rate": 9.947203231821008e-06, |
| "loss": 0.5234, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.5282784338098198, |
| "eval_HasAns_exact": 73.60062281043207, |
| "eval_HasAns_f1": 79.98815996264248, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.60062281043207, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.98815996264248, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.60062281043207, |
| "eval_f1": 79.98815996264248, |
| "eval_loss": 0.44988834857940674, |
| "eval_runtime": 202.7517, |
| "eval_samples_per_second": 126.707, |
| "eval_steps_per_second": 7.921, |
| "eval_total": 25690, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.536047234307023, |
| "grad_norm": 17.840496063232422, |
| "learning_rate": 9.946426351771288e-06, |
| "loss": 0.5067, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.5438160348042262, |
| "grad_norm": 12.869794845581055, |
| "learning_rate": 9.945649471721567e-06, |
| "loss": 0.5345, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5438160348042262, |
| "eval_HasAns_exact": 73.63954846243675, |
| "eval_HasAns_f1": 79.9724238557947, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.63954846243675, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.9724238557947, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.63954846243675, |
| "eval_f1": 79.9724238557947, |
| "eval_loss": 0.4415437877178192, |
| "eval_runtime": 202.8713, |
| "eval_samples_per_second": 126.632, |
| "eval_steps_per_second": 7.916, |
| "eval_total": 25690, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5515848353014294, |
| "grad_norm": 12.861027717590332, |
| "learning_rate": 9.944872591671847e-06, |
| "loss": 0.5208, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.5593536357986327, |
| "grad_norm": 8.758796691894531, |
| "learning_rate": 9.944095711622127e-06, |
| "loss": 0.5153, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.5593536357986327, |
| "eval_HasAns_exact": 73.52277150642273, |
| "eval_HasAns_f1": 79.84709284781033, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.52277150642273, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.84709284781033, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.52277150642273, |
| "eval_f1": 79.84709284781033, |
| "eval_loss": 0.43877625465393066, |
| "eval_runtime": 202.3294, |
| "eval_samples_per_second": 126.971, |
| "eval_steps_per_second": 7.938, |
| "eval_total": 25690, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.5671224362958359, |
| "grad_norm": 12.555262565612793, |
| "learning_rate": 9.943318831572407e-06, |
| "loss": 0.5308, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.5748912367930391, |
| "grad_norm": 15.02426815032959, |
| "learning_rate": 9.942541951522687e-06, |
| "loss": 0.5113, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5748912367930391, |
| "eval_HasAns_exact": 73.93927598287272, |
| "eval_HasAns_f1": 80.22597942158374, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.93927598287272, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.22597942158374, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.93927598287272, |
| "eval_f1": 80.22597942158374, |
| "eval_loss": 0.44514408707618713, |
| "eval_runtime": 202.5852, |
| "eval_samples_per_second": 126.811, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5826600372902424, |
| "grad_norm": 9.149701118469238, |
| "learning_rate": 9.941765071472965e-06, |
| "loss": 0.5102, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.5904288377874456, |
| "grad_norm": 4.010859966278076, |
| "learning_rate": 9.940988191423245e-06, |
| "loss": 0.4752, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5904288377874456, |
| "eval_HasAns_exact": 73.73297002724796, |
| "eval_HasAns_f1": 80.00469577758224, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.73297002724796, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.00469577758224, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.73297002724796, |
| "eval_f1": 80.00469577758224, |
| "eval_loss": 0.4426616132259369, |
| "eval_runtime": 202.7934, |
| "eval_samples_per_second": 126.681, |
| "eval_steps_per_second": 7.919, |
| "eval_total": 25690, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5981976382846489, |
| "grad_norm": 4.767871379852295, |
| "learning_rate": 9.940211311373525e-06, |
| "loss": 0.5013, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.6059664387818521, |
| "grad_norm": 11.838374137878418, |
| "learning_rate": 9.939434431323805e-06, |
| "loss": 0.5161, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6059664387818521, |
| "eval_HasAns_exact": 73.70182950564421, |
| "eval_HasAns_f1": 79.9274484107698, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.70182950564421, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.9274484107698, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.70182950564421, |
| "eval_f1": 79.9274484107698, |
| "eval_loss": 0.4381595253944397, |
| "eval_runtime": 202.3985, |
| "eval_samples_per_second": 126.928, |
| "eval_steps_per_second": 7.935, |
| "eval_total": 25690, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6137352392790553, |
| "grad_norm": 9.235823631286621, |
| "learning_rate": 9.938657551274085e-06, |
| "loss": 0.4906, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.6215040397762586, |
| "grad_norm": 6.428686141967773, |
| "learning_rate": 9.937880671224363e-06, |
| "loss": 0.4734, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6215040397762586, |
| "eval_HasAns_exact": 73.88867263526664, |
| "eval_HasAns_f1": 80.07731060014116, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.88867263526664, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.07731060014116, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.88867263526664, |
| "eval_f1": 80.07731060014116, |
| "eval_loss": 0.43801450729370117, |
| "eval_runtime": 202.7309, |
| "eval_samples_per_second": 126.72, |
| "eval_steps_per_second": 7.922, |
| "eval_total": 25690, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6292728402734618, |
| "grad_norm": 6.1717352867126465, |
| "learning_rate": 9.937103791174643e-06, |
| "loss": 0.5139, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.637041640770665, |
| "grad_norm": 8.657469749450684, |
| "learning_rate": 9.936326911124923e-06, |
| "loss": 0.4852, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.637041640770665, |
| "eval_HasAns_exact": 74.00934215648113, |
| "eval_HasAns_f1": 80.17172940362448, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.00934215648113, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.17172940362448, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.00934215648113, |
| "eval_f1": 80.17172940362448, |
| "eval_loss": 0.4333701431751251, |
| "eval_runtime": 202.3246, |
| "eval_samples_per_second": 126.974, |
| "eval_steps_per_second": 7.938, |
| "eval_total": 25690, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.6448104412678682, |
| "grad_norm": 15.310400009155273, |
| "learning_rate": 9.935565568676197e-06, |
| "loss": 0.4829, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.6525792417650714, |
| "grad_norm": 8.805957794189453, |
| "learning_rate": 9.934788688626477e-06, |
| "loss": 0.5121, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.6525792417650714, |
| "eval_HasAns_exact": 73.94706111327365, |
| "eval_HasAns_f1": 80.13206637005086, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.94706111327365, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.13206637005086, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.94706111327365, |
| "eval_f1": 80.13206637005086, |
| "eval_loss": 0.42443910241127014, |
| "eval_runtime": 202.6593, |
| "eval_samples_per_second": 126.764, |
| "eval_steps_per_second": 7.925, |
| "eval_total": 25690, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.6603480422622747, |
| "grad_norm": 15.132555961608887, |
| "learning_rate": 9.934011808576757e-06, |
| "loss": 0.4709, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.6681168427594779, |
| "grad_norm": 9.101954460144043, |
| "learning_rate": 9.933234928527035e-06, |
| "loss": 0.4475, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.6681168427594779, |
| "eval_HasAns_exact": 73.97820163487738, |
| "eval_HasAns_f1": 80.1311355196045, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.97820163487738, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.1311355196045, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.97820163487738, |
| "eval_f1": 80.1311355196045, |
| "eval_loss": 0.4284706115722656, |
| "eval_runtime": 202.5904, |
| "eval_samples_per_second": 126.808, |
| "eval_steps_per_second": 7.927, |
| "eval_total": 25690, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.6758856432566812, |
| "grad_norm": 9.306312561035156, |
| "learning_rate": 9.932458048477315e-06, |
| "loss": 0.4783, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.6836544437538844, |
| "grad_norm": 11.343928337097168, |
| "learning_rate": 9.931681168427595e-06, |
| "loss": 0.4963, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6836544437538844, |
| "eval_HasAns_exact": 74.00155702608019, |
| "eval_HasAns_f1": 80.11240181139512, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.00155702608019, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.11240181139512, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.00155702608019, |
| "eval_f1": 80.11240181139512, |
| "eval_loss": 0.41757330298423767, |
| "eval_runtime": 202.6748, |
| "eval_samples_per_second": 126.755, |
| "eval_steps_per_second": 7.924, |
| "eval_total": 25690, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6914232442510876, |
| "grad_norm": 4.360355854034424, |
| "learning_rate": 9.930904288377875e-06, |
| "loss": 0.4787, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.6991920447482909, |
| "grad_norm": 11.314460754394531, |
| "learning_rate": 9.930127408328155e-06, |
| "loss": 0.478, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.6991920447482909, |
| "eval_HasAns_exact": 74.28571428571429, |
| "eval_HasAns_f1": 80.33741809886175, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.28571428571429, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.33741809886175, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.28571428571429, |
| "eval_f1": 80.33741809886175, |
| "eval_loss": 0.4186555743217468, |
| "eval_runtime": 202.8043, |
| "eval_samples_per_second": 126.674, |
| "eval_steps_per_second": 7.919, |
| "eval_total": 25690, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.7069608452454941, |
| "grad_norm": 6.806874752044678, |
| "learning_rate": 9.929350528278434e-06, |
| "loss": 0.4358, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.7147296457426974, |
| "grad_norm": 12.57098388671875, |
| "learning_rate": 9.928573648228714e-06, |
| "loss": 0.4634, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7147296457426974, |
| "eval_HasAns_exact": 74.01712728688206, |
| "eval_HasAns_f1": 80.14050760913791, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.01712728688206, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.14050760913791, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.01712728688206, |
| "eval_f1": 80.14050760913791, |
| "eval_loss": 0.41584375500679016, |
| "eval_runtime": 202.6443, |
| "eval_samples_per_second": 126.774, |
| "eval_steps_per_second": 7.925, |
| "eval_total": 25690, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7224984462399006, |
| "grad_norm": 16.962316513061523, |
| "learning_rate": 9.927796768178995e-06, |
| "loss": 0.4566, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.7302672467371037, |
| "grad_norm": 7.093566417694092, |
| "learning_rate": 9.927019888129273e-06, |
| "loss": 0.4562, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.7302672467371037, |
| "eval_HasAns_exact": 74.04437524328533, |
| "eval_HasAns_f1": 80.11735159092045, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.04437524328533, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.11735159092045, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.04437524328533, |
| "eval_f1": 80.11735159092045, |
| "eval_loss": 0.4158124327659607, |
| "eval_runtime": 202.4966, |
| "eval_samples_per_second": 126.866, |
| "eval_steps_per_second": 7.931, |
| "eval_total": 25690, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.738036047234307, |
| "grad_norm": 6.718151569366455, |
| "learning_rate": 9.926243008079553e-06, |
| "loss": 0.4779, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.7458048477315102, |
| "grad_norm": 14.11593246459961, |
| "learning_rate": 9.925466128029833e-06, |
| "loss": 0.509, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.7458048477315102, |
| "eval_HasAns_exact": 73.90813546126897, |
| "eval_HasAns_f1": 79.98627578181686, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.90813546126897, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.98627578181686, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.90813546126897, |
| "eval_f1": 79.98627578181686, |
| "eval_loss": 0.41596540808677673, |
| "eval_runtime": 202.581, |
| "eval_samples_per_second": 126.813, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.7535736482287135, |
| "grad_norm": 10.98654556274414, |
| "learning_rate": 9.924689247980113e-06, |
| "loss": 0.4164, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.7613424487259167, |
| "grad_norm": 9.946949005126953, |
| "learning_rate": 9.923912367930393e-06, |
| "loss": 0.5062, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.7613424487259167, |
| "eval_HasAns_exact": 74.01323472168158, |
| "eval_HasAns_f1": 80.12072572139839, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.01323472168158, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.12072572139839, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.01323472168158, |
| "eval_f1": 80.12072572139839, |
| "eval_loss": 0.4129224419593811, |
| "eval_runtime": 202.7016, |
| "eval_samples_per_second": 126.738, |
| "eval_steps_per_second": 7.923, |
| "eval_total": 25690, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.7691112492231199, |
| "grad_norm": 10.8538236618042, |
| "learning_rate": 9.923135487880672e-06, |
| "loss": 0.5012, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.7768800497203232, |
| "grad_norm": 8.31200122833252, |
| "learning_rate": 9.922358607830952e-06, |
| "loss": 0.4404, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.7768800497203232, |
| "eval_HasAns_exact": 74.33631763332036, |
| "eval_HasAns_f1": 80.46875241956705, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 74.33631763332036, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 80.46875241956705, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 74.33631763332036, |
| "eval_f1": 80.46875241956705, |
| "eval_loss": 0.41118866205215454, |
| "eval_runtime": 202.5674, |
| "eval_samples_per_second": 126.822, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 643600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 10, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.180748107776e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|