| { |
| "best_global_step": 2500, |
| "best_metric": 79.95807844595133, |
| "best_model_checkpoint": "bert-soccer-qa/checkpoint-2500", |
| "epoch": 0.3884400248601616, |
| "eval_steps": 100, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007768800497203232, |
| "grad_norm": 9.272425651550293, |
| "learning_rate": 9.99925419515227e-06, |
| "loss": 1.1446, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.015537600994406464, |
| "grad_norm": 12.237289428710938, |
| "learning_rate": 9.99847731510255e-06, |
| "loss": 0.9455, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015537600994406464, |
| "eval_HasAns_exact": 69.85208252238225, |
| "eval_HasAns_f1": 77.13603651720207, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 69.85208252238225, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.13603651720207, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 69.85208252238225, |
| "eval_f1": 77.13603651720207, |
| "eval_loss": 0.8127343058586121, |
| "eval_runtime": 202.0338, |
| "eval_samples_per_second": 127.157, |
| "eval_steps_per_second": 7.949, |
| "eval_total": 25690, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.023306401491609695, |
| "grad_norm": 5.626998424530029, |
| "learning_rate": 9.997700435052828e-06, |
| "loss": 0.8129, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.031075201988812928, |
| "grad_norm": 6.87526798248291, |
| "learning_rate": 9.996923555003108e-06, |
| "loss": 0.8743, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.031075201988812928, |
| "eval_HasAns_exact": 70.21409108602569, |
| "eval_HasAns_f1": 77.38207884007946, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 70.21409108602569, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.38207884007946, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 70.21409108602569, |
| "eval_f1": 77.38207884007946, |
| "eval_loss": 0.7383215427398682, |
| "eval_runtime": 202.5157, |
| "eval_samples_per_second": 126.854, |
| "eval_steps_per_second": 7.93, |
| "eval_total": 25690, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03884400248601616, |
| "grad_norm": 11.461019515991211, |
| "learning_rate": 9.996146674953388e-06, |
| "loss": 0.7825, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04661280298321939, |
| "grad_norm": 9.630631446838379, |
| "learning_rate": 9.995369794903668e-06, |
| "loss": 0.7189, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04661280298321939, |
| "eval_HasAns_exact": 71.03931490852472, |
| "eval_HasAns_f1": 78.08897109574, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.03931490852472, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.08897109574, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.03931490852472, |
| "eval_f1": 78.08897109574, |
| "eval_loss": 0.7194859981536865, |
| "eval_runtime": 202.3679, |
| "eval_samples_per_second": 126.947, |
| "eval_steps_per_second": 7.936, |
| "eval_total": 25690, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.054381603480422626, |
| "grad_norm": 11.377346992492676, |
| "learning_rate": 9.994592914853948e-06, |
| "loss": 0.7889, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.062150403977625855, |
| "grad_norm": 7.450821399688721, |
| "learning_rate": 9.993816034804228e-06, |
| "loss": 0.7367, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.062150403977625855, |
| "eval_HasAns_exact": 71.08991825613079, |
| "eval_HasAns_f1": 78.00619214091435, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.08991825613079, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.00619214091435, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.08991825613079, |
| "eval_f1": 78.00619214091435, |
| "eval_loss": 0.683600902557373, |
| "eval_runtime": 202.4728, |
| "eval_samples_per_second": 126.881, |
| "eval_steps_per_second": 7.932, |
| "eval_total": 25690, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06991920447482909, |
| "grad_norm": 14.92029857635498, |
| "learning_rate": 9.993039154754508e-06, |
| "loss": 0.7838, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.07768800497203232, |
| "grad_norm": 9.030844688415527, |
| "learning_rate": 9.992262274704786e-06, |
| "loss": 0.6469, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07768800497203232, |
| "eval_HasAns_exact": 71.42467886337096, |
| "eval_HasAns_f1": 78.23334576784337, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.42467886337096, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.23334576784337, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.42467886337096, |
| "eval_f1": 78.23334576784337, |
| "eval_loss": 0.6646179556846619, |
| "eval_runtime": 202.6333, |
| "eval_samples_per_second": 126.781, |
| "eval_steps_per_second": 7.926, |
| "eval_total": 25690, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08545680546923555, |
| "grad_norm": 10.103166580200195, |
| "learning_rate": 9.991485394655066e-06, |
| "loss": 0.725, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.09322560596643878, |
| "grad_norm": 6.986274242401123, |
| "learning_rate": 9.990708514605346e-06, |
| "loss": 0.6657, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09322560596643878, |
| "eval_HasAns_exact": 70.2841572596341, |
| "eval_HasAns_f1": 77.1247889416155, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 70.2841572596341, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.1247889416155, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 70.2841572596341, |
| "eval_f1": 77.1247889416155, |
| "eval_loss": 0.6492825150489807, |
| "eval_runtime": 203.3854, |
| "eval_samples_per_second": 126.312, |
| "eval_steps_per_second": 7.896, |
| "eval_total": 25690, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.10099440646364201, |
| "grad_norm": 10.636602401733398, |
| "learning_rate": 9.989931634555626e-06, |
| "loss": 0.7337, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.10876320696084525, |
| "grad_norm": 8.252824783325195, |
| "learning_rate": 9.989154754505906e-06, |
| "loss": 0.662, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10876320696084525, |
| "eval_HasAns_exact": 72.22654729466718, |
| "eval_HasAns_f1": 79.0143805614001, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.22654729466718, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.0143805614001, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.22654729466718, |
| "eval_f1": 79.0143805614001, |
| "eval_loss": 0.6340453028678894, |
| "eval_runtime": 202.8216, |
| "eval_samples_per_second": 126.663, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.11653200745804848, |
| "grad_norm": 8.463785171508789, |
| "learning_rate": 9.988377874456184e-06, |
| "loss": 0.7265, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.12430080795525171, |
| "grad_norm": 9.748174667358398, |
| "learning_rate": 9.987600994406464e-06, |
| "loss": 0.6969, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12430080795525171, |
| "eval_HasAns_exact": 72.1292331646555, |
| "eval_HasAns_f1": 78.83892105701732, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.1292331646555, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.83892105701732, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.1292331646555, |
| "eval_f1": 78.83892105701732, |
| "eval_loss": 0.6085864901542664, |
| "eval_runtime": 202.3194, |
| "eval_samples_per_second": 126.977, |
| "eval_steps_per_second": 7.938, |
| "eval_total": 25690, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.13206960845245494, |
| "grad_norm": 9.786517143249512, |
| "learning_rate": 9.986824114356744e-06, |
| "loss": 0.7111, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.13983840894965818, |
| "grad_norm": 8.391840934753418, |
| "learning_rate": 9.986047234307024e-06, |
| "loss": 0.669, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13983840894965818, |
| "eval_HasAns_exact": 71.89567925262749, |
| "eval_HasAns_f1": 78.61371443349637, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.89567925262749, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.61371443349637, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.89567925262749, |
| "eval_f1": 78.61371443349637, |
| "eval_loss": 0.5937665104866028, |
| "eval_runtime": 202.8476, |
| "eval_samples_per_second": 126.647, |
| "eval_steps_per_second": 7.917, |
| "eval_total": 25690, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1476072094468614, |
| "grad_norm": 11.602773666381836, |
| "learning_rate": 9.985270354257304e-06, |
| "loss": 0.7253, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.15537600994406464, |
| "grad_norm": 9.150772094726562, |
| "learning_rate": 9.984493474207582e-06, |
| "loss": 0.6676, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.15537600994406464, |
| "eval_HasAns_exact": 72.28493577267419, |
| "eval_HasAns_f1": 78.8876551339714, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.28493577267419, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.8876551339714, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.28493577267419, |
| "eval_f1": 78.8876551339714, |
| "eval_loss": 0.5816648602485657, |
| "eval_runtime": 202.8184, |
| "eval_samples_per_second": 126.665, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.16314481044126786, |
| "grad_norm": 10.605375289916992, |
| "learning_rate": 9.983716594157864e-06, |
| "loss": 0.7131, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1709136109384711, |
| "grad_norm": 13.075970649719238, |
| "learning_rate": 9.982939714108144e-06, |
| "loss": 0.6664, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1709136109384711, |
| "eval_HasAns_exact": 71.95406773063449, |
| "eval_HasAns_f1": 78.6814885378308, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.95406773063449, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.6814885378308, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.95406773063449, |
| "eval_f1": 78.6814885378308, |
| "eval_loss": 0.5695982575416565, |
| "eval_runtime": 202.5931, |
| "eval_samples_per_second": 126.806, |
| "eval_steps_per_second": 7.927, |
| "eval_total": 25690, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.17868241143567434, |
| "grad_norm": 12.333536148071289, |
| "learning_rate": 9.982162834058422e-06, |
| "loss": 0.6016, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.18645121193287756, |
| "grad_norm": 9.748809814453125, |
| "learning_rate": 9.981385954008702e-06, |
| "loss": 0.6006, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.18645121193287756, |
| "eval_HasAns_exact": 72.06305955624757, |
| "eval_HasAns_f1": 78.75337714295185, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.06305955624757, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.75337714295185, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.06305955624757, |
| "eval_f1": 78.75337714295185, |
| "eval_loss": 0.5660755038261414, |
| "eval_runtime": 202.8834, |
| "eval_samples_per_second": 126.624, |
| "eval_steps_per_second": 7.916, |
| "eval_total": 25690, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1942200124300808, |
| "grad_norm": 9.964409828186035, |
| "learning_rate": 9.980609073958982e-06, |
| "loss": 0.6268, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.20198881292728402, |
| "grad_norm": 6.387030124664307, |
| "learning_rate": 9.979832193909262e-06, |
| "loss": 0.6111, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20198881292728402, |
| "eval_HasAns_exact": 72.65862203191904, |
| "eval_HasAns_f1": 79.23512118883761, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.65862203191904, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.23512118883761, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.65862203191904, |
| "eval_f1": 79.23512118883761, |
| "eval_loss": 0.5586764216423035, |
| "eval_runtime": 203.0862, |
| "eval_samples_per_second": 126.498, |
| "eval_steps_per_second": 7.908, |
| "eval_total": 25690, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20975761342448726, |
| "grad_norm": 9.103731155395508, |
| "learning_rate": 9.979055313859542e-06, |
| "loss": 0.566, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.2175264139216905, |
| "grad_norm": 13.135197639465332, |
| "learning_rate": 9.97827843380982e-06, |
| "loss": 0.5793, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.2175264139216905, |
| "eval_HasAns_exact": 72.39392759828728, |
| "eval_HasAns_f1": 79.00515970901382, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.39392759828728, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.00515970901382, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.39392759828728, |
| "eval_f1": 79.00515970901382, |
| "eval_loss": 0.5600055456161499, |
| "eval_runtime": 202.5804, |
| "eval_samples_per_second": 126.814, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.22529521441889372, |
| "grad_norm": 9.040102005004883, |
| "learning_rate": 9.9775015537601e-06, |
| "loss": 0.6309, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.23306401491609696, |
| "grad_norm": 11.526878356933594, |
| "learning_rate": 9.97672467371038e-06, |
| "loss": 0.6064, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.23306401491609696, |
| "eval_HasAns_exact": 72.74425846632931, |
| "eval_HasAns_f1": 79.36155045560372, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.74425846632931, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.36155045560372, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.74425846632931, |
| "eval_f1": 79.36155045560372, |
| "eval_loss": 0.5500572323799133, |
| "eval_runtime": 202.9048, |
| "eval_samples_per_second": 126.611, |
| "eval_steps_per_second": 7.915, |
| "eval_total": 25690, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.24083281541330018, |
| "grad_norm": 4.359344482421875, |
| "learning_rate": 9.97594779366066e-06, |
| "loss": 0.6823, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.24860161591050342, |
| "grad_norm": 9.813569068908691, |
| "learning_rate": 9.97517091361094e-06, |
| "loss": 0.6314, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.24860161591050342, |
| "eval_HasAns_exact": 72.27715064227326, |
| "eval_HasAns_f1": 78.8175772466993, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.27715064227326, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.8175772466993, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.27715064227326, |
| "eval_f1": 78.8175772466993, |
| "eval_loss": 0.5353918075561523, |
| "eval_runtime": 202.6516, |
| "eval_samples_per_second": 126.769, |
| "eval_steps_per_second": 7.925, |
| "eval_total": 25690, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.25637041640770664, |
| "grad_norm": 7.802361965179443, |
| "learning_rate": 9.974394033561218e-06, |
| "loss": 0.6026, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2641392169049099, |
| "grad_norm": 15.137731552124023, |
| "learning_rate": 9.973617153511498e-06, |
| "loss": 0.6741, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2641392169049099, |
| "eval_HasAns_exact": 72.1058777734527, |
| "eval_HasAns_f1": 78.67773271200794, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.1058777734527, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.67773271200794, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.1058777734527, |
| "eval_f1": 78.67773271200794, |
| "eval_loss": 0.5329614877700806, |
| "eval_runtime": 202.7192, |
| "eval_samples_per_second": 126.727, |
| "eval_steps_per_second": 7.922, |
| "eval_total": 25690, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2719080174021131, |
| "grad_norm": 11.30902099609375, |
| "learning_rate": 9.972840273461778e-06, |
| "loss": 0.6069, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.27967681789931637, |
| "grad_norm": 9.311911582946777, |
| "learning_rate": 9.972063393412058e-06, |
| "loss": 0.5912, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.27967681789931637, |
| "eval_HasAns_exact": 72.24990268586998, |
| "eval_HasAns_f1": 78.7891482246884, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.24990268586998, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.7891482246884, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.24990268586998, |
| "eval_f1": 78.7891482246884, |
| "eval_loss": 0.5291240811347961, |
| "eval_runtime": 202.8091, |
| "eval_samples_per_second": 126.671, |
| "eval_steps_per_second": 7.919, |
| "eval_total": 25690, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.28744561839651955, |
| "grad_norm": 9.725021362304688, |
| "learning_rate": 9.971286513362338e-06, |
| "loss": 0.5943, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2952144188937228, |
| "grad_norm": 8.321113586425781, |
| "learning_rate": 9.970509633312618e-06, |
| "loss": 0.584, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2952144188937228, |
| "eval_HasAns_exact": 72.56909303230829, |
| "eval_HasAns_f1": 79.12955371544794, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.56909303230829, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.12955371544794, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.56909303230829, |
| "eval_f1": 79.12955371544794, |
| "eval_loss": 0.5198299288749695, |
| "eval_runtime": 202.5651, |
| "eval_samples_per_second": 126.823, |
| "eval_steps_per_second": 7.928, |
| "eval_total": 25690, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.30298321939092604, |
| "grad_norm": 11.973769187927246, |
| "learning_rate": 9.969732753262898e-06, |
| "loss": 0.5895, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.3107520198881293, |
| "grad_norm": 9.40645980834961, |
| "learning_rate": 9.968955873213176e-06, |
| "loss": 0.64, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3107520198881293, |
| "eval_HasAns_exact": 72.79096924873491, |
| "eval_HasAns_f1": 79.28733698122942, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.79096924873491, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.28733698122942, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.79096924873491, |
| "eval_f1": 79.28733698122942, |
| "eval_loss": 0.5117060542106628, |
| "eval_runtime": 203.1079, |
| "eval_samples_per_second": 126.485, |
| "eval_steps_per_second": 7.907, |
| "eval_total": 25690, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3185208203853325, |
| "grad_norm": 14.14991569519043, |
| "learning_rate": 9.968178993163456e-06, |
| "loss": 0.5559, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.3262896208825357, |
| "grad_norm": 10.641693115234375, |
| "learning_rate": 9.967402113113736e-06, |
| "loss": 0.5361, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3262896208825357, |
| "eval_HasAns_exact": 73.13351498637603, |
| "eval_HasAns_f1": 79.60268173326928, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.13351498637603, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.60268173326928, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.13351498637603, |
| "eval_f1": 79.60268173326928, |
| "eval_loss": 0.5079160928726196, |
| "eval_runtime": 202.7995, |
| "eval_samples_per_second": 126.677, |
| "eval_steps_per_second": 7.919, |
| "eval_total": 25690, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.33405842137973896, |
| "grad_norm": 10.088223457336426, |
| "learning_rate": 9.966625233064016e-06, |
| "loss": 0.6151, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3418272218769422, |
| "grad_norm": 10.863611221313477, |
| "learning_rate": 9.965848353014296e-06, |
| "loss": 0.5935, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3418272218769422, |
| "eval_HasAns_exact": 72.9349941611522, |
| "eval_HasAns_f1": 79.49240914807372, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.9349941611522, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.49240914807372, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.9349941611522, |
| "eval_f1": 79.49240914807372, |
| "eval_loss": 0.5025383234024048, |
| "eval_runtime": 203.3956, |
| "eval_samples_per_second": 126.306, |
| "eval_steps_per_second": 7.896, |
| "eval_total": 25690, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.34959602237414544, |
| "grad_norm": 12.565064430236816, |
| "learning_rate": 9.965071472964574e-06, |
| "loss": 0.5667, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.3573648228713487, |
| "grad_norm": 5.066905975341797, |
| "learning_rate": 9.964294592914854e-06, |
| "loss": 0.5198, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3573648228713487, |
| "eval_HasAns_exact": 72.6975476839237, |
| "eval_HasAns_f1": 79.25327470362464, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.6975476839237, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.25327470362464, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.6975476839237, |
| "eval_f1": 79.25327470362464, |
| "eval_loss": 0.4996239244937897, |
| "eval_runtime": 204.0149, |
| "eval_samples_per_second": 125.922, |
| "eval_steps_per_second": 7.872, |
| "eval_total": 25690, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3651336233685519, |
| "grad_norm": 10.777655601501465, |
| "learning_rate": 9.963517712865134e-06, |
| "loss": 0.4983, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3729024238657551, |
| "grad_norm": 6.770049095153809, |
| "learning_rate": 9.962740832815414e-06, |
| "loss": 0.5474, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3729024238657551, |
| "eval_HasAns_exact": 73.29700272479565, |
| "eval_HasAns_f1": 79.7561576366954, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.29700272479565, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.7561576366954, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.29700272479565, |
| "eval_f1": 79.7561576366954, |
| "eval_loss": 0.4912045896053314, |
| "eval_runtime": 202.5387, |
| "eval_samples_per_second": 126.84, |
| "eval_steps_per_second": 7.929, |
| "eval_total": 25690, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.38067122436295836, |
| "grad_norm": 11.886155128479004, |
| "learning_rate": 9.961963952765694e-06, |
| "loss": 0.5038, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3884400248601616, |
| "grad_norm": 15.60519027709961, |
| "learning_rate": 9.961187072715973e-06, |
| "loss": 0.5655, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3884400248601616, |
| "eval_HasAns_exact": 73.46049046321527, |
| "eval_HasAns_f1": 79.95807844595133, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 73.46049046321527, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.95807844595133, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 73.46049046321527, |
| "eval_f1": 79.95807844595133, |
| "eval_loss": 0.48474493622779846, |
| "eval_runtime": 202.6752, |
| "eval_samples_per_second": 126.755, |
| "eval_steps_per_second": 7.924, |
| "eval_total": 25690, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 643600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 10, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.090374053888e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|