| { |
| "best_global_step": 700, |
| "best_metric": 79.0143805614001, |
| "best_model_checkpoint": "bert-soccer-qa/checkpoint-500", |
| "epoch": 0.15537600994406464, |
| "eval_steps": 100, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007768800497203232, |
| "grad_norm": 9.272425651550293, |
| "learning_rate": 9.99925419515227e-06, |
| "loss": 1.1446, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.015537600994406464, |
| "grad_norm": 12.237289428710938, |
| "learning_rate": 9.99847731510255e-06, |
| "loss": 0.9455, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015537600994406464, |
| "eval_HasAns_exact": 69.85208252238225, |
| "eval_HasAns_f1": 77.13603651720207, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 69.85208252238225, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.13603651720207, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 69.85208252238225, |
| "eval_f1": 77.13603651720207, |
| "eval_loss": 0.8127343058586121, |
| "eval_runtime": 202.0338, |
| "eval_samples_per_second": 127.157, |
| "eval_steps_per_second": 7.949, |
| "eval_total": 25690, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.023306401491609695, |
| "grad_norm": 5.626998424530029, |
| "learning_rate": 9.997700435052828e-06, |
| "loss": 0.8129, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.031075201988812928, |
| "grad_norm": 6.87526798248291, |
| "learning_rate": 9.996923555003108e-06, |
| "loss": 0.8743, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.031075201988812928, |
| "eval_HasAns_exact": 70.21409108602569, |
| "eval_HasAns_f1": 77.38207884007946, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 70.21409108602569, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.38207884007946, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 70.21409108602569, |
| "eval_f1": 77.38207884007946, |
| "eval_loss": 0.7383215427398682, |
| "eval_runtime": 202.5157, |
| "eval_samples_per_second": 126.854, |
| "eval_steps_per_second": 7.93, |
| "eval_total": 25690, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03884400248601616, |
| "grad_norm": 11.461019515991211, |
| "learning_rate": 9.996146674953388e-06, |
| "loss": 0.7825, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04661280298321939, |
| "grad_norm": 9.630631446838379, |
| "learning_rate": 9.995369794903668e-06, |
| "loss": 0.7189, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04661280298321939, |
| "eval_HasAns_exact": 71.03931490852472, |
| "eval_HasAns_f1": 78.08897109574, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.03931490852472, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.08897109574, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.03931490852472, |
| "eval_f1": 78.08897109574, |
| "eval_loss": 0.7194859981536865, |
| "eval_runtime": 202.3679, |
| "eval_samples_per_second": 126.947, |
| "eval_steps_per_second": 7.936, |
| "eval_total": 25690, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.054381603480422626, |
| "grad_norm": 11.377346992492676, |
| "learning_rate": 9.994592914853948e-06, |
| "loss": 0.7889, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.062150403977625855, |
| "grad_norm": 7.450821399688721, |
| "learning_rate": 9.993816034804228e-06, |
| "loss": 0.7367, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.062150403977625855, |
| "eval_HasAns_exact": 71.08991825613079, |
| "eval_HasAns_f1": 78.00619214091435, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.08991825613079, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.00619214091435, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.08991825613079, |
| "eval_f1": 78.00619214091435, |
| "eval_loss": 0.683600902557373, |
| "eval_runtime": 202.4728, |
| "eval_samples_per_second": 126.881, |
| "eval_steps_per_second": 7.932, |
| "eval_total": 25690, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06991920447482909, |
| "grad_norm": 14.92029857635498, |
| "learning_rate": 9.993039154754508e-06, |
| "loss": 0.7838, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.07768800497203232, |
| "grad_norm": 9.030844688415527, |
| "learning_rate": 9.992262274704786e-06, |
| "loss": 0.6469, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07768800497203232, |
| "eval_HasAns_exact": 71.42467886337096, |
| "eval_HasAns_f1": 78.23334576784337, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.42467886337096, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.23334576784337, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.42467886337096, |
| "eval_f1": 78.23334576784337, |
| "eval_loss": 0.6646179556846619, |
| "eval_runtime": 202.6333, |
| "eval_samples_per_second": 126.781, |
| "eval_steps_per_second": 7.926, |
| "eval_total": 25690, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08545680546923555, |
| "grad_norm": 10.103166580200195, |
| "learning_rate": 9.991485394655066e-06, |
| "loss": 0.725, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.09322560596643878, |
| "grad_norm": 6.986274242401123, |
| "learning_rate": 9.990708514605346e-06, |
| "loss": 0.6657, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09322560596643878, |
| "eval_HasAns_exact": 70.2841572596341, |
| "eval_HasAns_f1": 77.1247889416155, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 70.2841572596341, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 77.1247889416155, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 70.2841572596341, |
| "eval_f1": 77.1247889416155, |
| "eval_loss": 0.6492825150489807, |
| "eval_runtime": 203.3854, |
| "eval_samples_per_second": 126.312, |
| "eval_steps_per_second": 7.896, |
| "eval_total": 25690, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.10099440646364201, |
| "grad_norm": 10.636602401733398, |
| "learning_rate": 9.989931634555626e-06, |
| "loss": 0.7337, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.10876320696084525, |
| "grad_norm": 8.252824783325195, |
| "learning_rate": 9.989154754505906e-06, |
| "loss": 0.662, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10876320696084525, |
| "eval_HasAns_exact": 72.22654729466718, |
| "eval_HasAns_f1": 79.0143805614001, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.22654729466718, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 79.0143805614001, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.22654729466718, |
| "eval_f1": 79.0143805614001, |
| "eval_loss": 0.6340453028678894, |
| "eval_runtime": 202.8216, |
| "eval_samples_per_second": 126.663, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.11653200745804848, |
| "grad_norm": 8.463785171508789, |
| "learning_rate": 9.988377874456184e-06, |
| "loss": 0.7265, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.12430080795525171, |
| "grad_norm": 9.748174667358398, |
| "learning_rate": 9.987600994406464e-06, |
| "loss": 0.6969, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12430080795525171, |
| "eval_HasAns_exact": 72.1292331646555, |
| "eval_HasAns_f1": 78.83892105701732, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.1292331646555, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.83892105701732, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.1292331646555, |
| "eval_f1": 78.83892105701732, |
| "eval_loss": 0.6085864901542664, |
| "eval_runtime": 202.3194, |
| "eval_samples_per_second": 126.977, |
| "eval_steps_per_second": 7.938, |
| "eval_total": 25690, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.13206960845245494, |
| "grad_norm": 9.786517143249512, |
| "learning_rate": 9.986824114356744e-06, |
| "loss": 0.7111, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.13983840894965818, |
| "grad_norm": 8.391840934753418, |
| "learning_rate": 9.986047234307024e-06, |
| "loss": 0.669, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13983840894965818, |
| "eval_HasAns_exact": 71.89567925262749, |
| "eval_HasAns_f1": 78.61371443349637, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 71.89567925262749, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.61371443349637, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 71.89567925262749, |
| "eval_f1": 78.61371443349637, |
| "eval_loss": 0.5937665104866028, |
| "eval_runtime": 202.8476, |
| "eval_samples_per_second": 126.647, |
| "eval_steps_per_second": 7.917, |
| "eval_total": 25690, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1476072094468614, |
| "grad_norm": 11.602773666381836, |
| "learning_rate": 9.985270354257304e-06, |
| "loss": 0.7253, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.15537600994406464, |
| "grad_norm": 9.150772094726562, |
| "learning_rate": 9.984493474207582e-06, |
| "loss": 0.6676, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.15537600994406464, |
| "eval_HasAns_exact": 72.28493577267419, |
| "eval_HasAns_f1": 78.8876551339714, |
| "eval_HasAns_total": 25690, |
| "eval_best_exact": 72.28493577267419, |
| "eval_best_exact_thresh": 0.0, |
| "eval_best_f1": 78.8876551339714, |
| "eval_best_f1_thresh": 0.0, |
| "eval_exact": 72.28493577267419, |
| "eval_f1": 78.8876551339714, |
| "eval_loss": 0.5816648602485657, |
| "eval_runtime": 202.8184, |
| "eval_samples_per_second": 126.665, |
| "eval_steps_per_second": 7.918, |
| "eval_total": 25690, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 643600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 10, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 3 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8361496215552000.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|