| { | |
| "best_metric": 0.8401998462720984, | |
| "best_model_checkpoint": "/mnt/hdd-nfs/jungsoo/DensePhrases/resources/ckpts/labeler_multi.train.v4.1_nq.dev.v4.1_rlmulti_title/checkpoint-198000", | |
| "epoch": 1.0, | |
| "global_step": 235538, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.532984062019718e-06, | |
| "loss": 0.4566, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.8049192928516525, | |
| "eval_loss": 0.4426476061344147, | |
| "eval_runtime": 31.0353, | |
| "eval_samples_per_second": 419.2, | |
| "eval_steps_per_second": 52.424, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.065968124039434e-06, | |
| "loss": 0.4093, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.8199846272098386, | |
| "eval_loss": 0.3974132835865021, | |
| "eval_runtime": 31.2125, | |
| "eval_samples_per_second": 416.821, | |
| "eval_steps_per_second": 52.127, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.598952186059151e-06, | |
| "loss": 0.387, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.8220599538816296, | |
| "eval_loss": 0.39724817872047424, | |
| "eval_runtime": 31.0035, | |
| "eval_samples_per_second": 419.63, | |
| "eval_steps_per_second": 52.478, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 8.131936248078867e-06, | |
| "loss": 0.3694, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.8254419677171406, | |
| "eval_loss": 0.4177829623222351, | |
| "eval_runtime": 31.0051, | |
| "eval_samples_per_second": 419.608, | |
| "eval_steps_per_second": 52.475, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.664920310098584e-06, | |
| "loss": 0.3528, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.828977709454266, | |
| "eval_loss": 0.38233569264411926, | |
| "eval_runtime": 31.0115, | |
| "eval_samples_per_second": 419.521, | |
| "eval_steps_per_second": 52.464, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.1979043721182995e-06, | |
| "loss": 0.3368, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.8327440430438124, | |
| "eval_loss": 0.38260024785995483, | |
| "eval_runtime": 31.0066, | |
| "eval_samples_per_second": 419.588, | |
| "eval_steps_per_second": 52.473, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.730888434138017e-06, | |
| "loss": 0.3236, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.8348962336664104, | |
| "eval_loss": 0.3918485939502716, | |
| "eval_runtime": 31.0424, | |
| "eval_samples_per_second": 419.104, | |
| "eval_steps_per_second": 52.412, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 6.263872496157733e-06, | |
| "loss": 0.3107, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.8374327440430438, | |
| "eval_loss": 0.40744495391845703, | |
| "eval_runtime": 30.981, | |
| "eval_samples_per_second": 419.935, | |
| "eval_steps_per_second": 52.516, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5.79685655817745e-06, | |
| "loss": 0.299, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.8369715603382014, | |
| "eval_loss": 0.42519351840019226, | |
| "eval_runtime": 30.9991, | |
| "eval_samples_per_second": 419.69, | |
| "eval_steps_per_second": 52.485, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.329840620197166e-06, | |
| "loss": 0.2857, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.8330514988470408, | |
| "eval_loss": 0.432172954082489, | |
| "eval_runtime": 31.0082, | |
| "eval_samples_per_second": 419.567, | |
| "eval_steps_per_second": 52.47, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.862824682216883e-06, | |
| "loss": 0.2756, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.8378170637970792, | |
| "eval_loss": 0.41075146198272705, | |
| "eval_runtime": 30.9654, | |
| "eval_samples_per_second": 420.147, | |
| "eval_steps_per_second": 52.543, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.395808744236599e-06, | |
| "loss": 0.266, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.8359723289777095, | |
| "eval_loss": 0.4291342794895172, | |
| "eval_runtime": 31.031, | |
| "eval_samples_per_second": 419.258, | |
| "eval_steps_per_second": 52.431, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9287928062563155e-06, | |
| "loss": 0.2557, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.8334358186010761, | |
| "eval_loss": 0.4698314964771271, | |
| "eval_runtime": 31.0504, | |
| "eval_samples_per_second": 418.997, | |
| "eval_steps_per_second": 52.399, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.4617768682760324e-06, | |
| "loss": 0.2478, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.8363566487317448, | |
| "eval_loss": 0.4574269652366638, | |
| "eval_runtime": 31.1319, | |
| "eval_samples_per_second": 417.9, | |
| "eval_steps_per_second": 52.262, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.994760930295749e-06, | |
| "loss": 0.2393, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.8398923904688701, | |
| "eval_loss": 0.4403812289237976, | |
| "eval_runtime": 31.055, | |
| "eval_samples_per_second": 418.934, | |
| "eval_steps_per_second": 52.391, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.5277449923154652e-06, | |
| "loss": 0.2321, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.8360491929285165, | |
| "eval_loss": 0.4628557562828064, | |
| "eval_runtime": 31.1062, | |
| "eval_samples_per_second": 418.245, | |
| "eval_steps_per_second": 52.305, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.0607290543351817e-06, | |
| "loss": 0.2246, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.8398923904688701, | |
| "eval_loss": 0.45440638065338135, | |
| "eval_runtime": 31.0474, | |
| "eval_samples_per_second": 419.036, | |
| "eval_steps_per_second": 52.404, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5937131163548984e-06, | |
| "loss": 0.2194, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.8401998462720984, | |
| "eval_loss": 0.4905882179737091, | |
| "eval_runtime": 31.0565, | |
| "eval_samples_per_second": 418.915, | |
| "eval_steps_per_second": 52.388, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.1266971783746148e-06, | |
| "loss": 0.2125, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.8395080707148348, | |
| "eval_loss": 0.48444053530693054, | |
| "eval_runtime": 31.0361, | |
| "eval_samples_per_second": 419.189, | |
| "eval_steps_per_second": 52.423, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6.596812403943314e-07, | |
| "loss": 0.2101, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.8388931591083781, | |
| "eval_loss": 0.49173465371131897, | |
| "eval_runtime": 31.025, | |
| "eval_samples_per_second": 419.34, | |
| "eval_steps_per_second": 52.442, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.9266530241404786e-07, | |
| "loss": 0.2052, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.8384319754035358, | |
| "eval_loss": 0.4989364445209503, | |
| "eval_runtime": 31.4895, | |
| "eval_samples_per_second": 413.153, | |
| "eval_steps_per_second": 51.668, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 235538, | |
| "total_flos": 2.758442251818959e+18, | |
| "train_loss": 0.2896772417569365, | |
| "train_runtime": 61879.5726, | |
| "train_samples_per_second": 121.804, | |
| "train_steps_per_second": 3.806 | |
| } | |
| ], | |
| "max_steps": 235538, | |
| "num_train_epochs": 1, | |
| "total_flos": 2.758442251818959e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |