{
  "best_global_step": 30000,
  "best_metric": 0.8292354941368103,
  "best_model_checkpoint": "bert_base_code_uml/checkpoint-30000",
  "epoch": 25.0,
  "eval_steps": 10000,
  "global_step": 31850,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3924646781789639,
      "grad_norm": 2.2605140209198,
      "learning_rate": 4.9900000000000005e-06,
      "loss": 8.1534,
      "step": 500
    },
    {
      "epoch": 0.7849293563579278,
      "grad_norm": 1.4277119636535645,
      "learning_rate": 9.990000000000001e-06,
      "loss": 5.4981,
      "step": 1000
    },
    {
      "epoch": 1.1773940345368916,
      "grad_norm": 1.1594833135604858,
      "learning_rate": 1.499e-05,
      "loss": 4.6668,
      "step": 1500
    },
    {
      "epoch": 1.5698587127158556,
      "grad_norm": 1.3609659671783447,
      "learning_rate": 1.999e-05,
      "loss": 4.4569,
      "step": 2000
    },
    {
      "epoch": 1.9623233908948194,
      "grad_norm": 1.4516750574111938,
      "learning_rate": 2.4990000000000003e-05,
      "loss": 4.3236,
      "step": 2500
    },
    {
      "epoch": 2.3547880690737832,
      "grad_norm": 1.307254672050476,
      "learning_rate": 2.9990000000000003e-05,
      "loss": 4.2234,
      "step": 3000
    },
    {
      "epoch": 2.7472527472527473,
      "grad_norm": 1.1777299642562866,
      "learning_rate": 3.499e-05,
      "loss": 4.1369,
      "step": 3500
    },
    {
      "epoch": 3.1397174254317113,
      "grad_norm": 1.277431607246399,
      "learning_rate": 3.999e-05,
      "loss": 4.0883,
      "step": 4000
    },
    {
      "epoch": 3.5321821036106753,
      "grad_norm": 1.136020302772522,
      "learning_rate": 4.499e-05,
      "loss": 4.0251,
      "step": 4500
    },
    {
      "epoch": 3.924646781789639,
      "grad_norm": 1.5430645942687988,
      "learning_rate": 4.999e-05,
      "loss": 3.7435,
      "step": 5000
    },
    {
      "epoch": 4.3171114599686025,
      "grad_norm": 1.1859745979309082,
      "learning_rate": 5.499000000000001e-05,
      "loss": 3.5562,
      "step": 5500
    },
    {
      "epoch": 4.7095761381475665,
      "grad_norm": 1.1602009534835815,
      "learning_rate": 5.999e-05,
      "loss": 3.4409,
      "step": 6000
    },
    {
      "epoch": 5.1020408163265305,
      "grad_norm": 1.5617371797561646,
      "learning_rate": 6.499000000000001e-05,
      "loss": 3.3426,
      "step": 6500
    },
    {
      "epoch": 5.4945054945054945,
      "grad_norm": 1.3554491996765137,
      "learning_rate": 6.999e-05,
      "loss": 3.2194,
      "step": 7000
    },
    {
      "epoch": 5.8869701726844585,
      "grad_norm": 2.1539087295532227,
      "learning_rate": 7.499e-05,
      "loss": 3.1264,
      "step": 7500
    },
    {
      "epoch": 6.279434850863423,
      "grad_norm": 1.4375736713409424,
      "learning_rate": 7.999000000000001e-05,
      "loss": 3.0421,
      "step": 8000
    },
    {
      "epoch": 6.671899529042387,
      "grad_norm": 1.8041514158248901,
      "learning_rate": 8.499e-05,
      "loss": 2.9334,
      "step": 8500
    },
    {
      "epoch": 7.06436420722135,
      "grad_norm": 2.089439868927002,
      "learning_rate": 8.999000000000001e-05,
      "loss": 2.8356,
      "step": 9000
    },
    {
      "epoch": 7.456828885400314,
      "grad_norm": 1.8236392736434937,
      "learning_rate": 9.499e-05,
      "loss": 2.6914,
      "step": 9500
    },
    {
      "epoch": 7.849293563579278,
      "grad_norm": 1.8073580265045166,
      "learning_rate": 9.999000000000001e-05,
      "loss": 2.4929,
      "step": 10000
    },
    {
      "epoch": 7.849293563579278,
      "eval_accuracy": 0.5692341405099076,
      "eval_loss": 2.151398181915283,
      "eval_runtime": 38.2798,
      "eval_samples_per_second": 160.685,
      "eval_steps_per_second": 1.698,
      "step": 10000
    },
    {
      "epoch": 8.241758241758241,
      "grad_norm": 1.849391222000122,
      "learning_rate": 9.77162471395881e-05,
      "loss": 2.1576,
      "step": 10500
    },
    {
      "epoch": 8.634222919937205,
      "grad_norm": 1.2290756702423096,
      "learning_rate": 9.542791762013731e-05,
      "loss": 1.7337,
      "step": 11000
    },
    {
      "epoch": 9.026687598116169,
      "grad_norm": 1.1669484376907349,
      "learning_rate": 9.313958810068651e-05,
      "loss": 1.4375,
      "step": 11500
    },
    {
      "epoch": 9.419152276295133,
      "grad_norm": 1.0519758462905884,
      "learning_rate": 9.08512585812357e-05,
      "loss": 1.3162,
      "step": 12000
    },
    {
      "epoch": 9.811616954474097,
      "grad_norm": 1.0862187147140503,
      "learning_rate": 8.85629290617849e-05,
      "loss": 1.2368,
      "step": 12500
    },
    {
      "epoch": 10.204081632653061,
      "grad_norm": 0.9377219676971436,
      "learning_rate": 8.62745995423341e-05,
      "loss": 1.1784,
      "step": 13000
    },
    {
      "epoch": 10.596546310832025,
      "grad_norm": 0.9312331676483154,
      "learning_rate": 8.398627002288329e-05,
      "loss": 1.1388,
      "step": 13500
    },
    {
      "epoch": 10.989010989010989,
      "grad_norm": 0.9040568470954895,
      "learning_rate": 8.16979405034325e-05,
      "loss": 1.1097,
      "step": 14000
    },
    {
      "epoch": 11.381475667189953,
      "grad_norm": 0.8583242297172546,
      "learning_rate": 7.94096109839817e-05,
      "loss": 1.0736,
      "step": 14500
    },
    {
      "epoch": 11.773940345368917,
      "grad_norm": 0.8321512937545776,
      "learning_rate": 7.712128146453089e-05,
      "loss": 1.0626,
      "step": 15000
    },
    {
      "epoch": 12.166405023547881,
      "grad_norm": 0.9143489003181458,
      "learning_rate": 7.48329519450801e-05,
      "loss": 1.0358,
      "step": 15500
    },
    {
      "epoch": 12.558869701726845,
      "grad_norm": 0.8196631669998169,
      "learning_rate": 7.25446224256293e-05,
      "loss": 1.0207,
      "step": 16000
    },
    {
      "epoch": 12.95133437990581,
      "grad_norm": 0.7631738781929016,
      "learning_rate": 7.025629290617849e-05,
      "loss": 1.004,
      "step": 16500
    },
    {
      "epoch": 13.343799058084773,
      "grad_norm": 0.8194634914398193,
      "learning_rate": 6.79679633867277e-05,
      "loss": 0.9921,
      "step": 17000
    },
    {
      "epoch": 13.736263736263737,
      "grad_norm": 0.7670016884803772,
      "learning_rate": 6.56796338672769e-05,
      "loss": 0.9779,
      "step": 17500
    },
    {
      "epoch": 14.1287284144427,
      "grad_norm": 0.7673987746238708,
      "learning_rate": 6.339130434782609e-05,
      "loss": 0.9608,
      "step": 18000
    },
    {
      "epoch": 14.521193092621663,
      "grad_norm": 0.7936846613883972,
      "learning_rate": 6.110297482837529e-05,
      "loss": 0.9558,
      "step": 18500
    },
    {
      "epoch": 14.913657770800627,
      "grad_norm": 0.7623568177223206,
      "learning_rate": 5.881464530892449e-05,
      "loss": 0.9505,
      "step": 19000
    },
    {
      "epoch": 15.306122448979592,
      "grad_norm": 0.7214558720588684,
      "learning_rate": 5.652631578947368e-05,
      "loss": 0.9402,
      "step": 19500
    },
    {
      "epoch": 15.698587127158556,
      "grad_norm": 0.827078640460968,
      "learning_rate": 5.423798627002289e-05,
      "loss": 0.9263,
      "step": 20000
    },
    {
      "epoch": 15.698587127158556,
      "eval_accuracy": 0.8142541052951258,
      "eval_loss": 0.9068173170089722,
      "eval_runtime": 31.5271,
      "eval_samples_per_second": 195.102,
      "eval_steps_per_second": 2.062,
      "step": 20000
    },
    {
      "epoch": 16.09105180533752,
      "grad_norm": 0.7756440043449402,
      "learning_rate": 5.1949656750572084e-05,
      "loss": 0.9185,
      "step": 20500
    },
    {
      "epoch": 16.483516483516482,
      "grad_norm": 0.7866923809051514,
      "learning_rate": 4.966132723112129e-05,
      "loss": 0.9115,
      "step": 21000
    },
    {
      "epoch": 16.875981161695446,
      "grad_norm": 0.7449353337287903,
      "learning_rate": 4.737299771167048e-05,
      "loss": 0.9021,
      "step": 21500
    },
    {
      "epoch": 17.26844583987441,
      "grad_norm": 0.7738542556762695,
      "learning_rate": 4.508466819221968e-05,
      "loss": 0.9021,
      "step": 22000
    },
    {
      "epoch": 17.660910518053374,
      "grad_norm": 0.7117587924003601,
      "learning_rate": 4.279633867276888e-05,
      "loss": 0.8932,
      "step": 22500
    },
    {
      "epoch": 18.053375196232338,
      "grad_norm": 0.6952142715454102,
      "learning_rate": 4.0508009153318077e-05,
      "loss": 0.8866,
      "step": 23000
    },
    {
      "epoch": 18.445839874411302,
      "grad_norm": 0.6748417615890503,
      "learning_rate": 3.821967963386728e-05,
      "loss": 0.8831,
      "step": 23500
    },
    {
      "epoch": 18.838304552590266,
      "grad_norm": 0.7013327479362488,
      "learning_rate": 3.593135011441648e-05,
      "loss": 0.8714,
      "step": 24000
    },
    {
      "epoch": 19.23076923076923,
      "grad_norm": 0.629546046257019,
      "learning_rate": 3.364302059496568e-05,
      "loss": 0.8684,
      "step": 24500
    },
    {
      "epoch": 19.623233908948194,
      "grad_norm": 0.6739959120750427,
      "learning_rate": 3.135469107551487e-05,
      "loss": 0.8664,
      "step": 25000
    },
    {
      "epoch": 20.015698587127158,
      "grad_norm": 0.6923867464065552,
      "learning_rate": 2.9066361556064075e-05,
      "loss": 0.8613,
      "step": 25500
    },
    {
      "epoch": 20.408163265306122,
      "grad_norm": 0.7043192386627197,
      "learning_rate": 2.677803203661327e-05,
      "loss": 0.8541,
      "step": 26000
    },
    {
      "epoch": 20.800627943485086,
      "grad_norm": 0.6633190512657166,
      "learning_rate": 2.448970251716247e-05,
      "loss": 0.8558,
      "step": 26500
    },
    {
      "epoch": 21.19309262166405,
      "grad_norm": 0.6382936239242554,
      "learning_rate": 2.2201372997711673e-05,
      "loss": 0.8486,
      "step": 27000
    },
    {
      "epoch": 21.585557299843014,
      "grad_norm": 0.7126407623291016,
      "learning_rate": 1.9913043478260872e-05,
      "loss": 0.8455,
      "step": 27500
    },
    {
      "epoch": 21.978021978021978,
      "grad_norm": 0.6809006929397583,
      "learning_rate": 1.7624713958810068e-05,
      "loss": 0.8382,
      "step": 28000
    },
    {
      "epoch": 22.370486656200942,
      "grad_norm": 0.6693772077560425,
      "learning_rate": 1.533638443935927e-05,
      "loss": 0.8377,
      "step": 28500
    },
    {
      "epoch": 22.762951334379906,
      "grad_norm": 0.7368608117103577,
      "learning_rate": 1.3048054919908468e-05,
      "loss": 0.8346,
      "step": 29000
    },
    {
      "epoch": 23.15541601255887,
      "grad_norm": 0.6541247963905334,
      "learning_rate": 1.0759725400457667e-05,
      "loss": 0.8298,
      "step": 29500
    },
    {
      "epoch": 23.547880690737834,
      "grad_norm": 0.6780161261558533,
      "learning_rate": 8.471395881006864e-06,
      "loss": 0.8293,
      "step": 30000
    },
    {
      "epoch": 23.547880690737834,
      "eval_accuracy": 0.8285928159953133,
      "eval_loss": 0.8292354941368103,
      "eval_runtime": 31.6198,
      "eval_samples_per_second": 194.53,
      "eval_steps_per_second": 2.056,
      "step": 30000
    },
    {
      "epoch": 23.940345368916798,
      "grad_norm": 0.6645314693450928,
      "learning_rate": 6.183066361556064e-06,
      "loss": 0.8306,
      "step": 30500
    },
    {
      "epoch": 24.332810047095762,
      "grad_norm": 0.7622667551040649,
      "learning_rate": 3.894736842105264e-06,
      "loss": 0.8225,
      "step": 31000
    },
    {
      "epoch": 24.725274725274726,
      "grad_norm": 0.6563706398010254,
      "learning_rate": 1.6064073226544622e-06,
      "loss": 0.8275,
      "step": 31500
    },
    {
      "epoch": 25.0,
      "step": 31850,
      "total_flos": 8.0444602960128e+17,
      "train_loss": 1.908989332549426,
      "train_runtime": 23433.9018,
      "train_samples_per_second": 130.424,
      "train_steps_per_second": 1.359
    }
  ],
  "logging_steps": 500,
  "max_steps": 31850,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 10000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.0444602960128e+17,
  "train_batch_size": 96,
  "trial_name": null,
  "trial_params": null
}