{ "best_global_step": 30000, "best_metric": 0.8292354941368103, "best_model_checkpoint": "bert_base_code_uml/checkpoint-30000", "epoch": 25.0, "eval_steps": 10000, "global_step": 31850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3924646781789639, "grad_norm": 2.2605140209198, "learning_rate": 4.9900000000000005e-06, "loss": 8.1534, "step": 500 }, { "epoch": 0.7849293563579278, "grad_norm": 1.4277119636535645, "learning_rate": 9.990000000000001e-06, "loss": 5.4981, "step": 1000 }, { "epoch": 1.1773940345368916, "grad_norm": 1.1594833135604858, "learning_rate": 1.499e-05, "loss": 4.6668, "step": 1500 }, { "epoch": 1.5698587127158556, "grad_norm": 1.3609659671783447, "learning_rate": 1.999e-05, "loss": 4.4569, "step": 2000 }, { "epoch": 1.9623233908948194, "grad_norm": 1.4516750574111938, "learning_rate": 2.4990000000000003e-05, "loss": 4.3236, "step": 2500 }, { "epoch": 2.3547880690737832, "grad_norm": 1.307254672050476, "learning_rate": 2.9990000000000003e-05, "loss": 4.2234, "step": 3000 }, { "epoch": 2.7472527472527473, "grad_norm": 1.1777299642562866, "learning_rate": 3.499e-05, "loss": 4.1369, "step": 3500 }, { "epoch": 3.1397174254317113, "grad_norm": 1.277431607246399, "learning_rate": 3.999e-05, "loss": 4.0883, "step": 4000 }, { "epoch": 3.5321821036106753, "grad_norm": 1.136020302772522, "learning_rate": 4.499e-05, "loss": 4.0251, "step": 4500 }, { "epoch": 3.924646781789639, "grad_norm": 1.5430645942687988, "learning_rate": 4.999e-05, "loss": 3.7435, "step": 5000 }, { "epoch": 4.3171114599686025, "grad_norm": 1.1859745979309082, "learning_rate": 5.499000000000001e-05, "loss": 3.5562, "step": 5500 }, { "epoch": 4.7095761381475665, "grad_norm": 1.1602009534835815, "learning_rate": 5.999e-05, "loss": 3.4409, "step": 6000 }, { "epoch": 5.1020408163265305, "grad_norm": 1.5617371797561646, "learning_rate": 6.499000000000001e-05, "loss": 3.3426, "step": 6500 }, { "epoch": 5.4945054945054945, "grad_norm": 1.3554491996765137, "learning_rate": 6.999e-05, "loss": 3.2194, "step": 7000 }, { "epoch": 5.8869701726844585, "grad_norm": 2.1539087295532227, "learning_rate": 7.499e-05, "loss": 3.1264, "step": 7500 }, { "epoch": 6.279434850863423, "grad_norm": 1.4375736713409424, "learning_rate": 7.999000000000001e-05, "loss": 3.0421, "step": 8000 }, { "epoch": 6.671899529042387, "grad_norm": 1.8041514158248901, "learning_rate": 8.499e-05, "loss": 2.9334, "step": 8500 }, { "epoch": 7.06436420722135, "grad_norm": 2.089439868927002, "learning_rate": 8.999000000000001e-05, "loss": 2.8356, "step": 9000 }, { "epoch": 7.456828885400314, "grad_norm": 1.8236392736434937, "learning_rate": 9.499e-05, "loss": 2.6914, "step": 9500 }, { "epoch": 7.849293563579278, "grad_norm": 1.8073580265045166, "learning_rate": 9.999000000000001e-05, "loss": 2.4929, "step": 10000 }, { "epoch": 7.849293563579278, "eval_accuracy": 0.5692341405099076, "eval_loss": 2.151398181915283, "eval_runtime": 38.2798, "eval_samples_per_second": 160.685, "eval_steps_per_second": 1.698, "step": 10000 }, { "epoch": 8.241758241758241, "grad_norm": 1.849391222000122, "learning_rate": 9.77162471395881e-05, "loss": 2.1576, "step": 10500 }, { "epoch": 8.634222919937205, "grad_norm": 1.2290756702423096, "learning_rate": 9.542791762013731e-05, "loss": 1.7337, "step": 11000 }, { "epoch": 9.026687598116169, "grad_norm": 1.1669484376907349, "learning_rate": 9.313958810068651e-05, "loss": 1.4375, "step": 11500 }, { "epoch": 9.419152276295133, "grad_norm": 1.0519758462905884, "learning_rate": 9.08512585812357e-05, "loss": 1.3162, "step": 12000 }, { "epoch": 9.811616954474097, "grad_norm": 1.0862187147140503, "learning_rate": 8.85629290617849e-05, "loss": 1.2368, "step": 12500 }, { "epoch": 10.204081632653061, "grad_norm": 0.9377219676971436, "learning_rate": 8.62745995423341e-05, "loss": 1.1784, "step": 13000 }, { "epoch": 10.596546310832025, "grad_norm": 0.9312331676483154, "learning_rate": 8.398627002288329e-05, "loss": 1.1388, "step": 13500 }, { "epoch": 10.989010989010989, "grad_norm": 0.9040568470954895, "learning_rate": 8.16979405034325e-05, "loss": 1.1097, "step": 14000 }, { "epoch": 11.381475667189953, "grad_norm": 0.8583242297172546, "learning_rate": 7.94096109839817e-05, "loss": 1.0736, "step": 14500 }, { "epoch": 11.773940345368917, "grad_norm": 0.8321512937545776, "learning_rate": 7.712128146453089e-05, "loss": 1.0626, "step": 15000 }, { "epoch": 12.166405023547881, "grad_norm": 0.9143489003181458, "learning_rate": 7.48329519450801e-05, "loss": 1.0358, "step": 15500 }, { "epoch": 12.558869701726845, "grad_norm": 0.8196631669998169, "learning_rate": 7.25446224256293e-05, "loss": 1.0207, "step": 16000 }, { "epoch": 12.95133437990581, "grad_norm": 0.7631738781929016, "learning_rate": 7.025629290617849e-05, "loss": 1.004, "step": 16500 }, { "epoch": 13.343799058084773, "grad_norm": 0.8194634914398193, "learning_rate": 6.79679633867277e-05, "loss": 0.9921, "step": 17000 }, { "epoch": 13.736263736263737, "grad_norm": 0.7670016884803772, "learning_rate": 6.56796338672769e-05, "loss": 0.9779, "step": 17500 }, { "epoch": 14.1287284144427, "grad_norm": 0.7673987746238708, "learning_rate": 6.339130434782609e-05, "loss": 0.9608, "step": 18000 }, { "epoch": 14.521193092621663, "grad_norm": 0.7936846613883972, "learning_rate": 6.110297482837529e-05, "loss": 0.9558, "step": 18500 }, { "epoch": 14.913657770800627, "grad_norm": 0.7623568177223206, "learning_rate": 5.881464530892449e-05, "loss": 0.9505, "step": 19000 }, { "epoch": 15.306122448979592, "grad_norm": 0.7214558720588684, "learning_rate": 5.652631578947368e-05, "loss": 0.9402, "step": 19500 }, { "epoch": 15.698587127158556, "grad_norm": 0.827078640460968, "learning_rate": 5.423798627002289e-05, "loss": 0.9263, "step": 20000 }, { "epoch": 15.698587127158556, "eval_accuracy": 0.8142541052951258, "eval_loss": 0.9068173170089722, "eval_runtime": 31.5271, "eval_samples_per_second": 195.102, "eval_steps_per_second": 2.062, "step": 20000 }, { "epoch": 16.09105180533752, "grad_norm": 0.7756440043449402, "learning_rate": 5.1949656750572084e-05, "loss": 0.9185, "step": 20500 }, { "epoch": 16.483516483516482, "grad_norm": 0.7866923809051514, "learning_rate": 4.966132723112129e-05, "loss": 0.9115, "step": 21000 }, { "epoch": 16.875981161695446, "grad_norm": 0.7449353337287903, "learning_rate": 4.737299771167048e-05, "loss": 0.9021, "step": 21500 }, { "epoch": 17.26844583987441, "grad_norm": 0.7738542556762695, "learning_rate": 4.508466819221968e-05, "loss": 0.9021, "step": 22000 }, { "epoch": 17.660910518053374, "grad_norm": 0.7117587924003601, "learning_rate": 4.279633867276888e-05, "loss": 0.8932, "step": 22500 }, { "epoch": 18.053375196232338, "grad_norm": 0.6952142715454102, "learning_rate": 4.0508009153318077e-05, "loss": 0.8866, "step": 23000 }, { "epoch": 18.445839874411302, "grad_norm": 0.6748417615890503, "learning_rate": 3.821967963386728e-05, "loss": 0.8831, "step": 23500 }, { "epoch": 18.838304552590266, "grad_norm": 0.7013327479362488, "learning_rate": 3.593135011441648e-05, "loss": 0.8714, "step": 24000 }, { "epoch": 19.23076923076923, "grad_norm": 0.629546046257019, "learning_rate": 3.364302059496568e-05, "loss": 0.8684, "step": 24500 }, { "epoch": 19.623233908948194, "grad_norm": 0.6739959120750427, "learning_rate": 3.135469107551487e-05, "loss": 0.8664, "step": 25000 }, { "epoch": 20.015698587127158, "grad_norm": 0.6923867464065552, "learning_rate": 2.9066361556064075e-05, "loss": 0.8613, "step": 25500 }, { "epoch": 20.408163265306122, "grad_norm": 0.7043192386627197, "learning_rate": 2.677803203661327e-05, "loss": 0.8541, "step": 26000 }, { "epoch": 20.800627943485086, "grad_norm": 0.6633190512657166, "learning_rate": 2.448970251716247e-05, "loss": 0.8558, "step": 26500 }, { "epoch": 21.19309262166405, "grad_norm": 0.6382936239242554, "learning_rate": 2.2201372997711673e-05, "loss": 0.8486, "step": 27000 }, { "epoch": 21.585557299843014, "grad_norm": 0.7126407623291016, "learning_rate": 1.9913043478260872e-05, "loss": 0.8455, "step": 27500 }, { "epoch": 21.978021978021978, "grad_norm": 0.6809006929397583, "learning_rate": 1.7624713958810068e-05, "loss": 0.8382, "step": 28000 }, { "epoch": 22.370486656200942, "grad_norm": 0.6693772077560425, "learning_rate": 1.533638443935927e-05, "loss": 0.8377, "step": 28500 }, { "epoch": 22.762951334379906, "grad_norm": 0.7368608117103577, "learning_rate": 1.3048054919908468e-05, "loss": 0.8346, "step": 29000 }, { "epoch": 23.15541601255887, "grad_norm": 0.6541247963905334, "learning_rate": 1.0759725400457667e-05, "loss": 0.8298, "step": 29500 }, { "epoch": 23.547880690737834, "grad_norm": 0.6780161261558533, "learning_rate": 8.471395881006864e-06, "loss": 0.8293, "step": 30000 }, { "epoch": 23.547880690737834, "eval_accuracy": 0.8285928159953133, "eval_loss": 0.8292354941368103, "eval_runtime": 31.6198, "eval_samples_per_second": 194.53, "eval_steps_per_second": 2.056, "step": 30000 }, { "epoch": 23.940345368916798, "grad_norm": 0.6645314693450928, "learning_rate": 6.183066361556064e-06, "loss": 0.8306, "step": 30500 }, { "epoch": 24.332810047095762, "grad_norm": 0.7622667551040649, "learning_rate": 3.894736842105264e-06, "loss": 0.8225, "step": 31000 }, { "epoch": 24.725274725274726, "grad_norm": 0.6563706398010254, "learning_rate": 1.6064073226544622e-06, "loss": 0.8275, "step": 31500 }, { "epoch": 25.0, "step": 31850, "total_flos": 8.0444602960128e+17, "train_loss": 1.908989332549426, "train_runtime": 23433.9018, "train_samples_per_second": 130.424, "train_steps_per_second": 1.359 } ], "logging_steps": 500, "max_steps": 31850, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.0444602960128e+17, "train_batch_size": 96, "trial_name": null, "trial_params": null }