| { | |
| "best_metric": 0.5012531328320803, | |
| "best_model_checkpoint": "/content/our_data/checkpoint-9000", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 12410, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.91941982272361e-05, | |
| "loss": 1.9177, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.6004288777698356, | |
| "eval_f1": 0.0379746835443038, | |
| "eval_loss": 1.6839170455932617, | |
| "eval_precision": 0.06, | |
| "eval_recall": 0.027777777777777776, | |
| "eval_runtime": 3.4052, | |
| "eval_samples_per_second": 89.275, | |
| "eval_steps_per_second": 44.637, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.83883964544722e-05, | |
| "loss": 1.4976, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.63128425065523, | |
| "eval_f1": 0.24557116676847893, | |
| "eval_loss": 1.4935959577560425, | |
| "eval_precision": 0.22814982973893302, | |
| "eval_recall": 0.26587301587301587, | |
| "eval_runtime": 2.3845, | |
| "eval_samples_per_second": 127.488, | |
| "eval_steps_per_second": 63.744, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.75825946817083e-05, | |
| "loss": 1.2309, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_accuracy": 0.6657136049559209, | |
| "eval_f1": 0.2877871825876663, | |
| "eval_loss": 1.2914698123931885, | |
| "eval_precision": 0.2650334075723831, | |
| "eval_recall": 0.3148148148148148, | |
| "eval_runtime": 3.4851, | |
| "eval_samples_per_second": 87.229, | |
| "eval_steps_per_second": 43.615, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.67767929089444e-05, | |
| "loss": 1.0546, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_accuracy": 0.6803669287586371, | |
| "eval_f1": 0.33198380566801616, | |
| "eval_loss": 1.2454315423965454, | |
| "eval_precision": 0.2949640287769784, | |
| "eval_recall": 0.37962962962962965, | |
| "eval_runtime": 2.4542, | |
| "eval_samples_per_second": 123.867, | |
| "eval_steps_per_second": 61.934, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.59709911361805e-05, | |
| "loss": 0.9405, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_accuracy": 0.6915654038599, | |
| "eval_f1": 0.35719063545150503, | |
| "eval_loss": 1.2377290725708008, | |
| "eval_precision": 0.36129905277401897, | |
| "eval_recall": 0.3531746031746032, | |
| "eval_runtime": 3.3956, | |
| "eval_samples_per_second": 89.527, | |
| "eval_steps_per_second": 44.763, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.5165189363416601e-05, | |
| "loss": 0.7501, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_accuracy": 0.7170598046223493, | |
| "eval_f1": 0.3872549019607843, | |
| "eval_loss": 1.1723062992095947, | |
| "eval_precision": 0.3607305936073059, | |
| "eval_recall": 0.41798941798941797, | |
| "eval_runtime": 2.4666, | |
| "eval_samples_per_second": 123.248, | |
| "eval_steps_per_second": 61.624, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.4359387590652701e-05, | |
| "loss": 0.7133, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 0.7159876101977604, | |
| "eval_f1": 0.39976204640095175, | |
| "eval_loss": 1.1583572626113892, | |
| "eval_precision": 0.36324324324324325, | |
| "eval_recall": 0.4444444444444444, | |
| "eval_runtime": 2.4767, | |
| "eval_samples_per_second": 122.743, | |
| "eval_steps_per_second": 61.371, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 1.35535858178888e-05, | |
| "loss": 0.5896, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "eval_accuracy": 0.7306409340004766, | |
| "eval_f1": 0.42666666666666664, | |
| "eval_loss": 1.2287709712982178, | |
| "eval_precision": 0.41025641025641024, | |
| "eval_recall": 0.4444444444444444, | |
| "eval_runtime": 3.2475, | |
| "eval_samples_per_second": 93.609, | |
| "eval_steps_per_second": 46.805, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 1.27477840451249e-05, | |
| "loss": 0.5353, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "eval_accuracy": 0.7253990945913747, | |
| "eval_f1": 0.4356672651107121, | |
| "eval_loss": 1.2319059371948242, | |
| "eval_precision": 0.3978142076502732, | |
| "eval_recall": 0.48148148148148145, | |
| "eval_runtime": 2.3963, | |
| "eval_samples_per_second": 126.861, | |
| "eval_steps_per_second": 63.431, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 1.1941982272361e-05, | |
| "loss": 0.5432, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_accuracy": 0.7306409340004766, | |
| "eval_f1": 0.4548825710754017, | |
| "eval_loss": 1.2172613143920898, | |
| "eval_precision": 0.42691415313225056, | |
| "eval_recall": 0.48677248677248675, | |
| "eval_runtime": 3.3895, | |
| "eval_samples_per_second": 89.689, | |
| "eval_steps_per_second": 44.845, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 1.11361804995971e-05, | |
| "loss": 0.4062, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "eval_accuracy": 0.7271860852990231, | |
| "eval_f1": 0.4691358024691359, | |
| "eval_loss": 1.283239722251892, | |
| "eval_precision": 0.4398148148148148, | |
| "eval_recall": 0.5026455026455027, | |
| "eval_runtime": 2.4143, | |
| "eval_samples_per_second": 125.916, | |
| "eval_steps_per_second": 62.958, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.0330378726833199e-05, | |
| "loss": 0.4485, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "eval_accuracy": 0.7412437455325233, | |
| "eval_f1": 0.4610778443113772, | |
| "eval_loss": 1.2196030616760254, | |
| "eval_precision": 0.4212253829321663, | |
| "eval_recall": 0.5092592592592593, | |
| "eval_runtime": 2.7872, | |
| "eval_samples_per_second": 109.069, | |
| "eval_steps_per_second": 54.534, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 9.5245769540693e-06, | |
| "loss": 0.3614, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "eval_accuracy": 0.732547057421968, | |
| "eval_f1": 0.46210720887245843, | |
| "eval_loss": 1.3155299425125122, | |
| "eval_precision": 0.43252595155709345, | |
| "eval_recall": 0.49603174603174605, | |
| "eval_runtime": 3.3893, | |
| "eval_samples_per_second": 89.694, | |
| "eval_steps_per_second": 44.847, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 8.7187751813054e-06, | |
| "loss": 0.3308, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "eval_accuracy": 0.7354062425542054, | |
| "eval_f1": 0.4604402141582391, | |
| "eval_loss": 1.3501168489456177, | |
| "eval_precision": 0.4183783783783784, | |
| "eval_recall": 0.5119047619047619, | |
| "eval_runtime": 2.4288, | |
| "eval_samples_per_second": 125.165, | |
| "eval_steps_per_second": 62.583, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 7.9129734085415e-06, | |
| "loss": 0.3645, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "eval_accuracy": 0.7365975696926376, | |
| "eval_f1": 0.4730792498487599, | |
| "eval_loss": 1.3390766382217407, | |
| "eval_precision": 0.4358974358974359, | |
| "eval_recall": 0.5171957671957672, | |
| "eval_runtime": 2.391, | |
| "eval_samples_per_second": 127.141, | |
| "eval_steps_per_second": 63.571, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 7.107171635777599e-06, | |
| "loss": 0.2982, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "eval_accuracy": 0.7314748629973791, | |
| "eval_f1": 0.4590354445090064, | |
| "eval_loss": 1.3889434337615967, | |
| "eval_precision": 0.40932642487046633, | |
| "eval_recall": 0.5224867724867724, | |
| "eval_runtime": 3.0054, | |
| "eval_samples_per_second": 101.151, | |
| "eval_steps_per_second": 50.576, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 6.301369863013699e-06, | |
| "loss": 0.2845, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "eval_accuracy": 0.7376697641172266, | |
| "eval_f1": 0.47794117647058826, | |
| "eval_loss": 1.4109262228012085, | |
| "eval_precision": 0.4452054794520548, | |
| "eval_recall": 0.5158730158730159, | |
| "eval_runtime": 2.4417, | |
| "eval_samples_per_second": 124.505, | |
| "eval_steps_per_second": 62.253, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 5.495568090249799e-06, | |
| "loss": 0.2482, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "eval_accuracy": 0.7375506314033834, | |
| "eval_f1": 0.5012531328320803, | |
| "eval_loss": 1.4667584896087646, | |
| "eval_precision": 0.47619047619047616, | |
| "eval_recall": 0.5291005291005291, | |
| "eval_runtime": 2.4653, | |
| "eval_samples_per_second": 123.313, | |
| "eval_steps_per_second": 61.657, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 4.689766317485899e-06, | |
| "loss": 0.2636, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "eval_accuracy": 0.73409578270193, | |
| "eval_f1": 0.49150485436893204, | |
| "eval_loss": 1.4925192594528198, | |
| "eval_precision": 0.45403587443946186, | |
| "eval_recall": 0.5357142857142857, | |
| "eval_runtime": 3.4322, | |
| "eval_samples_per_second": 88.572, | |
| "eval_steps_per_second": 44.286, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 3.883964544721999e-06, | |
| "loss": 0.2605, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "eval_accuracy": 0.7405289492494639, | |
| "eval_f1": 0.49358582773365917, | |
| "eval_loss": 1.4916423559188843, | |
| "eval_precision": 0.4585698070374574, | |
| "eval_recall": 0.5343915343915344, | |
| "eval_runtime": 2.4755, | |
| "eval_samples_per_second": 122.805, | |
| "eval_steps_per_second": 61.403, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 3.0781627719580986e-06, | |
| "loss": 0.1989, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "eval_accuracy": 0.7387419585418156, | |
| "eval_f1": 0.4990780577750461, | |
| "eval_loss": 1.5096321105957031, | |
| "eval_precision": 0.4661308840413318, | |
| "eval_recall": 0.5370370370370371, | |
| "eval_runtime": 3.3245, | |
| "eval_samples_per_second": 91.444, | |
| "eval_steps_per_second": 45.722, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 2.2723609991941985e-06, | |
| "loss": 0.2415, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "eval_accuracy": 0.744341196092447, | |
| "eval_f1": 0.4990914597213809, | |
| "eval_loss": 1.4698182344436646, | |
| "eval_precision": 0.46033519553072627, | |
| "eval_recall": 0.544973544973545, | |
| "eval_runtime": 2.5811, | |
| "eval_samples_per_second": 117.779, | |
| "eval_steps_per_second": 58.889, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 1.4665592264302982e-06, | |
| "loss": 0.2488, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "eval_accuracy": 0.7455325232308792, | |
| "eval_f1": 0.491421568627451, | |
| "eval_loss": 1.4736005067825317, | |
| "eval_precision": 0.4577625570776256, | |
| "eval_recall": 0.5304232804232805, | |
| "eval_runtime": 3.3981, | |
| "eval_samples_per_second": 89.462, | |
| "eval_steps_per_second": 44.731, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 6.607574536663981e-07, | |
| "loss": 0.2129, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "eval_accuracy": 0.7438646652370741, | |
| "eval_f1": 0.5012165450121654, | |
| "eval_loss": 1.506749153137207, | |
| "eval_precision": 0.46396396396396394, | |
| "eval_recall": 0.544973544973545, | |
| "eval_runtime": 2.854, | |
| "eval_samples_per_second": 106.517, | |
| "eval_steps_per_second": 53.258, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 12410, | |
| "total_flos": 503702005049490.0, | |
| "train_loss": 0.5683070008357046, | |
| "train_runtime": 1353.7746, | |
| "train_samples_per_second": 18.327, | |
| "train_steps_per_second": 9.167 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 12410, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 503702005049490.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |