| { | |
| "best_metric": 0.45430439710617065, | |
| "best_model_checkpoint": "/content/aptner_cybert/checkpoint-3000", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 8430, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.881376037959668e-05, | |
| "loss": 0.8182, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.9014212120427658, | |
| "eval_f1": 0.3316993464052288, | |
| "eval_loss": 0.5568719506263733, | |
| "eval_precision": 0.5218508997429306, | |
| "eval_recall": 0.24311377245508983, | |
| "eval_runtime": 4.9712, | |
| "eval_samples_per_second": 351.823, | |
| "eval_steps_per_second": 44.053, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.762752075919336e-05, | |
| "loss": 0.5357, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_accuracy": 0.9023790416526444, | |
| "eval_f1": 0.3561038482784018, | |
| "eval_loss": 0.49799811840057373, | |
| "eval_precision": 0.46245815399330464, | |
| "eval_recall": 0.2895209580838323, | |
| "eval_runtime": 6.2711, | |
| "eval_samples_per_second": 278.899, | |
| "eval_steps_per_second": 34.922, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 1.6441281138790037e-05, | |
| "loss": 0.4417, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.9016283103367936, | |
| "eval_f1": 0.3786700523726393, | |
| "eval_loss": 0.477287232875824, | |
| "eval_precision": 0.4029044241810199, | |
| "eval_recall": 0.35718562874251497, | |
| "eval_runtime": 4.9329, | |
| "eval_samples_per_second": 354.557, | |
| "eval_steps_per_second": 44.396, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.5255041518386714e-05, | |
| "loss": 0.394, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_accuracy": 0.89430220818556, | |
| "eval_f1": 0.3816285134743552, | |
| "eval_loss": 0.48396036028862, | |
| "eval_precision": 0.36973610331274565, | |
| "eval_recall": 0.39431137724550896, | |
| "eval_runtime": 6.2239, | |
| "eval_samples_per_second": 281.013, | |
| "eval_steps_per_second": 35.187, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.4068801897983393e-05, | |
| "loss": 0.3534, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 0.8914028320691708, | |
| "eval_f1": 0.3966278602970695, | |
| "eval_loss": 0.474202960729599, | |
| "eval_precision": 0.3585773046213404, | |
| "eval_recall": 0.4437125748502994, | |
| "eval_runtime": 5.0489, | |
| "eval_samples_per_second": 346.412, | |
| "eval_steps_per_second": 43.376, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 1.2882562277580073e-05, | |
| "loss": 0.3048, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "eval_accuracy": 0.9043205881591551, | |
| "eval_f1": 0.43323262839879156, | |
| "eval_loss": 0.45430439710617065, | |
| "eval_precision": 0.4371951219512195, | |
| "eval_recall": 0.4293413173652695, | |
| "eval_runtime": 6.4763, | |
| "eval_samples_per_second": 270.064, | |
| "eval_steps_per_second": 33.816, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 1.169632265717675e-05, | |
| "loss": 0.2992, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "eval_accuracy": 0.890703875326827, | |
| "eval_f1": 0.394885598923284, | |
| "eval_loss": 0.48460009694099426, | |
| "eval_precision": 0.358679706601467, | |
| "eval_recall": 0.43922155688622755, | |
| "eval_runtime": 6.3841, | |
| "eval_samples_per_second": 273.964, | |
| "eval_steps_per_second": 34.304, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 1.0510083036773429e-05, | |
| "loss": 0.2675, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "eval_accuracy": 0.9000232985580782, | |
| "eval_f1": 0.4304409672830725, | |
| "eval_loss": 0.4759831428527832, | |
| "eval_precision": 0.4100271002710027, | |
| "eval_recall": 0.4529940119760479, | |
| "eval_runtime": 5.112, | |
| "eval_samples_per_second": 342.138, | |
| "eval_steps_per_second": 42.841, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 9.323843416370107e-06, | |
| "loss": 0.2454, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "eval_accuracy": 0.9014212120427658, | |
| "eval_f1": 0.4260492040520984, | |
| "eval_loss": 0.470166951417923, | |
| "eval_precision": 0.4123249299719888, | |
| "eval_recall": 0.4407185628742515, | |
| "eval_runtime": 6.2203, | |
| "eval_samples_per_second": 281.177, | |
| "eval_steps_per_second": 35.207, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 8.137603795966786e-06, | |
| "loss": 0.2391, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "eval_accuracy": 0.8979005410442932, | |
| "eval_f1": 0.4270158511371468, | |
| "eval_loss": 0.4743064045906067, | |
| "eval_precision": 0.39565772669220944, | |
| "eval_recall": 0.46377245508982035, | |
| "eval_runtime": 6.4494, | |
| "eval_samples_per_second": 271.187, | |
| "eval_steps_per_second": 33.956, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 6.951364175563464e-06, | |
| "loss": 0.2088, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "eval_accuracy": 0.903828729710839, | |
| "eval_f1": 0.43508568109207085, | |
| "eval_loss": 0.4777669310569763, | |
| "eval_precision": 0.4224478285391991, | |
| "eval_recall": 0.4485029940119761, | |
| "eval_runtime": 5.0603, | |
| "eval_samples_per_second": 345.632, | |
| "eval_steps_per_second": 43.278, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 5.765124555160143e-06, | |
| "loss": 0.2076, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "eval_accuracy": 0.8929560692743793, | |
| "eval_f1": 0.41404164442071545, | |
| "eval_loss": 0.5050208568572998, | |
| "eval_precision": 0.3735549132947977, | |
| "eval_recall": 0.46437125748502994, | |
| "eval_runtime": 5.3928, | |
| "eval_samples_per_second": 324.318, | |
| "eval_steps_per_second": 40.609, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 4.5788849347568215e-06, | |
| "loss": 0.1946, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "eval_accuracy": 0.8977452173237723, | |
| "eval_f1": 0.4283921349881467, | |
| "eval_loss": 0.49643442034721375, | |
| "eval_precision": 0.4009397024275646, | |
| "eval_recall": 0.4598802395209581, | |
| "eval_runtime": 6.4354, | |
| "eval_samples_per_second": 271.776, | |
| "eval_steps_per_second": 34.03, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 3.3926453143535e-06, | |
| "loss": 0.1808, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "eval_accuracy": 0.9028450128142069, | |
| "eval_f1": 0.4383916990920882, | |
| "eval_loss": 0.48777079582214355, | |
| "eval_precision": 0.4226173937204779, | |
| "eval_recall": 0.4553892215568862, | |
| "eval_runtime": 5.0515, | |
| "eval_samples_per_second": 346.235, | |
| "eval_steps_per_second": 43.354, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 2.2064056939501782e-06, | |
| "loss": 0.1683, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "eval_accuracy": 0.8975898936032515, | |
| "eval_f1": 0.42638333103353115, | |
| "eval_loss": 0.4947454333305359, | |
| "eval_precision": 0.39544407473765036, | |
| "eval_recall": 0.4625748502994012, | |
| "eval_runtime": 5.1576, | |
| "eval_samples_per_second": 339.114, | |
| "eval_steps_per_second": 42.462, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 1.0201660735468566e-06, | |
| "loss": 0.1681, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "eval_accuracy": 0.900126847705092, | |
| "eval_f1": 0.4352201257861635, | |
| "eval_loss": 0.4915802478790283, | |
| "eval_precision": 0.40812581913499346, | |
| "eval_recall": 0.4661676646706587, | |
| "eval_runtime": 6.3679, | |
| "eval_samples_per_second": 274.657, | |
| "eval_steps_per_second": 34.391, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 8430, | |
| "total_flos": 1171778510432640.0, | |
| "train_loss": 0.3065130231502076, | |
| "train_runtime": 820.0929, | |
| "train_samples_per_second": 82.235, | |
| "train_steps_per_second": 10.279 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 8430, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 1171778510432640.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |