| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 10560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8086087107658386, |
| "learning_rate": 4.75e-05, |
| "loss": 0.4434, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9455927655768658, |
| "eval_f1": 0.7605633802816901, |
| "eval_loss": 0.16302905976772308, |
| "eval_precision": 0.679945054945055, |
| "eval_recall": 0.862870424171993, |
| "eval_runtime": 4.5797, |
| "eval_samples_per_second": 204.163, |
| "eval_steps_per_second": 3.275, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7114465832710266, |
| "learning_rate": 4.5e-05, |
| "loss": 0.1462, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9566729603497963, |
| "eval_f1": 0.8073836276083467, |
| "eval_loss": 0.12938551604747772, |
| "eval_precision": 0.7481408031730292, |
| "eval_recall": 0.8768158047646717, |
| "eval_runtime": 4.6537, |
| "eval_samples_per_second": 200.915, |
| "eval_steps_per_second": 3.223, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.316323161125183, |
| "learning_rate": 4.25e-05, |
| "loss": 0.1183, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9568717082381, |
| "eval_f1": 0.8116639914392724, |
| "eval_loss": 0.13784636557102203, |
| "eval_precision": 0.7521070897372335, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.903, |
| "eval_samples_per_second": 190.701, |
| "eval_steps_per_second": 3.059, |
| "step": 1584 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.4418916702270508, |
| "learning_rate": 4e-05, |
| "loss": 0.1012, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9596541786743515, |
| "eval_f1": 0.8231144872490505, |
| "eval_loss": 0.1358918398618698, |
| "eval_precision": 0.7720101781170483, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.7436, |
| "eval_samples_per_second": 197.109, |
| "eval_steps_per_second": 3.162, |
| "step": 2112 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.0950379371643066, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.0884, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9622379012222995, |
| "eval_f1": 0.8348926802421575, |
| "eval_loss": 0.12661471962928772, |
| "eval_precision": 0.7929952953476216, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.6673, |
| "eval_samples_per_second": 200.331, |
| "eval_steps_per_second": 3.214, |
| "step": 2640 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.270456314086914, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0793, |
| "step": 3168 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9610454138924774, |
| "eval_f1": 0.8404432132963989, |
| "eval_loss": 0.1408655196428299, |
| "eval_precision": 0.803070407623081, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.6946, |
| "eval_samples_per_second": 199.164, |
| "eval_steps_per_second": 3.195, |
| "step": 3168 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.6711246967315674, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.072, |
| "step": 3696 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9588591871211368, |
| "eval_f1": 0.8222222222222223, |
| "eval_loss": 0.1545909196138382, |
| "eval_precision": 0.7704418486541391, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.6637, |
| "eval_samples_per_second": 200.485, |
| "eval_steps_per_second": 3.216, |
| "step": 3696 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.5584707260131836, |
| "learning_rate": 3e-05, |
| "loss": 0.067, |
| "step": 4224 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9608466660041737, |
| "eval_f1": 0.8334258745141587, |
| "eval_loss": 0.14326535165309906, |
| "eval_precision": 0.797979797979798, |
| "eval_recall": 0.8721673445671121, |
| "eval_runtime": 4.6647, |
| "eval_samples_per_second": 200.444, |
| "eval_steps_per_second": 3.216, |
| "step": 4224 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.5288811326026917, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.0607, |
| "step": 4752 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9598529265626553, |
| "eval_f1": 0.8312328767123287, |
| "eval_loss": 0.1468406319618225, |
| "eval_precision": 0.7864178330741317, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.6583, |
| "eval_samples_per_second": 200.719, |
| "eval_steps_per_second": 3.22, |
| "step": 4752 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.041274070739746, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0562, |
| "step": 5280 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9612441617807811, |
| "eval_f1": 0.8267029972752045, |
| "eval_loss": 0.14973483979701996, |
| "eval_precision": 0.7783478707029245, |
| "eval_recall": 0.8814642649622313, |
| "eval_runtime": 4.6532, |
| "eval_samples_per_second": 200.937, |
| "eval_steps_per_second": 3.224, |
| "step": 5280 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 1.7796343564987183, |
| "learning_rate": 2.25e-05, |
| "loss": 0.0506, |
| "step": 5808 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.9594554307860479, |
| "eval_f1": 0.8332413031474324, |
| "eval_loss": 0.1600087583065033, |
| "eval_precision": 0.793792740662809, |
| "eval_recall": 0.8768158047646717, |
| "eval_runtime": 4.6756, |
| "eval_samples_per_second": 199.975, |
| "eval_steps_per_second": 3.208, |
| "step": 5808 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 1.6123548746109009, |
| "learning_rate": 2e-05, |
| "loss": 0.0483, |
| "step": 6336 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9608466660041737, |
| "eval_f1": 0.8328721638074155, |
| "eval_loss": 0.15964019298553467, |
| "eval_precision": 0.7950343370311674, |
| "eval_recall": 0.8744915746658919, |
| "eval_runtime": 4.6719, |
| "eval_samples_per_second": 200.134, |
| "eval_steps_per_second": 3.211, |
| "step": 6336 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.9525193572044373, |
| "learning_rate": 1.75e-05, |
| "loss": 0.0443, |
| "step": 6864 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.96064791811587, |
| "eval_f1": 0.8237547892720306, |
| "eval_loss": 0.15955425798892975, |
| "eval_precision": 0.7785825142265907, |
| "eval_recall": 0.8744915746658919, |
| "eval_runtime": 4.6803, |
| "eval_samples_per_second": 199.774, |
| "eval_steps_per_second": 3.205, |
| "step": 6864 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.5356388688087463, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0421, |
| "step": 7392 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9612441617807811, |
| "eval_f1": 0.8350857775318208, |
| "eval_loss": 0.16503094136714935, |
| "eval_precision": 0.7971473851030111, |
| "eval_recall": 0.8768158047646717, |
| "eval_runtime": 4.671, |
| "eval_samples_per_second": 200.172, |
| "eval_steps_per_second": 3.211, |
| "step": 7392 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.6785407662391663, |
| "learning_rate": 1.25e-05, |
| "loss": 0.0395, |
| "step": 7920 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9602504223392626, |
| "eval_f1": 0.8284449363586054, |
| "eval_loss": 0.16934077441692352, |
| "eval_precision": 0.7908082408874801, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.6607, |
| "eval_samples_per_second": 200.612, |
| "eval_steps_per_second": 3.218, |
| "step": 7920 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 1.843337893486023, |
| "learning_rate": 1e-05, |
| "loss": 0.0375, |
| "step": 8448 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9594554307860479, |
| "eval_f1": 0.8336088154269972, |
| "eval_loss": 0.17250221967697144, |
| "eval_precision": 0.7925615505500262, |
| "eval_recall": 0.8791400348634515, |
| "eval_runtime": 4.6872, |
| "eval_samples_per_second": 199.478, |
| "eval_steps_per_second": 3.2, |
| "step": 8448 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.6304071545600891, |
| "learning_rate": 7.5e-06, |
| "loss": 0.0358, |
| "step": 8976 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9612441617807811, |
| "eval_f1": 0.8321289605336298, |
| "eval_loss": 0.17892615497112274, |
| "eval_precision": 0.7975492807671817, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.6497, |
| "eval_samples_per_second": 201.086, |
| "eval_steps_per_second": 3.226, |
| "step": 8976 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 1.1330559253692627, |
| "learning_rate": 5e-06, |
| "loss": 0.0339, |
| "step": 9504 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.960051674450959, |
| "eval_f1": 0.8225895316804408, |
| "eval_loss": 0.17817425727844238, |
| "eval_precision": 0.7820848611838659, |
| "eval_recall": 0.8675188843695526, |
| "eval_runtime": 4.6927, |
| "eval_samples_per_second": 199.247, |
| "eval_steps_per_second": 3.196, |
| "step": 9504 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.08901867270469666, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0327, |
| "step": 10032 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9620391533339958, |
| "eval_f1": 0.833983286908078, |
| "eval_loss": 0.17433172464370728, |
| "eval_precision": 0.8009630818619583, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.6614, |
| "eval_samples_per_second": 200.583, |
| "eval_steps_per_second": 3.218, |
| "step": 10032 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.3878382444381714, |
| "learning_rate": 0.0, |
| "loss": 0.0327, |
| "step": 10560 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9608466660041737, |
| "eval_f1": 0.8321289605336298, |
| "eval_loss": 0.17778323590755463, |
| "eval_precision": 0.7975492807671817, |
| "eval_recall": 0.8698431144683324, |
| "eval_runtime": 4.9618, |
| "eval_samples_per_second": 188.439, |
| "eval_steps_per_second": 3.023, |
| "step": 10560 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 10560, |
| "total_flos": 5062583230111038.0, |
| "train_loss": 0.08150525255636736, |
| "train_runtime": 1343.1683, |
| "train_samples_per_second": 125.628, |
| "train_steps_per_second": 7.862 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 10560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 5062583230111038.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|