| { |
| "best_metric": 0.06405104696750641, |
| "best_model_checkpoint": "./trained_model/checkpoint-2144", |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 2680, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 35.17252731323242, |
| "learning_rate": 5e-06, |
| "loss": 3.7473, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.04727812496565904, |
| "eval_loss": 3.678004264831543, |
| "eval_runtime": 4.792, |
| "eval_samples_per_second": 98.706, |
| "eval_steps_per_second": 1.669, |
| "step": 134 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 199.6754913330078, |
| "learning_rate": 1e-05, |
| "loss": 3.2514, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.266607271215907, |
| "eval_loss": 2.5477890968322754, |
| "eval_runtime": 4.7749, |
| "eval_samples_per_second": 99.059, |
| "eval_steps_per_second": 1.675, |
| "step": 268 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 171.65182495117188, |
| "learning_rate": 9.444444444444445e-06, |
| "loss": 1.8984, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1": 0.7324588125106856, |
| "eval_loss": 1.1185234785079956, |
| "eval_runtime": 4.6545, |
| "eval_samples_per_second": 101.621, |
| "eval_steps_per_second": 1.719, |
| "step": 402 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 35.59435272216797, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 1.0211, |
| "step": 536 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_f1": 0.860704216321126, |
| "eval_loss": 0.5424026846885681, |
| "eval_runtime": 4.7373, |
| "eval_samples_per_second": 99.846, |
| "eval_steps_per_second": 1.689, |
| "step": 536 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 3.402517318725586, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6218, |
| "step": 670 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_f1": 0.9176158056018094, |
| "eval_loss": 0.29218313097953796, |
| "eval_runtime": 4.6529, |
| "eval_samples_per_second": 101.658, |
| "eval_steps_per_second": 1.719, |
| "step": 670 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 2.050248384475708, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.4323, |
| "step": 804 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_f1": 0.9496526464988996, |
| "eval_loss": 0.19649288058280945, |
| "eval_runtime": 4.695, |
| "eval_samples_per_second": 100.745, |
| "eval_steps_per_second": 1.704, |
| "step": 804 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.15155179798603058, |
| "learning_rate": 7.222222222222223e-06, |
| "loss": 0.3037, |
| "step": 938 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_f1": 0.9658213272069875, |
| "eval_loss": 0.1448889821767807, |
| "eval_runtime": 4.6742, |
| "eval_samples_per_second": 101.194, |
| "eval_steps_per_second": 1.712, |
| "step": 938 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 1.328462839126587, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.2473, |
| "step": 1072 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_f1": 0.9722665265768234, |
| "eval_loss": 0.11456680297851562, |
| "eval_runtime": 4.6339, |
| "eval_samples_per_second": 102.073, |
| "eval_steps_per_second": 1.726, |
| "step": 1072 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 313.9409484863281, |
| "learning_rate": 6.111111111111112e-06, |
| "loss": 0.2064, |
| "step": 1206 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_f1": 0.9780034572033636, |
| "eval_loss": 0.09856382757425308, |
| "eval_runtime": 4.6934, |
| "eval_samples_per_second": 100.779, |
| "eval_steps_per_second": 1.705, |
| "step": 1206 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 189.65391540527344, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.1533, |
| "step": 1340 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_f1": 0.9778725803283007, |
| "eval_loss": 0.08937614411115646, |
| "eval_runtime": 4.6566, |
| "eval_samples_per_second": 101.577, |
| "eval_steps_per_second": 1.718, |
| "step": 1340 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 22.184070587158203, |
| "learning_rate": 5e-06, |
| "loss": 0.1395, |
| "step": 1474 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_f1": 0.9807261483480538, |
| "eval_loss": 0.0832449197769165, |
| "eval_runtime": 4.6769, |
| "eval_samples_per_second": 101.135, |
| "eval_steps_per_second": 1.711, |
| "step": 1474 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 7.0296196937561035, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.1249, |
| "step": 1608 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_f1": 0.9865706513457244, |
| "eval_loss": 0.07267424464225769, |
| "eval_runtime": 4.6735, |
| "eval_samples_per_second": 101.21, |
| "eval_steps_per_second": 1.712, |
| "step": 1608 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.09863970428705215, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 0.1238, |
| "step": 1742 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_f1": 0.9886923352646306, |
| "eval_loss": 0.0811920091509819, |
| "eval_runtime": 4.7013, |
| "eval_samples_per_second": 100.611, |
| "eval_steps_per_second": 1.702, |
| "step": 1742 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.17580562829971313, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.1103, |
| "step": 1876 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_f1": 0.9887811136168541, |
| "eval_loss": 0.07010301947593689, |
| "eval_runtime": 4.6405, |
| "eval_samples_per_second": 101.93, |
| "eval_steps_per_second": 1.724, |
| "step": 1876 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 1.5174826383590698, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 0.0953, |
| "step": 2010 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_f1": 0.9882510714507506, |
| "eval_loss": 0.07096822559833527, |
| "eval_runtime": 4.6554, |
| "eval_samples_per_second": 101.602, |
| "eval_steps_per_second": 1.718, |
| "step": 2010 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.018986046314239502, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.0831, |
| "step": 2144 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_f1": 0.9904615337218803, |
| "eval_loss": 0.06405104696750641, |
| "eval_runtime": 4.7164, |
| "eval_samples_per_second": 100.288, |
| "eval_steps_per_second": 1.696, |
| "step": 2144 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.7639440298080444, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.0813, |
| "step": 2278 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_f1": 0.9904615337218803, |
| "eval_loss": 0.06663929671049118, |
| "eval_runtime": 4.6329, |
| "eval_samples_per_second": 102.095, |
| "eval_steps_per_second": 1.727, |
| "step": 2278 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.013097506947815418, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.085, |
| "step": 2412 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_f1": 0.9904615337218803, |
| "eval_loss": 0.06558458507061005, |
| "eval_runtime": 4.6458, |
| "eval_samples_per_second": 101.813, |
| "eval_steps_per_second": 1.722, |
| "step": 2412 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.7450041770935059, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 0.0716, |
| "step": 2546 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_f1": 0.9904615337218803, |
| "eval_loss": 0.06490221619606018, |
| "eval_runtime": 4.6882, |
| "eval_samples_per_second": 100.891, |
| "eval_steps_per_second": 1.706, |
| "step": 2546 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.07539009302854538, |
| "learning_rate": 0.0, |
| "loss": 0.084, |
| "step": 2680 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_f1": 0.9904615337218803, |
| "eval_loss": 0.06570780277252197, |
| "eval_runtime": 4.6524, |
| "eval_samples_per_second": 101.669, |
| "eval_steps_per_second": 1.72, |
| "step": 2680 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 2680, |
| "total_flos": 2.773139351595909e+18, |
| "train_loss": 0.6440982267038146, |
| "train_runtime": 2307.5728, |
| "train_samples_per_second": 36.896, |
| "train_steps_per_second": 1.161 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_f1": 0.9689867070609877, |
| "eval_loss": 0.10238795727491379, |
| "eval_runtime": 125.4849, |
| "eval_samples_per_second": 100.65, |
| "eval_steps_per_second": 1.578, |
| "step": 2680 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2680, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.773139351595909e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|