| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 2440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.737851142883301, |
| "learning_rate": 4.75e-05, |
| "loss": 0.5636, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7218045112781954, |
| "eval_f1": 0.6379754285293902, |
| "eval_loss": 0.515233039855957, |
| "eval_precision": 0.6550161812297735, |
| "eval_recall": 0.6306601200218221, |
| "eval_runtime": 1.7102, |
| "eval_samples_per_second": 233.304, |
| "eval_steps_per_second": 29.236, |
| "step": 122 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.684419870376587, |
| "learning_rate": 4.5e-05, |
| "loss": 0.5068, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.731829573934837, |
| "eval_f1": 0.6876906533236773, |
| "eval_loss": 0.49799418449401855, |
| "eval_precision": 0.6827970718420157, |
| "eval_recall": 0.6952627750500091, |
| "eval_runtime": 1.7043, |
| "eval_samples_per_second": 234.116, |
| "eval_steps_per_second": 29.338, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 3.5807459354400635, |
| "learning_rate": 4.25e-05, |
| "loss": 0.4683, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7694235588972431, |
| "eval_f1": 0.7232342119548827, |
| "eval_loss": 0.4658961892127991, |
| "eval_precision": 0.7221638655462185, |
| "eval_recall": 0.7243589743589745, |
| "eval_runtime": 1.7025, |
| "eval_samples_per_second": 234.358, |
| "eval_steps_per_second": 29.368, |
| "step": 366 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 4.049397945404053, |
| "learning_rate": 4e-05, |
| "loss": 0.4465, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7894736842105263, |
| "eval_f1": 0.7421052631578947, |
| "eval_loss": 0.4400848150253296, |
| "eval_precision": 0.7461993243243243, |
| "eval_recall": 0.7385433715220949, |
| "eval_runtime": 1.7046, |
| "eval_samples_per_second": 234.074, |
| "eval_steps_per_second": 29.333, |
| "step": 488 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 7.287342548370361, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.4152, |
| "step": 610 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8120300751879699, |
| "eval_f1": 0.7781114447781114, |
| "eval_loss": 0.4213794767856598, |
| "eval_precision": 0.7728937728937728, |
| "eval_recall": 0.784506273867976, |
| "eval_runtime": 1.7034, |
| "eval_samples_per_second": 234.242, |
| "eval_steps_per_second": 29.354, |
| "step": 610 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 3.4730327129364014, |
| "learning_rate": 3.5e-05, |
| "loss": 0.3848, |
| "step": 732 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8320802005012531, |
| "eval_f1": 0.8052907201177011, |
| "eval_loss": 0.4165918529033661, |
| "eval_precision": 0.7961137707687975, |
| "eval_recall": 0.8186943080560102, |
| "eval_runtime": 1.7018, |
| "eval_samples_per_second": 234.453, |
| "eval_steps_per_second": 29.38, |
| "step": 732 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 6.1496357917785645, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.3714, |
| "step": 854 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8395989974937343, |
| "eval_f1": 0.7931972789115647, |
| "eval_loss": 0.39873412251472473, |
| "eval_precision": 0.8205776934429685, |
| "eval_recall": 0.7765048190580106, |
| "eval_runtime": 1.7029, |
| "eval_samples_per_second": 234.301, |
| "eval_steps_per_second": 29.361, |
| "step": 854 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 5.676291465759277, |
| "learning_rate": 3e-05, |
| "loss": 0.3529, |
| "step": 976 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.8421052631578947, |
| "eval_f1": 0.7945265945707349, |
| "eval_loss": 0.39004629850387573, |
| "eval_precision": 0.8271844660194174, |
| "eval_recall": 0.7757774140752864, |
| "eval_runtime": 1.7018, |
| "eval_samples_per_second": 234.461, |
| "eval_steps_per_second": 29.381, |
| "step": 976 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 3.7676236629486084, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.3471, |
| "step": 1098 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8521303258145363, |
| "eval_f1": 0.8174054682955206, |
| "eval_loss": 0.3774680197238922, |
| "eval_precision": 0.825840015273005, |
| "eval_recall": 0.810374613566103, |
| "eval_runtime": 1.7048, |
| "eval_samples_per_second": 234.049, |
| "eval_steps_per_second": 29.329, |
| "step": 1098 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 6.180309772491455, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3335, |
| "step": 1220 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8289650949173301, |
| "eval_loss": 0.37219953536987305, |
| "eval_precision": 0.8325081997648369, |
| "eval_recall": 0.82569558101473, |
| "eval_runtime": 1.7032, |
| "eval_samples_per_second": 234.267, |
| "eval_steps_per_second": 29.357, |
| "step": 1220 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 10.125824928283691, |
| "learning_rate": 2.25e-05, |
| "loss": 0.3333, |
| "step": 1342 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8209821152299028, |
| "eval_loss": 0.3652091324329376, |
| "eval_precision": 0.8284245491932933, |
| "eval_recall": 0.8146481178396072, |
| "eval_runtime": 1.7066, |
| "eval_samples_per_second": 233.803, |
| "eval_steps_per_second": 29.299, |
| "step": 1342 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 9.260209083557129, |
| "learning_rate": 2e-05, |
| "loss": 0.3171, |
| "step": 1464 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8521303258145363, |
| "eval_f1": 0.818380305052578, |
| "eval_loss": 0.36155039072036743, |
| "eval_precision": 0.8247247562126455, |
| "eval_recall": 0.8128750681942172, |
| "eval_runtime": 1.7057, |
| "eval_samples_per_second": 233.922, |
| "eval_steps_per_second": 29.313, |
| "step": 1464 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 6.139608383178711, |
| "learning_rate": 1.75e-05, |
| "loss": 0.2959, |
| "step": 1586 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_f1": 0.81524926686217, |
| "eval_loss": 0.36369869112968445, |
| "eval_precision": 0.8462682339611953, |
| "eval_recall": 0.7964175304600837, |
| "eval_runtime": 1.7055, |
| "eval_samples_per_second": 233.944, |
| "eval_steps_per_second": 29.316, |
| "step": 1586 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 10.355992317199707, |
| "learning_rate": 1.5e-05, |
| "loss": 0.3177, |
| "step": 1708 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8621553884711779, |
| "eval_f1": 0.8297847585805701, |
| "eval_loss": 0.3496277332305908, |
| "eval_precision": 0.8385357006491028, |
| "eval_recall": 0.8224677214038916, |
| "eval_runtime": 1.7046, |
| "eval_samples_per_second": 234.079, |
| "eval_steps_per_second": 29.333, |
| "step": 1708 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 1.6303342580795288, |
| "learning_rate": 1.25e-05, |
| "loss": 0.2923, |
| "step": 1830 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8289650949173301, |
| "eval_loss": 0.34540727734565735, |
| "eval_precision": 0.8325081997648369, |
| "eval_recall": 0.82569558101473, |
| "eval_runtime": 1.7049, |
| "eval_samples_per_second": 234.031, |
| "eval_steps_per_second": 29.327, |
| "step": 1830 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 5.4151411056518555, |
| "learning_rate": 1e-05, |
| "loss": 0.2887, |
| "step": 1952 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8289650949173301, |
| "eval_loss": 0.3453606367111206, |
| "eval_precision": 0.8325081997648369, |
| "eval_recall": 0.82569558101473, |
| "eval_runtime": 1.7018, |
| "eval_samples_per_second": 234.46, |
| "eval_steps_per_second": 29.381, |
| "step": 1952 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 4.5014424324035645, |
| "learning_rate": 7.5e-06, |
| "loss": 0.2916, |
| "step": 2074 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8280701754385965, |
| "eval_loss": 0.3442404568195343, |
| "eval_precision": 0.833567942942943, |
| "eval_recall": 0.8231951263866157, |
| "eval_runtime": 1.7024, |
| "eval_samples_per_second": 234.37, |
| "eval_steps_per_second": 29.37, |
| "step": 2074 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 7.567499160766602, |
| "learning_rate": 5e-06, |
| "loss": 0.2937, |
| "step": 2196 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8280701754385965, |
| "eval_loss": 0.3432822525501251, |
| "eval_precision": 0.833567942942943, |
| "eval_recall": 0.8231951263866157, |
| "eval_runtime": 1.7013, |
| "eval_samples_per_second": 234.52, |
| "eval_steps_per_second": 29.388, |
| "step": 2196 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 5.598782539367676, |
| "learning_rate": 2.5e-06, |
| "loss": 0.2902, |
| "step": 2318 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.8521303258145363, |
| "eval_f1": 0.8174054682955206, |
| "eval_loss": 0.34225448966026306, |
| "eval_precision": 0.825840015273005, |
| "eval_recall": 0.810374613566103, |
| "eval_runtime": 1.7024, |
| "eval_samples_per_second": 234.376, |
| "eval_steps_per_second": 29.37, |
| "step": 2318 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 3.4671976566314697, |
| "learning_rate": 0.0, |
| "loss": 0.2928, |
| "step": 2440 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8209821152299028, |
| "eval_loss": 0.34143561124801636, |
| "eval_precision": 0.8284245491932933, |
| "eval_recall": 0.8146481178396072, |
| "eval_runtime": 1.7043, |
| "eval_samples_per_second": 234.111, |
| "eval_steps_per_second": 29.337, |
| "step": 2440 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 2440, |
| "total_flos": 7604291693904000.0, |
| "train_loss": 0.36016932315513733, |
| "train_runtime": 612.4361, |
| "train_samples_per_second": 118.804, |
| "train_steps_per_second": 3.984 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 7604291693904000.0, |
| "train_batch_size": 30, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|