| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 2440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.611067295074463, |
| "learning_rate": 4.75e-05, |
| "loss": 0.5624, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7243107769423559, |
| "eval_f1": 0.6280256288561936, |
| "eval_loss": 0.521743655204773, |
| "eval_precision": 0.6565268987341772, |
| "eval_recall": 0.6199308965266412, |
| "eval_runtime": 1.7104, |
| "eval_samples_per_second": 233.272, |
| "eval_steps_per_second": 29.232, |
| "step": 122 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 8.575949668884277, |
| "learning_rate": 4.5e-05, |
| "loss": 0.5051, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7192982456140351, |
| "eval_f1": 0.6887502089485709, |
| "eval_loss": 0.520839512348175, |
| "eval_precision": 0.6835816181502343, |
| "eval_recall": 0.7114020731042008, |
| "eval_runtime": 1.709, |
| "eval_samples_per_second": 233.477, |
| "eval_steps_per_second": 29.258, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 4.63930606842041, |
| "learning_rate": 4.25e-05, |
| "loss": 0.4776, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7719298245614035, |
| "eval_f1": 0.7269117082966674, |
| "eval_loss": 0.46694815158843994, |
| "eval_precision": 0.7253136200716845, |
| "eval_recall": 0.7286324786324787, |
| "eval_runtime": 1.707, |
| "eval_samples_per_second": 233.745, |
| "eval_steps_per_second": 29.291, |
| "step": 366 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.649268388748169, |
| "learning_rate": 4e-05, |
| "loss": 0.4447, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7794486215538847, |
| "eval_f1": 0.7190500576110612, |
| "eval_loss": 0.43937617540359497, |
| "eval_precision": 0.7353382945313034, |
| "eval_recall": 0.7089470812875068, |
| "eval_runtime": 1.7082, |
| "eval_samples_per_second": 233.577, |
| "eval_steps_per_second": 29.27, |
| "step": 488 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 15.028106689453125, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.4309, |
| "step": 610 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7994987468671679, |
| "eval_f1": 0.768009768009768, |
| "eval_loss": 0.4312213063240051, |
| "eval_precision": 0.7598063973063973, |
| "eval_recall": 0.7806419348972541, |
| "eval_runtime": 1.7067, |
| "eval_samples_per_second": 233.781, |
| "eval_steps_per_second": 29.296, |
| "step": 610 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 2.052947759628296, |
| "learning_rate": 3.5e-05, |
| "loss": 0.395, |
| "step": 732 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8020050125313283, |
| "eval_f1": 0.7732905629436768, |
| "eval_loss": 0.41726621985435486, |
| "eval_precision": 0.7638030888030888, |
| "eval_recall": 0.7899163484269867, |
| "eval_runtime": 1.7037, |
| "eval_samples_per_second": 234.197, |
| "eval_steps_per_second": 29.348, |
| "step": 732 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.5561754703521729, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.3841, |
| "step": 854 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8245614035087719, |
| "eval_f1": 0.7883706128386979, |
| "eval_loss": 0.4011886715888977, |
| "eval_precision": 0.7883706128386979, |
| "eval_recall": 0.7883706128386979, |
| "eval_runtime": 1.7038, |
| "eval_samples_per_second": 234.18, |
| "eval_steps_per_second": 29.346, |
| "step": 854 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 3.902942419052124, |
| "learning_rate": 3e-05, |
| "loss": 0.3621, |
| "step": 976 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.8345864661654135, |
| "eval_f1": 0.7929065743944637, |
| "eval_loss": 0.38815978169441223, |
| "eval_precision": 0.8062188401994228, |
| "eval_recall": 0.7829605382796871, |
| "eval_runtime": 1.7036, |
| "eval_samples_per_second": 234.212, |
| "eval_steps_per_second": 29.35, |
| "step": 976 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 5.38352632522583, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.3562, |
| "step": 1098 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8320802005012531, |
| "eval_f1": 0.7969311405674351, |
| "eval_loss": 0.39120376110076904, |
| "eval_precision": 0.7976879493115634, |
| "eval_recall": 0.7961902164029824, |
| "eval_runtime": 1.7036, |
| "eval_samples_per_second": 234.206, |
| "eval_steps_per_second": 29.349, |
| "step": 1098 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 3.1229166984558105, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3428, |
| "step": 1220 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.849624060150376, |
| "eval_f1": 0.8157894736842105, |
| "eval_loss": 0.37669265270233154, |
| "eval_precision": 0.8210867117117118, |
| "eval_recall": 0.8111020185488271, |
| "eval_runtime": 1.7026, |
| "eval_samples_per_second": 234.354, |
| "eval_steps_per_second": 29.368, |
| "step": 1220 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 7.114827632904053, |
| "learning_rate": 2.25e-05, |
| "loss": 0.3282, |
| "step": 1342 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8242843661528783, |
| "eval_loss": 0.37359458208084106, |
| "eval_precision": 0.8389366308055628, |
| "eval_recall": 0.8131933078741589, |
| "eval_runtime": 1.7031, |
| "eval_samples_per_second": 234.281, |
| "eval_steps_per_second": 29.359, |
| "step": 1342 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 8.90954875946045, |
| "learning_rate": 2e-05, |
| "loss": 0.3308, |
| "step": 1464 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_f1": 0.8254579780661698, |
| "eval_loss": 0.36908435821533203, |
| "eval_precision": 0.8299216027874565, |
| "eval_recall": 0.8214220767412257, |
| "eval_runtime": 1.7026, |
| "eval_samples_per_second": 234.352, |
| "eval_steps_per_second": 29.367, |
| "step": 1464 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 4.10761022567749, |
| "learning_rate": 1.75e-05, |
| "loss": 0.3143, |
| "step": 1586 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8222604047346316, |
| "eval_loss": 0.36313948035240173, |
| "eval_precision": 0.8423737373737374, |
| "eval_recall": 0.8081923986179305, |
| "eval_runtime": 1.7031, |
| "eval_samples_per_second": 234.283, |
| "eval_steps_per_second": 29.359, |
| "step": 1586 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 6.354517936706543, |
| "learning_rate": 1.5e-05, |
| "loss": 0.3173, |
| "step": 1708 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8228567054500919, |
| "eval_loss": 0.35919609665870667, |
| "eval_precision": 0.8263351692555232, |
| "eval_recall": 0.8196490270958356, |
| "eval_runtime": 1.7057, |
| "eval_samples_per_second": 233.927, |
| "eval_steps_per_second": 29.314, |
| "step": 1708 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 3.759216070175171, |
| "learning_rate": 1.25e-05, |
| "loss": 0.305, |
| "step": 1830 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.849624060150376, |
| "eval_f1": 0.8167483159828537, |
| "eval_loss": 0.3542298674583435, |
| "eval_precision": 0.8201621387462095, |
| "eval_recall": 0.8136024731769412, |
| "eval_runtime": 1.7036, |
| "eval_samples_per_second": 234.208, |
| "eval_steps_per_second": 29.349, |
| "step": 1830 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 3.776470899581909, |
| "learning_rate": 1e-05, |
| "loss": 0.2968, |
| "step": 1952 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8237632508833923, |
| "eval_loss": 0.354081928730011, |
| "eval_precision": 0.8254439681567667, |
| "eval_recall": 0.8221494817239499, |
| "eval_runtime": 1.7034, |
| "eval_samples_per_second": 234.232, |
| "eval_steps_per_second": 29.352, |
| "step": 1952 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 4.986289024353027, |
| "learning_rate": 7.5e-06, |
| "loss": 0.3049, |
| "step": 2074 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8209821152299028, |
| "eval_loss": 0.34866294264793396, |
| "eval_precision": 0.8284245491932933, |
| "eval_recall": 0.8146481178396072, |
| "eval_runtime": 1.7032, |
| "eval_samples_per_second": 234.27, |
| "eval_steps_per_second": 29.357, |
| "step": 2074 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 5.735553741455078, |
| "learning_rate": 5e-06, |
| "loss": 0.3001, |
| "step": 2196 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8255172205802521, |
| "eval_loss": 0.3513628840446472, |
| "eval_precision": 0.8239495798319327, |
| "eval_recall": 0.8271503909801782, |
| "eval_runtime": 1.7032, |
| "eval_samples_per_second": 234.26, |
| "eval_steps_per_second": 29.356, |
| "step": 2196 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 3.22857666015625, |
| "learning_rate": 2.5e-06, |
| "loss": 0.2986, |
| "step": 2318 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.8621553884711779, |
| "eval_f1": 0.8297847585805701, |
| "eval_loss": 0.34794819355010986, |
| "eval_precision": 0.8385357006491028, |
| "eval_recall": 0.8224677214038916, |
| "eval_runtime": 1.703, |
| "eval_samples_per_second": 234.296, |
| "eval_steps_per_second": 29.36, |
| "step": 2318 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 3.945305109024048, |
| "learning_rate": 0.0, |
| "loss": 0.2894, |
| "step": 2440 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.849624060150376, |
| "eval_f1": 0.8167483159828537, |
| "eval_loss": 0.34796616435050964, |
| "eval_precision": 0.8201621387462095, |
| "eval_recall": 0.8136024731769412, |
| "eval_runtime": 1.7033, |
| "eval_samples_per_second": 234.246, |
| "eval_steps_per_second": 29.354, |
| "step": 2440 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 2440, |
| "total_flos": 7604291693904000.0, |
| "train_loss": 0.3673181408741435, |
| "train_runtime": 612.7283, |
| "train_samples_per_second": 118.748, |
| "train_steps_per_second": 3.982 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 7604291693904000.0, |
| "train_batch_size": 30, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|