| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 2440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.00977087020874, |
| "learning_rate": 4.75e-05, |
| "loss": 0.5604, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7167919799498746, |
| "eval_f1": 0.608283160007298, |
| "eval_loss": 0.5190858244895935, |
| "eval_precision": 0.6430937818552498, |
| "eval_recall": 0.6021094744499, |
| "eval_runtime": 1.7103, |
| "eval_samples_per_second": 233.298, |
| "eval_steps_per_second": 29.235, |
| "step": 122 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.0487446784973145, |
| "learning_rate": 4.5e-05, |
| "loss": 0.5023, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7293233082706767, |
| "eval_f1": 0.6894315036900369, |
| "eval_loss": 0.5117867588996887, |
| "eval_precision": 0.6834947426674045, |
| "eval_recall": 0.7009910892889616, |
| "eval_runtime": 1.7052, |
| "eval_samples_per_second": 233.993, |
| "eval_steps_per_second": 29.322, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 4.1346282958984375, |
| "learning_rate": 4.25e-05, |
| "loss": 0.4676, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7543859649122807, |
| "eval_f1": 0.7066176470588235, |
| "eval_loss": 0.46596524119377136, |
| "eval_precision": 0.704723824246388, |
| "eval_recall": 0.7087197672304055, |
| "eval_runtime": 1.7034, |
| "eval_samples_per_second": 234.238, |
| "eval_steps_per_second": 29.353, |
| "step": 366 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.7155730724334717, |
| "learning_rate": 4e-05, |
| "loss": 0.4415, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7844611528822055, |
| "eval_f1": 0.7345596880995111, |
| "eval_loss": 0.4403132498264313, |
| "eval_precision": 0.740066434672572, |
| "eval_recall": 0.7299963629750863, |
| "eval_runtime": 1.7029, |
| "eval_samples_per_second": 234.302, |
| "eval_steps_per_second": 29.361, |
| "step": 488 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 5.956135272979736, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.4208, |
| "step": 610 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8120300751879699, |
| "eval_f1": 0.7738180718793697, |
| "eval_loss": 0.42520591616630554, |
| "eval_precision": 0.7731467519150732, |
| "eval_recall": 0.7745044553555192, |
| "eval_runtime": 1.7031, |
| "eval_samples_per_second": 234.273, |
| "eval_steps_per_second": 29.358, |
| "step": 610 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 2.4589931964874268, |
| "learning_rate": 3.5e-05, |
| "loss": 0.383, |
| "step": 732 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8170426065162907, |
| "eval_f1": 0.7869291373142845, |
| "eval_loss": 0.41415733098983765, |
| "eval_precision": 0.7790262172284643, |
| "eval_recall": 0.7980541916712129, |
| "eval_runtime": 1.7034, |
| "eval_samples_per_second": 234.238, |
| "eval_steps_per_second": 29.353, |
| "step": 732 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 4.000768184661865, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.3751, |
| "step": 854 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8270676691729323, |
| "eval_f1": 0.7790538100397255, |
| "eval_loss": 0.3982468843460083, |
| "eval_precision": 0.8007425742574257, |
| "eval_recall": 0.765139116202946, |
| "eval_runtime": 1.7033, |
| "eval_samples_per_second": 234.25, |
| "eval_steps_per_second": 29.355, |
| "step": 854 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 4.909487724304199, |
| "learning_rate": 3e-05, |
| "loss": 0.3554, |
| "step": 976 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.8395989974937343, |
| "eval_f1": 0.7944490952411617, |
| "eval_loss": 0.38216620683670044, |
| "eval_precision": 0.81875, |
| "eval_recall": 0.7790052736861247, |
| "eval_runtime": 1.7031, |
| "eval_samples_per_second": 234.272, |
| "eval_steps_per_second": 29.357, |
| "step": 976 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 8.974835395812988, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.3502, |
| "step": 1098 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8521303258145363, |
| "eval_f1": 0.8237962290701417, |
| "eval_loss": 0.3767492175102234, |
| "eval_precision": 0.8201159969225307, |
| "eval_recall": 0.8278777959629023, |
| "eval_runtime": 1.7032, |
| "eval_samples_per_second": 234.262, |
| "eval_steps_per_second": 29.356, |
| "step": 1098 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 5.985044956207275, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3326, |
| "step": 1220 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_f1": 0.8235951134380454, |
| "eval_loss": 0.3652937412261963, |
| "eval_precision": 0.8321878579610538, |
| "eval_recall": 0.8164211674849973, |
| "eval_runtime": 1.7027, |
| "eval_samples_per_second": 234.329, |
| "eval_steps_per_second": 29.365, |
| "step": 1220 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 10.441373825073242, |
| "learning_rate": 2.25e-05, |
| "loss": 0.3246, |
| "step": 1342 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_f1": 0.822632092025736, |
| "eval_loss": 0.3636661469936371, |
| "eval_precision": 0.8334889561465646, |
| "eval_recall": 0.813920712856883, |
| "eval_runtime": 1.7033, |
| "eval_samples_per_second": 234.245, |
| "eval_steps_per_second": 29.354, |
| "step": 1342 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 11.4411039352417, |
| "learning_rate": 2e-05, |
| "loss": 0.3255, |
| "step": 1464 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8546365914786967, |
| "eval_f1": 0.8209821152299028, |
| "eval_loss": 0.3571181893348694, |
| "eval_precision": 0.8284245491932933, |
| "eval_recall": 0.8146481178396072, |
| "eval_runtime": 1.7023, |
| "eval_samples_per_second": 234.39, |
| "eval_steps_per_second": 29.372, |
| "step": 1464 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 2.8683927059173584, |
| "learning_rate": 1.75e-05, |
| "loss": 0.3096, |
| "step": 1586 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8471177944862155, |
| "eval_f1": 0.8022843031331996, |
| "eval_loss": 0.35997274518013, |
| "eval_precision": 0.832141339753576, |
| "eval_recall": 0.784324422622295, |
| "eval_runtime": 1.704, |
| "eval_samples_per_second": 234.154, |
| "eval_steps_per_second": 29.343, |
| "step": 1586 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 7.7256760597229, |
| "learning_rate": 1.5e-05, |
| "loss": 0.3123, |
| "step": 1708 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8271551457392166, |
| "eval_loss": 0.34547460079193115, |
| "eval_precision": 0.8347358430876305, |
| "eval_recall": 0.8206946717585015, |
| "eval_runtime": 1.7031, |
| "eval_samples_per_second": 234.279, |
| "eval_steps_per_second": 29.358, |
| "step": 1708 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 3.0547094345092773, |
| "learning_rate": 1.25e-05, |
| "loss": 0.2937, |
| "step": 1830 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8621553884711779, |
| "eval_f1": 0.8249232119350592, |
| "eval_loss": 0.34601926803588867, |
| "eval_precision": 0.8467014712861889, |
| "eval_recall": 0.8099654482633206, |
| "eval_runtime": 1.7039, |
| "eval_samples_per_second": 234.176, |
| "eval_steps_per_second": 29.345, |
| "step": 1830 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 4.09341287612915, |
| "learning_rate": 1e-05, |
| "loss": 0.2941, |
| "step": 1952 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.8596491228070176, |
| "eval_f1": 0.8280701754385965, |
| "eval_loss": 0.34146586060523987, |
| "eval_precision": 0.833567942942943, |
| "eval_recall": 0.8231951263866157, |
| "eval_runtime": 1.703, |
| "eval_samples_per_second": 234.292, |
| "eval_steps_per_second": 29.36, |
| "step": 1952 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 2.538574457168579, |
| "learning_rate": 7.5e-06, |
| "loss": 0.3031, |
| "step": 2074 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.8646616541353384, |
| "eval_f1": 0.8333281762485303, |
| "eval_loss": 0.3417251408100128, |
| "eval_precision": 0.8410471369819678, |
| "eval_recall": 0.8267412256773959, |
| "eval_runtime": 1.7034, |
| "eval_samples_per_second": 234.243, |
| "eval_steps_per_second": 29.354, |
| "step": 2074 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 6.733055114746094, |
| "learning_rate": 5e-06, |
| "loss": 0.3003, |
| "step": 2196 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.8621553884711779, |
| "eval_f1": 0.8315822595375324, |
| "eval_loss": 0.3399008512496948, |
| "eval_precision": 0.8361280487804879, |
| "eval_recall": 0.82746863066012, |
| "eval_runtime": 1.703, |
| "eval_samples_per_second": 234.288, |
| "eval_steps_per_second": 29.359, |
| "step": 2196 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 2.863952398300171, |
| "learning_rate": 2.5e-06, |
| "loss": 0.2976, |
| "step": 2318 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.8671679197994987, |
| "eval_f1": 0.8332268672959993, |
| "eval_loss": 0.34020429849624634, |
| "eval_precision": 0.849623687858982, |
| "eval_recall": 0.8210129114384433, |
| "eval_runtime": 1.7043, |
| "eval_samples_per_second": 234.111, |
| "eval_steps_per_second": 29.337, |
| "step": 2318 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 4.950519561767578, |
| "learning_rate": 0.0, |
| "loss": 0.2956, |
| "step": 2440 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.8696741854636592, |
| "eval_f1": 0.8368354828562441, |
| "eval_loss": 0.3397713303565979, |
| "eval_precision": 0.8520237470480189, |
| "eval_recall": 0.8252864157119476, |
| "eval_runtime": 1.7047, |
| "eval_samples_per_second": 234.055, |
| "eval_steps_per_second": 29.33, |
| "step": 2440 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 2440, |
| "total_flos": 7604291693904000.0, |
| "train_loss": 0.36226015403622486, |
| "train_runtime": 612.3732, |
| "train_samples_per_second": 118.816, |
| "train_steps_per_second": 3.984 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 7604291693904000.0, |
| "train_batch_size": 30, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|