| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 10560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.263325572013855, |
| "learning_rate": 4.75e-05, |
| "loss": 0.8173, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8480075524197556, |
| "eval_f1": 0.05425143453312468, |
| "eval_loss": 0.5014122128486633, |
| "eval_precision": 0.2653061224489796, |
| "eval_recall": 0.03021499128413713, |
| "eval_runtime": 3.9202, |
| "eval_samples_per_second": 238.51, |
| "eval_steps_per_second": 3.826, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.040271520614624, |
| "learning_rate": 4.5e-05, |
| "loss": 0.4808, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8941170625062109, |
| "eval_f1": 0.3863885839736554, |
| "eval_loss": 0.3564635217189789, |
| "eval_precision": 0.5217391304347826, |
| "eval_recall": 0.30679837303893087, |
| "eval_runtime": 4.2946, |
| "eval_samples_per_second": 217.716, |
| "eval_steps_per_second": 3.493, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.256822943687439, |
| "learning_rate": 4.25e-05, |
| "loss": 0.3767, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.919358044320779, |
| "eval_f1": 0.5637982195845698, |
| "eval_loss": 0.2893461287021637, |
| "eval_precision": 0.5761067313523347, |
| "eval_recall": 0.5520046484601976, |
| "eval_runtime": 4.4845, |
| "eval_samples_per_second": 208.498, |
| "eval_steps_per_second": 3.345, |
| "step": 1584 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.3887470960617065, |
| "learning_rate": 4e-05, |
| "loss": 0.3159, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9279042035178376, |
| "eval_f1": 0.6389668725435149, |
| "eval_loss": 0.24789083003997803, |
| "eval_precision": 0.6181423139598045, |
| "eval_recall": 0.6612434631028472, |
| "eval_runtime": 4.6339, |
| "eval_samples_per_second": 201.774, |
| "eval_steps_per_second": 3.237, |
| "step": 2112 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.7730798125267029, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.2785, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9330716486137335, |
| "eval_f1": 0.6637314254265273, |
| "eval_loss": 0.2236042320728302, |
| "eval_precision": 0.6304234187140617, |
| "eval_recall": 0.7007553747821035, |
| "eval_runtime": 4.6093, |
| "eval_samples_per_second": 202.851, |
| "eval_steps_per_second": 3.254, |
| "step": 2640 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.3244330883026123, |
| "learning_rate": 3.5e-05, |
| "loss": 0.2527, |
| "step": 3168 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9360528669382888, |
| "eval_f1": 0.6862425231103861, |
| "eval_loss": 0.20973308384418488, |
| "eval_precision": 0.6448645886561063, |
| "eval_recall": 0.7332945961650204, |
| "eval_runtime": 4.5728, |
| "eval_samples_per_second": 204.47, |
| "eval_steps_per_second": 3.28, |
| "step": 3168 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.106473684310913, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.2365, |
| "step": 3696 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9376428500447183, |
| "eval_f1": 0.6923488496522204, |
| "eval_loss": 0.1997063159942627, |
| "eval_precision": 0.6415468517600397, |
| "eval_recall": 0.7518884369552585, |
| "eval_runtime": 4.5493, |
| "eval_samples_per_second": 205.525, |
| "eval_steps_per_second": 3.297, |
| "step": 3696 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.7820762991905212, |
| "learning_rate": 3e-05, |
| "loss": 0.2243, |
| "step": 4224 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9394315810394515, |
| "eval_f1": 0.703166935050993, |
| "eval_loss": 0.19049784541130066, |
| "eval_precision": 0.6533665835411472, |
| "eval_recall": 0.7611853573503777, |
| "eval_runtime": 4.544, |
| "eval_samples_per_second": 205.764, |
| "eval_steps_per_second": 3.301, |
| "step": 4224 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.5380152463912964, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.2134, |
| "step": 4752 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9398290768160589, |
| "eval_f1": 0.7064464571124135, |
| "eval_loss": 0.18573088943958282, |
| "eval_precision": 0.6522380718150517, |
| "eval_recall": 0.7704822777454968, |
| "eval_runtime": 4.499, |
| "eval_samples_per_second": 207.826, |
| "eval_steps_per_second": 3.334, |
| "step": 4752 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.0374780893325806, |
| "learning_rate": 2.5e-05, |
| "loss": 0.2072, |
| "step": 5280 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9418165556990957, |
| "eval_f1": 0.7126925119490174, |
| "eval_loss": 0.18140505254268646, |
| "eval_precision": 0.6562347188264058, |
| "eval_recall": 0.7797791981406159, |
| "eval_runtime": 4.8822, |
| "eval_samples_per_second": 191.511, |
| "eval_steps_per_second": 3.072, |
| "step": 5280 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.8238540291786194, |
| "learning_rate": 2.25e-05, |
| "loss": 0.2009, |
| "step": 5808 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.9426115472523104, |
| "eval_f1": 0.7149706979222165, |
| "eval_loss": 0.17563943564891815, |
| "eval_precision": 0.6601082144613871, |
| "eval_recall": 0.7797791981406159, |
| "eval_runtime": 4.6573, |
| "eval_samples_per_second": 200.759, |
| "eval_steps_per_second": 3.221, |
| "step": 5808 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 1.7851293087005615, |
| "learning_rate": 2e-05, |
| "loss": 0.1962, |
| "step": 6336 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9436052866938289, |
| "eval_f1": 0.7161803713527852, |
| "eval_loss": 0.1738174557685852, |
| "eval_precision": 0.6588579795021962, |
| "eval_recall": 0.7844276583381755, |
| "eval_runtime": 4.5908, |
| "eval_samples_per_second": 203.669, |
| "eval_steps_per_second": 3.267, |
| "step": 6336 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 1.3740888833999634, |
| "learning_rate": 1.75e-05, |
| "loss": 0.1921, |
| "step": 6864 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.94420153035874, |
| "eval_f1": 0.7210164107993647, |
| "eval_loss": 0.17201030254364014, |
| "eval_precision": 0.6621293145357317, |
| "eval_recall": 0.7914003486345148, |
| "eval_runtime": 4.584, |
| "eval_samples_per_second": 203.972, |
| "eval_steps_per_second": 3.272, |
| "step": 6864 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.6247196197509766, |
| "learning_rate": 1.5e-05, |
| "loss": 0.1887, |
| "step": 7392 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.94420153035874, |
| "eval_f1": 0.7210164107993647, |
| "eval_loss": 0.1705305576324463, |
| "eval_precision": 0.6621293145357317, |
| "eval_recall": 0.7914003486345148, |
| "eval_runtime": 4.5864, |
| "eval_samples_per_second": 203.864, |
| "eval_steps_per_second": 3.271, |
| "step": 7392 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.5709086060523987, |
| "learning_rate": 1.25e-05, |
| "loss": 0.1857, |
| "step": 7920 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.944797774023651, |
| "eval_f1": 0.7254381306425917, |
| "eval_loss": 0.16876670718193054, |
| "eval_precision": 0.6679706601466993, |
| "eval_recall": 0.7937245787332946, |
| "eval_runtime": 4.4753, |
| "eval_samples_per_second": 208.925, |
| "eval_steps_per_second": 3.352, |
| "step": 7920 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.8432120680809021, |
| "learning_rate": 1e-05, |
| "loss": 0.1846, |
| "step": 8448 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9449965219119547, |
| "eval_f1": 0.7283359914938863, |
| "eval_loss": 0.16835170984268188, |
| "eval_precision": 0.6712395884370407, |
| "eval_recall": 0.7960488088320744, |
| "eval_runtime": 4.493, |
| "eval_samples_per_second": 208.103, |
| "eval_steps_per_second": 3.339, |
| "step": 8448 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.8022745251655579, |
| "learning_rate": 7.5e-06, |
| "loss": 0.1833, |
| "step": 8976 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9445990261353473, |
| "eval_f1": 0.7269824374667376, |
| "eval_loss": 0.16759617626667023, |
| "eval_precision": 0.6705940108001963, |
| "eval_recall": 0.7937245787332946, |
| "eval_runtime": 4.5329, |
| "eval_samples_per_second": 206.268, |
| "eval_steps_per_second": 3.309, |
| "step": 8976 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.9574353694915771, |
| "learning_rate": 5e-06, |
| "loss": 0.1804, |
| "step": 9504 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9445990261353473, |
| "eval_f1": 0.7277570591369206, |
| "eval_loss": 0.1666666567325592, |
| "eval_precision": 0.6719134284308903, |
| "eval_recall": 0.7937245787332946, |
| "eval_runtime": 4.5749, |
| "eval_samples_per_second": 204.377, |
| "eval_steps_per_second": 3.279, |
| "step": 9504 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.7483543157577515, |
| "learning_rate": 2.5e-06, |
| "loss": 0.1816, |
| "step": 10032 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.944797774023651, |
| "eval_f1": 0.7277570591369206, |
| "eval_loss": 0.1664123684167862, |
| "eval_precision": 0.6719134284308903, |
| "eval_recall": 0.7937245787332946, |
| "eval_runtime": 4.5817, |
| "eval_samples_per_second": 204.074, |
| "eval_steps_per_second": 3.274, |
| "step": 10032 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.1839972734451294, |
| "learning_rate": 0.0, |
| "loss": 0.1801, |
| "step": 10560 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9449965219119547, |
| "eval_f1": 0.7285333333333335, |
| "eval_loss": 0.16621001064777374, |
| "eval_precision": 0.6732380482996551, |
| "eval_recall": 0.7937245787332946, |
| "eval_runtime": 4.5007, |
| "eval_samples_per_second": 207.747, |
| "eval_steps_per_second": 3.333, |
| "step": 10560 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 10560, |
| "total_flos": 4540180991527230.0, |
| "train_loss": 0.26384454712723243, |
| "train_runtime": 1246.4432, |
| "train_samples_per_second": 135.377, |
| "train_steps_per_second": 8.472 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 10560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 4540180991527230.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|