nerugm-ia3 / trainer_state.json
apwic's picture
End of training
53699b6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 10560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.263325572013855,
"learning_rate": 4.75e-05,
"loss": 0.8173,
"step": 528
},
{
"epoch": 1.0,
"eval_accuracy": 0.8480075524197556,
"eval_f1": 0.05425143453312468,
"eval_loss": 0.5014122128486633,
"eval_precision": 0.2653061224489796,
"eval_recall": 0.03021499128413713,
"eval_runtime": 3.9202,
"eval_samples_per_second": 238.51,
"eval_steps_per_second": 3.826,
"step": 528
},
{
"epoch": 2.0,
"grad_norm": 1.040271520614624,
"learning_rate": 4.5e-05,
"loss": 0.4808,
"step": 1056
},
{
"epoch": 2.0,
"eval_accuracy": 0.8941170625062109,
"eval_f1": 0.3863885839736554,
"eval_loss": 0.3564635217189789,
"eval_precision": 0.5217391304347826,
"eval_recall": 0.30679837303893087,
"eval_runtime": 4.2946,
"eval_samples_per_second": 217.716,
"eval_steps_per_second": 3.493,
"step": 1056
},
{
"epoch": 3.0,
"grad_norm": 1.256822943687439,
"learning_rate": 4.25e-05,
"loss": 0.3767,
"step": 1584
},
{
"epoch": 3.0,
"eval_accuracy": 0.919358044320779,
"eval_f1": 0.5637982195845698,
"eval_loss": 0.2893461287021637,
"eval_precision": 0.5761067313523347,
"eval_recall": 0.5520046484601976,
"eval_runtime": 4.4845,
"eval_samples_per_second": 208.498,
"eval_steps_per_second": 3.345,
"step": 1584
},
{
"epoch": 4.0,
"grad_norm": 1.3887470960617065,
"learning_rate": 4e-05,
"loss": 0.3159,
"step": 2112
},
{
"epoch": 4.0,
"eval_accuracy": 0.9279042035178376,
"eval_f1": 0.6389668725435149,
"eval_loss": 0.24789083003997803,
"eval_precision": 0.6181423139598045,
"eval_recall": 0.6612434631028472,
"eval_runtime": 4.6339,
"eval_samples_per_second": 201.774,
"eval_steps_per_second": 3.237,
"step": 2112
},
{
"epoch": 5.0,
"grad_norm": 0.7730798125267029,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2785,
"step": 2640
},
{
"epoch": 5.0,
"eval_accuracy": 0.9330716486137335,
"eval_f1": 0.6637314254265273,
"eval_loss": 0.2236042320728302,
"eval_precision": 0.6304234187140617,
"eval_recall": 0.7007553747821035,
"eval_runtime": 4.6093,
"eval_samples_per_second": 202.851,
"eval_steps_per_second": 3.254,
"step": 2640
},
{
"epoch": 6.0,
"grad_norm": 1.3244330883026123,
"learning_rate": 3.5e-05,
"loss": 0.2527,
"step": 3168
},
{
"epoch": 6.0,
"eval_accuracy": 0.9360528669382888,
"eval_f1": 0.6862425231103861,
"eval_loss": 0.20973308384418488,
"eval_precision": 0.6448645886561063,
"eval_recall": 0.7332945961650204,
"eval_runtime": 4.5728,
"eval_samples_per_second": 204.47,
"eval_steps_per_second": 3.28,
"step": 3168
},
{
"epoch": 7.0,
"grad_norm": 1.106473684310913,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2365,
"step": 3696
},
{
"epoch": 7.0,
"eval_accuracy": 0.9376428500447183,
"eval_f1": 0.6923488496522204,
"eval_loss": 0.1997063159942627,
"eval_precision": 0.6415468517600397,
"eval_recall": 0.7518884369552585,
"eval_runtime": 4.5493,
"eval_samples_per_second": 205.525,
"eval_steps_per_second": 3.297,
"step": 3696
},
{
"epoch": 8.0,
"grad_norm": 0.7820762991905212,
"learning_rate": 3e-05,
"loss": 0.2243,
"step": 4224
},
{
"epoch": 8.0,
"eval_accuracy": 0.9394315810394515,
"eval_f1": 0.703166935050993,
"eval_loss": 0.19049784541130066,
"eval_precision": 0.6533665835411472,
"eval_recall": 0.7611853573503777,
"eval_runtime": 4.544,
"eval_samples_per_second": 205.764,
"eval_steps_per_second": 3.301,
"step": 4224
},
{
"epoch": 9.0,
"grad_norm": 0.5380152463912964,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2134,
"step": 4752
},
{
"epoch": 9.0,
"eval_accuracy": 0.9398290768160589,
"eval_f1": 0.7064464571124135,
"eval_loss": 0.18573088943958282,
"eval_precision": 0.6522380718150517,
"eval_recall": 0.7704822777454968,
"eval_runtime": 4.499,
"eval_samples_per_second": 207.826,
"eval_steps_per_second": 3.334,
"step": 4752
},
{
"epoch": 10.0,
"grad_norm": 1.0374780893325806,
"learning_rate": 2.5e-05,
"loss": 0.2072,
"step": 5280
},
{
"epoch": 10.0,
"eval_accuracy": 0.9418165556990957,
"eval_f1": 0.7126925119490174,
"eval_loss": 0.18140505254268646,
"eval_precision": 0.6562347188264058,
"eval_recall": 0.7797791981406159,
"eval_runtime": 4.8822,
"eval_samples_per_second": 191.511,
"eval_steps_per_second": 3.072,
"step": 5280
},
{
"epoch": 11.0,
"grad_norm": 0.8238540291786194,
"learning_rate": 2.25e-05,
"loss": 0.2009,
"step": 5808
},
{
"epoch": 11.0,
"eval_accuracy": 0.9426115472523104,
"eval_f1": 0.7149706979222165,
"eval_loss": 0.17563943564891815,
"eval_precision": 0.6601082144613871,
"eval_recall": 0.7797791981406159,
"eval_runtime": 4.6573,
"eval_samples_per_second": 200.759,
"eval_steps_per_second": 3.221,
"step": 5808
},
{
"epoch": 12.0,
"grad_norm": 1.7851293087005615,
"learning_rate": 2e-05,
"loss": 0.1962,
"step": 6336
},
{
"epoch": 12.0,
"eval_accuracy": 0.9436052866938289,
"eval_f1": 0.7161803713527852,
"eval_loss": 0.1738174557685852,
"eval_precision": 0.6588579795021962,
"eval_recall": 0.7844276583381755,
"eval_runtime": 4.5908,
"eval_samples_per_second": 203.669,
"eval_steps_per_second": 3.267,
"step": 6336
},
{
"epoch": 13.0,
"grad_norm": 1.3740888833999634,
"learning_rate": 1.75e-05,
"loss": 0.1921,
"step": 6864
},
{
"epoch": 13.0,
"eval_accuracy": 0.94420153035874,
"eval_f1": 0.7210164107993647,
"eval_loss": 0.17201030254364014,
"eval_precision": 0.6621293145357317,
"eval_recall": 0.7914003486345148,
"eval_runtime": 4.584,
"eval_samples_per_second": 203.972,
"eval_steps_per_second": 3.272,
"step": 6864
},
{
"epoch": 14.0,
"grad_norm": 0.6247196197509766,
"learning_rate": 1.5e-05,
"loss": 0.1887,
"step": 7392
},
{
"epoch": 14.0,
"eval_accuracy": 0.94420153035874,
"eval_f1": 0.7210164107993647,
"eval_loss": 0.1705305576324463,
"eval_precision": 0.6621293145357317,
"eval_recall": 0.7914003486345148,
"eval_runtime": 4.5864,
"eval_samples_per_second": 203.864,
"eval_steps_per_second": 3.271,
"step": 7392
},
{
"epoch": 15.0,
"grad_norm": 0.5709086060523987,
"learning_rate": 1.25e-05,
"loss": 0.1857,
"step": 7920
},
{
"epoch": 15.0,
"eval_accuracy": 0.944797774023651,
"eval_f1": 0.7254381306425917,
"eval_loss": 0.16876670718193054,
"eval_precision": 0.6679706601466993,
"eval_recall": 0.7937245787332946,
"eval_runtime": 4.4753,
"eval_samples_per_second": 208.925,
"eval_steps_per_second": 3.352,
"step": 7920
},
{
"epoch": 16.0,
"grad_norm": 0.8432120680809021,
"learning_rate": 1e-05,
"loss": 0.1846,
"step": 8448
},
{
"epoch": 16.0,
"eval_accuracy": 0.9449965219119547,
"eval_f1": 0.7283359914938863,
"eval_loss": 0.16835170984268188,
"eval_precision": 0.6712395884370407,
"eval_recall": 0.7960488088320744,
"eval_runtime": 4.493,
"eval_samples_per_second": 208.103,
"eval_steps_per_second": 3.339,
"step": 8448
},
{
"epoch": 17.0,
"grad_norm": 0.8022745251655579,
"learning_rate": 7.5e-06,
"loss": 0.1833,
"step": 8976
},
{
"epoch": 17.0,
"eval_accuracy": 0.9445990261353473,
"eval_f1": 0.7269824374667376,
"eval_loss": 0.16759617626667023,
"eval_precision": 0.6705940108001963,
"eval_recall": 0.7937245787332946,
"eval_runtime": 4.5329,
"eval_samples_per_second": 206.268,
"eval_steps_per_second": 3.309,
"step": 8976
},
{
"epoch": 18.0,
"grad_norm": 0.9574353694915771,
"learning_rate": 5e-06,
"loss": 0.1804,
"step": 9504
},
{
"epoch": 18.0,
"eval_accuracy": 0.9445990261353473,
"eval_f1": 0.7277570591369206,
"eval_loss": 0.1666666567325592,
"eval_precision": 0.6719134284308903,
"eval_recall": 0.7937245787332946,
"eval_runtime": 4.5749,
"eval_samples_per_second": 204.377,
"eval_steps_per_second": 3.279,
"step": 9504
},
{
"epoch": 19.0,
"grad_norm": 0.7483543157577515,
"learning_rate": 2.5e-06,
"loss": 0.1816,
"step": 10032
},
{
"epoch": 19.0,
"eval_accuracy": 0.944797774023651,
"eval_f1": 0.7277570591369206,
"eval_loss": 0.1664123684167862,
"eval_precision": 0.6719134284308903,
"eval_recall": 0.7937245787332946,
"eval_runtime": 4.5817,
"eval_samples_per_second": 204.074,
"eval_steps_per_second": 3.274,
"step": 10032
},
{
"epoch": 20.0,
"grad_norm": 1.1839972734451294,
"learning_rate": 0.0,
"loss": 0.1801,
"step": 10560
},
{
"epoch": 20.0,
"eval_accuracy": 0.9449965219119547,
"eval_f1": 0.7285333333333335,
"eval_loss": 0.16621001064777374,
"eval_precision": 0.6732380482996551,
"eval_recall": 0.7937245787332946,
"eval_runtime": 4.5007,
"eval_samples_per_second": 207.747,
"eval_steps_per_second": 3.333,
"step": 10560
},
{
"epoch": 20.0,
"step": 10560,
"total_flos": 4540180991527230.0,
"train_loss": 0.26384454712723243,
"train_runtime": 1246.4432,
"train_samples_per_second": 135.377,
"train_steps_per_second": 8.472
}
],
"logging_steps": 500,
"max_steps": 10560,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 4540180991527230.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}