{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.87012987012987,
"eval_steps": 500,
"global_step": 570,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17316017316017315,
"grad_norm": 0.73779296875,
"learning_rate": 0.00019984815164333163,
"loss": 1.5442,
"step": 10
},
{
"epoch": 0.3463203463203463,
"grad_norm": 0.67529296875,
"learning_rate": 0.00019939306773179497,
"loss": 1.4059,
"step": 20
},
{
"epoch": 0.5194805194805194,
"grad_norm": 0.67724609375,
"learning_rate": 0.00019863613034027224,
"loss": 1.3684,
"step": 30
},
{
"epoch": 0.6926406926406926,
"grad_norm": 0.62744140625,
"learning_rate": 0.00019757963826274357,
"loss": 1.3543,
"step": 40
},
{
"epoch": 0.8658008658008658,
"grad_norm": 0.5947265625,
"learning_rate": 0.00019622680003092503,
"loss": 1.3547,
"step": 50
},
{
"epoch": 1.0389610389610389,
"grad_norm": 0.5986328125,
"learning_rate": 0.00019458172417006347,
"loss": 1.3248,
"step": 60
},
{
"epoch": 1.2121212121212122,
"grad_norm": 0.638671875,
"learning_rate": 0.00019264940672148018,
"loss": 1.2603,
"step": 70
},
{
"epoch": 1.3852813852813852,
"grad_norm": 0.78857421875,
"learning_rate": 0.00019043571606975777,
"loss": 1.2683,
"step": 80
},
{
"epoch": 1.5584415584415585,
"grad_norm": 0.7412109375,
"learning_rate": 0.0001879473751206489,
"loss": 1.2356,
"step": 90
},
{
"epoch": 1.7316017316017316,
"grad_norm": 0.70458984375,
"learning_rate": 0.00018519194088383273,
"loss": 1.2849,
"step": 100
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.748046875,
"learning_rate": 0.0001821777815225245,
"loss": 1.2618,
"step": 110
},
{
"epoch": 2.0779220779220777,
"grad_norm": 0.68359375,
"learning_rate": 0.00017891405093963938,
"loss": 1.1707,
"step": 120
},
{
"epoch": 2.2510822510822512,
"grad_norm": 0.83740234375,
"learning_rate": 0.00017541066097768963,
"loss": 1.1533,
"step": 130
},
{
"epoch": 2.4242424242424243,
"grad_norm": 0.81005859375,
"learning_rate": 0.00017167825131684513,
"loss": 1.1604,
"step": 140
},
{
"epoch": 2.5974025974025974,
"grad_norm": 0.99462890625,
"learning_rate": 0.00016772815716257412,
"loss": 1.1347,
"step": 150
},
{
"epoch": 2.7705627705627704,
"grad_norm": 0.9736328125,
"learning_rate": 0.00016357237482099684,
"loss": 1.1071,
"step": 160
},
{
"epoch": 2.9437229437229435,
"grad_norm": 0.89111328125,
"learning_rate": 0.00015922352526649803,
"loss": 1.1747,
"step": 170
},
{
"epoch": 3.116883116883117,
"grad_norm": 1.0849609375,
"learning_rate": 0.00015469481581224272,
"loss": 1.0882,
"step": 180
},
{
"epoch": 3.29004329004329,
"grad_norm": 1.0166015625,
"learning_rate": 0.00015000000000000001,
"loss": 1.0027,
"step": 190
},
{
"epoch": 3.463203463203463,
"grad_norm": 1.0068359375,
"learning_rate": 0.00014515333583108896,
"loss": 1.059,
"step": 200
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.9970703125,
"learning_rate": 0.00014016954246529696,
"loss": 1.0374,
"step": 210
},
{
"epoch": 3.8095238095238093,
"grad_norm": 1.0283203125,
"learning_rate": 0.00013506375551927547,
"loss": 0.9911,
"step": 220
},
{
"epoch": 3.982683982683983,
"grad_norm": 1.0732421875,
"learning_rate": 0.00012985148110016947,
"loss": 1.089,
"step": 230
},
{
"epoch": 4.1558441558441555,
"grad_norm": 2.0078125,
"learning_rate": 0.00012454854871407994,
"loss": 0.9512,
"step": 240
},
{
"epoch": 4.329004329004329,
"grad_norm": 1.2607421875,
"learning_rate": 0.00011917106319237386,
"loss": 0.9437,
"step": 250
},
{
"epoch": 4.5021645021645025,
"grad_norm": 1.21875,
"learning_rate": 0.00011373535578184082,
"loss": 0.9019,
"step": 260
},
{
"epoch": 4.675324675324675,
"grad_norm": 1.3486328125,
"learning_rate": 0.00010825793454723325,
"loss": 0.9206,
"step": 270
},
{
"epoch": 4.848484848484849,
"grad_norm": 1.1474609375,
"learning_rate": 0.00010275543423681621,
"loss": 0.9218,
"step": 280
},
{
"epoch": 5.021645021645021,
"grad_norm": 1.119140625,
"learning_rate": 9.724456576318381e-05,
"loss": 0.8835,
"step": 290
},
{
"epoch": 5.194805194805195,
"grad_norm": 1.25390625,
"learning_rate": 9.174206545276677e-05,
"loss": 0.8657,
"step": 300
},
{
"epoch": 5.367965367965368,
"grad_norm": 1.2841796875,
"learning_rate": 8.626464421815919e-05,
"loss": 0.8135,
"step": 310
},
{
"epoch": 5.541125541125541,
"grad_norm": 1.302734375,
"learning_rate": 8.082893680762619e-05,
"loss": 0.828,
"step": 320
},
{
"epoch": 5.714285714285714,
"grad_norm": 1.255859375,
"learning_rate": 7.54514512859201e-05,
"loss": 0.8363,
"step": 330
},
{
"epoch": 5.887445887445887,
"grad_norm": 1.2783203125,
"learning_rate": 7.014851889983057e-05,
"loss": 0.8205,
"step": 340
},
{
"epoch": 6.0606060606060606,
"grad_norm": 1.15234375,
"learning_rate": 6.493624448072457e-05,
"loss": 0.8129,
"step": 350
},
{
"epoch": 6.233766233766234,
"grad_norm": 1.314453125,
"learning_rate": 5.983045753470308e-05,
"loss": 0.7766,
"step": 360
},
{
"epoch": 6.406926406926407,
"grad_norm": 1.556640625,
"learning_rate": 5.484666416891109e-05,
"loss": 0.7152,
"step": 370
},
{
"epoch": 6.58008658008658,
"grad_norm": 1.3330078125,
"learning_rate": 5.000000000000002e-05,
"loss": 0.7112,
"step": 380
},
{
"epoch": 6.753246753246753,
"grad_norm": 1.3564453125,
"learning_rate": 4.530518418775733e-05,
"loss": 0.7955,
"step": 390
},
{
"epoch": 6.926406926406926,
"grad_norm": 1.31640625,
"learning_rate": 4.077647473350201e-05,
"loss": 0.8241,
"step": 400
},
{
"epoch": 7.0995670995671,
"grad_norm": 1.4013671875,
"learning_rate": 3.642762517900322e-05,
"loss": 0.7759,
"step": 410
},
{
"epoch": 7.2727272727272725,
"grad_norm": 1.4345703125,
"learning_rate": 3.227184283742591e-05,
"loss": 0.747,
"step": 420
},
{
"epoch": 7.445887445887446,
"grad_norm": 1.4423828125,
"learning_rate": 2.8321748683154893e-05,
"loss": 0.7206,
"step": 430
},
{
"epoch": 7.619047619047619,
"grad_norm": 1.599609375,
"learning_rate": 2.4589339022310386e-05,
"loss": 0.6764,
"step": 440
},
{
"epoch": 7.792207792207792,
"grad_norm": 1.3486328125,
"learning_rate": 2.1085949060360654e-05,
"loss": 0.7336,
"step": 450
},
{
"epoch": 7.965367965367966,
"grad_norm": 1.576171875,
"learning_rate": 1.7822218477475494e-05,
"loss": 0.6992,
"step": 460
},
{
"epoch": 8.13852813852814,
"grad_norm": 1.560546875,
"learning_rate": 1.4808059116167305e-05,
"loss": 0.6818,
"step": 470
},
{
"epoch": 8.311688311688311,
"grad_norm": 1.6240234375,
"learning_rate": 1.2052624879351104e-05,
"loss": 0.7221,
"step": 480
},
{
"epoch": 8.484848484848484,
"grad_norm": 1.431640625,
"learning_rate": 9.564283930242257e-06,
"loss": 0.6785,
"step": 490
},
{
"epoch": 8.658008658008658,
"grad_norm": 1.4521484375,
"learning_rate": 7.350593278519824e-06,
"loss": 0.6701,
"step": 500
},
{
"epoch": 8.831168831168831,
"grad_norm": 1.46484375,
"learning_rate": 5.418275829936537e-06,
"loss": 0.695,
"step": 510
},
{
"epoch": 9.004329004329005,
"grad_norm": 1.3701171875,
"learning_rate": 3.7731999690749585e-06,
"loss": 0.6976,
"step": 520
},
{
"epoch": 9.177489177489177,
"grad_norm": 1.3154296875,
"learning_rate": 2.420361737256438e-06,
"loss": 0.6386,
"step": 530
},
{
"epoch": 9.35064935064935,
"grad_norm": 1.5068359375,
"learning_rate": 1.3638696597277679e-06,
"loss": 0.706,
"step": 540
},
{
"epoch": 9.523809523809524,
"grad_norm": 1.5166015625,
"learning_rate": 6.069322682050516e-07,
"loss": 0.6972,
"step": 550
},
{
"epoch": 9.696969696969697,
"grad_norm": 1.2392578125,
"learning_rate": 1.518483566683826e-07,
"loss": 0.6888,
"step": 560
},
{
"epoch": 9.87012987012987,
"grad_norm": 1.4599609375,
"learning_rate": 0.0,
"loss": 0.6703,
"step": 570
},
{
"epoch": 9.87012987012987,
"step": 570,
"total_flos": 1.455489640562688e+16,
"train_loss": 0.9482885511297928,
"train_runtime": 548.5236,
"train_samples_per_second": 4.211,
"train_steps_per_second": 1.039
}
],
"logging_steps": 10,
"max_steps": 570,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.455489640562688e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}