PharmaCompass / trainer_state.json
maherghanem86's picture
upload model
eba226c verified
Raw
History Blame Contribute Delete
14.3 kB
{
"best_global_step": 350,
"best_metric": 1.0208680629730225,
"best_model_checkpoint": "./llama2-medical-lora/checkpoint-300",
"epoch": 1.992947813822285,
"eval_steps": 50,
"global_step": 354,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.028208744710860368,
"grad_norm": 0.802484393119812,
"learning_rate": 8.000000000000001e-06,
"loss": 2.5988,
"step": 5
},
{
"epoch": 0.056417489421720736,
"grad_norm": 0.6854377388954163,
"learning_rate": 1.8e-05,
"loss": 2.5458,
"step": 10
},
{
"epoch": 0.0846262341325811,
"grad_norm": 0.7429786920547485,
"learning_rate": 2.8000000000000003e-05,
"loss": 2.7827,
"step": 15
},
{
"epoch": 0.11283497884344147,
"grad_norm": 0.5219557881355286,
"learning_rate": 3.8e-05,
"loss": 2.4165,
"step": 20
},
{
"epoch": 0.14104372355430184,
"grad_norm": 0.5999312996864319,
"learning_rate": 4.8e-05,
"loss": 2.4447,
"step": 25
},
{
"epoch": 0.1692524682651622,
"grad_norm": 0.4902834892272949,
"learning_rate": 5.8e-05,
"loss": 2.4914,
"step": 30
},
{
"epoch": 0.19746121297602257,
"grad_norm": 0.5148080587387085,
"learning_rate": 6.800000000000001e-05,
"loss": 2.228,
"step": 35
},
{
"epoch": 0.22566995768688294,
"grad_norm": 0.4651883542537689,
"learning_rate": 7.800000000000001e-05,
"loss": 2.1752,
"step": 40
},
{
"epoch": 0.2538787023977433,
"grad_norm": 0.5549948215484619,
"learning_rate": 8.800000000000001e-05,
"loss": 2.2121,
"step": 45
},
{
"epoch": 0.2820874471086037,
"grad_norm": 0.5938742160797119,
"learning_rate": 9.8e-05,
"loss": 2.0736,
"step": 50
},
{
"epoch": 0.2820874471086037,
"eval_loss": 1.4600411653518677,
"eval_runtime": 22.9644,
"eval_samples_per_second": 3.832,
"eval_steps_per_second": 3.832,
"step": 50
},
{
"epoch": 0.31029619181946405,
"grad_norm": 0.5836663842201233,
"learning_rate": 9.868421052631579e-05,
"loss": 1.9917,
"step": 55
},
{
"epoch": 0.3385049365303244,
"grad_norm": 0.5286921262741089,
"learning_rate": 9.703947368421054e-05,
"loss": 1.6407,
"step": 60
},
{
"epoch": 0.36671368124118475,
"grad_norm": 0.6719670295715332,
"learning_rate": 9.539473684210526e-05,
"loss": 1.783,
"step": 65
},
{
"epoch": 0.39492242595204513,
"grad_norm": 0.5518754720687866,
"learning_rate": 9.375e-05,
"loss": 1.552,
"step": 70
},
{
"epoch": 0.4231311706629055,
"grad_norm": 0.6786110997200012,
"learning_rate": 9.210526315789474e-05,
"loss": 1.8268,
"step": 75
},
{
"epoch": 0.4513399153737659,
"grad_norm": 0.6912865042686462,
"learning_rate": 9.046052631578948e-05,
"loss": 1.6622,
"step": 80
},
{
"epoch": 0.4795486600846262,
"grad_norm": 0.5231357216835022,
"learning_rate": 8.881578947368422e-05,
"loss": 1.4071,
"step": 85
},
{
"epoch": 0.5077574047954866,
"grad_norm": 0.6691134572029114,
"learning_rate": 8.717105263157895e-05,
"loss": 1.7379,
"step": 90
},
{
"epoch": 0.535966149506347,
"grad_norm": 0.7257916331291199,
"learning_rate": 8.552631578947369e-05,
"loss": 1.4458,
"step": 95
},
{
"epoch": 0.5641748942172073,
"grad_norm": 0.6908120512962341,
"learning_rate": 8.388157894736842e-05,
"loss": 1.6261,
"step": 100
},
{
"epoch": 0.5641748942172073,
"eval_loss": 1.1523932218551636,
"eval_runtime": 22.9742,
"eval_samples_per_second": 3.83,
"eval_steps_per_second": 3.83,
"step": 100
},
{
"epoch": 0.5923836389280677,
"grad_norm": 0.7155871391296387,
"learning_rate": 8.223684210526316e-05,
"loss": 1.6057,
"step": 105
},
{
"epoch": 0.6205923836389281,
"grad_norm": 0.6426169276237488,
"learning_rate": 8.059210526315791e-05,
"loss": 1.5596,
"step": 110
},
{
"epoch": 0.6488011283497884,
"grad_norm": 0.7796515226364136,
"learning_rate": 7.894736842105263e-05,
"loss": 1.507,
"step": 115
},
{
"epoch": 0.6770098730606487,
"grad_norm": 0.671275794506073,
"learning_rate": 7.730263157894737e-05,
"loss": 1.6426,
"step": 120
},
{
"epoch": 0.7052186177715092,
"grad_norm": 0.6995854377746582,
"learning_rate": 7.565789473684211e-05,
"loss": 1.5737,
"step": 125
},
{
"epoch": 0.7334273624823695,
"grad_norm": 0.8593846559524536,
"learning_rate": 7.401315789473685e-05,
"loss": 1.5536,
"step": 130
},
{
"epoch": 0.7616361071932299,
"grad_norm": 0.8717703223228455,
"learning_rate": 7.236842105263159e-05,
"loss": 1.6078,
"step": 135
},
{
"epoch": 0.7898448519040903,
"grad_norm": 0.8219364881515503,
"learning_rate": 7.072368421052632e-05,
"loss": 1.7053,
"step": 140
},
{
"epoch": 0.8180535966149506,
"grad_norm": 0.7495922446250916,
"learning_rate": 6.907894736842105e-05,
"loss": 1.4436,
"step": 145
},
{
"epoch": 0.846262341325811,
"grad_norm": 0.8867738246917725,
"learning_rate": 6.743421052631579e-05,
"loss": 1.6826,
"step": 150
},
{
"epoch": 0.846262341325811,
"eval_loss": 1.091098427772522,
"eval_runtime": 22.9641,
"eval_samples_per_second": 3.832,
"eval_steps_per_second": 3.832,
"step": 150
},
{
"epoch": 0.8744710860366713,
"grad_norm": 0.8017202615737915,
"learning_rate": 6.578947368421054e-05,
"loss": 1.4847,
"step": 155
},
{
"epoch": 0.9026798307475318,
"grad_norm": 0.8118647336959839,
"learning_rate": 6.414473684210526e-05,
"loss": 1.5564,
"step": 160
},
{
"epoch": 0.9308885754583921,
"grad_norm": 0.7750623822212219,
"learning_rate": 6.25e-05,
"loss": 1.6639,
"step": 165
},
{
"epoch": 0.9590973201692524,
"grad_norm": 0.8271228075027466,
"learning_rate": 6.085526315789474e-05,
"loss": 1.7569,
"step": 170
},
{
"epoch": 0.9873060648801129,
"grad_norm": 0.8436787128448486,
"learning_rate": 5.921052631578947e-05,
"loss": 1.592,
"step": 175
},
{
"epoch": 1.0112834978843441,
"grad_norm": 0.7751876711845398,
"learning_rate": 5.7565789473684216e-05,
"loss": 1.5432,
"step": 180
},
{
"epoch": 1.0394922425952045,
"grad_norm": 0.752131998538971,
"learning_rate": 5.5921052631578954e-05,
"loss": 1.48,
"step": 185
},
{
"epoch": 1.0677009873060648,
"grad_norm": 0.886309027671814,
"learning_rate": 5.4276315789473686e-05,
"loss": 1.4321,
"step": 190
},
{
"epoch": 1.0959097320169253,
"grad_norm": 0.810002326965332,
"learning_rate": 5.2631578947368424e-05,
"loss": 1.3895,
"step": 195
},
{
"epoch": 1.1241184767277856,
"grad_norm": 0.8746829032897949,
"learning_rate": 5.0986842105263155e-05,
"loss": 1.4329,
"step": 200
},
{
"epoch": 1.1241184767277856,
"eval_loss": 1.063501238822937,
"eval_runtime": 22.947,
"eval_samples_per_second": 3.835,
"eval_steps_per_second": 3.835,
"step": 200
},
{
"epoch": 1.152327221438646,
"grad_norm": 0.7560333013534546,
"learning_rate": 4.9342105263157894e-05,
"loss": 1.3735,
"step": 205
},
{
"epoch": 1.1805359661495063,
"grad_norm": 0.9235308766365051,
"learning_rate": 4.769736842105263e-05,
"loss": 1.3622,
"step": 210
},
{
"epoch": 1.2087447108603668,
"grad_norm": 0.8749310374259949,
"learning_rate": 4.605263157894737e-05,
"loss": 1.3775,
"step": 215
},
{
"epoch": 1.2369534555712272,
"grad_norm": 0.8778985738754272,
"learning_rate": 4.440789473684211e-05,
"loss": 1.382,
"step": 220
},
{
"epoch": 1.2651622002820875,
"grad_norm": 1.1433746814727783,
"learning_rate": 4.2763157894736847e-05,
"loss": 1.4517,
"step": 225
},
{
"epoch": 1.2933709449929478,
"grad_norm": 0.9843802452087402,
"learning_rate": 4.111842105263158e-05,
"loss": 1.5194,
"step": 230
},
{
"epoch": 1.3215796897038081,
"grad_norm": 0.8270325064659119,
"learning_rate": 3.9473684210526316e-05,
"loss": 1.4335,
"step": 235
},
{
"epoch": 1.3497884344146684,
"grad_norm": 0.8765392303466797,
"learning_rate": 3.7828947368421054e-05,
"loss": 1.3667,
"step": 240
},
{
"epoch": 1.377997179125529,
"grad_norm": 0.9477415084838867,
"learning_rate": 3.618421052631579e-05,
"loss": 1.438,
"step": 245
},
{
"epoch": 1.4062059238363893,
"grad_norm": 0.7957858443260193,
"learning_rate": 3.4539473684210524e-05,
"loss": 1.3859,
"step": 250
},
{
"epoch": 1.4062059238363893,
"eval_loss": 1.0414153337478638,
"eval_runtime": 22.9463,
"eval_samples_per_second": 3.835,
"eval_steps_per_second": 3.835,
"step": 250
},
{
"epoch": 1.4344146685472496,
"grad_norm": 0.9236720204353333,
"learning_rate": 3.289473684210527e-05,
"loss": 1.4527,
"step": 255
},
{
"epoch": 1.46262341325811,
"grad_norm": 0.9670230150222778,
"learning_rate": 3.125e-05,
"loss": 1.5064,
"step": 260
},
{
"epoch": 1.4908321579689705,
"grad_norm": 0.8780084252357483,
"learning_rate": 2.9605263157894735e-05,
"loss": 1.4092,
"step": 265
},
{
"epoch": 1.5190409026798308,
"grad_norm": 0.8973761200904846,
"learning_rate": 2.7960526315789477e-05,
"loss": 1.472,
"step": 270
},
{
"epoch": 1.5472496473906912,
"grad_norm": 1.020290732383728,
"learning_rate": 2.6315789473684212e-05,
"loss": 1.4181,
"step": 275
},
{
"epoch": 1.5754583921015515,
"grad_norm": 0.9510458111763,
"learning_rate": 2.4671052631578947e-05,
"loss": 1.3838,
"step": 280
},
{
"epoch": 1.6036671368124118,
"grad_norm": 0.856960654258728,
"learning_rate": 2.3026315789473685e-05,
"loss": 1.3249,
"step": 285
},
{
"epoch": 1.6318758815232721,
"grad_norm": 0.9089232683181763,
"learning_rate": 2.1381578947368423e-05,
"loss": 1.4036,
"step": 290
},
{
"epoch": 1.6600846262341324,
"grad_norm": 0.9417166709899902,
"learning_rate": 1.9736842105263158e-05,
"loss": 1.3667,
"step": 295
},
{
"epoch": 1.688293370944993,
"grad_norm": 0.8981354832649231,
"learning_rate": 1.8092105263157896e-05,
"loss": 1.3101,
"step": 300
},
{
"epoch": 1.688293370944993,
"eval_loss": 1.0266820192337036,
"eval_runtime": 22.9326,
"eval_samples_per_second": 3.837,
"eval_steps_per_second": 3.837,
"step": 300
},
{
"epoch": 1.7165021156558533,
"grad_norm": 0.9740147590637207,
"learning_rate": 1.6447368421052635e-05,
"loss": 1.4924,
"step": 305
},
{
"epoch": 1.7447108603667136,
"grad_norm": 0.8645333647727966,
"learning_rate": 1.4802631578947368e-05,
"loss": 1.3228,
"step": 310
},
{
"epoch": 1.7729196050775742,
"grad_norm": 1.0288525819778442,
"learning_rate": 1.3157894736842106e-05,
"loss": 1.357,
"step": 315
},
{
"epoch": 1.8011283497884345,
"grad_norm": 1.038477897644043,
"learning_rate": 1.1513157894736843e-05,
"loss": 1.4585,
"step": 320
},
{
"epoch": 1.8293370944992948,
"grad_norm": 0.9444319009780884,
"learning_rate": 9.868421052631579e-06,
"loss": 1.3472,
"step": 325
},
{
"epoch": 1.8575458392101551,
"grad_norm": 0.9182987809181213,
"learning_rate": 8.223684210526317e-06,
"loss": 1.5454,
"step": 330
},
{
"epoch": 1.8857545839210155,
"grad_norm": 1.0083339214324951,
"learning_rate": 6.578947368421053e-06,
"loss": 1.6271,
"step": 335
},
{
"epoch": 1.9139633286318758,
"grad_norm": 0.9220558404922485,
"learning_rate": 4.9342105263157895e-06,
"loss": 1.4036,
"step": 340
},
{
"epoch": 1.9421720733427361,
"grad_norm": 0.997721254825592,
"learning_rate": 3.2894736842105265e-06,
"loss": 1.4065,
"step": 345
},
{
"epoch": 1.9703808180535967,
"grad_norm": 0.9783635139465332,
"learning_rate": 1.6447368421052632e-06,
"loss": 1.3533,
"step": 350
},
{
"epoch": 1.9703808180535967,
"eval_loss": 1.0208680629730225,
"eval_runtime": 22.9356,
"eval_samples_per_second": 3.837,
"eval_steps_per_second": 3.837,
"step": 350
}
],
"logging_steps": 5,
"max_steps": 354,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.078311570721997e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}