medicalcode-lora-v0 / trainer_state.json
NajiAboo's picture
Upload 9 files
53c6423
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9956458635703918,
"eval_steps": 500,
"global_step": 1548,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 4.999747735219333e-05,
"loss": 1.6087,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 4.998512275175808e-05,
"loss": 1.4212,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 4.9962477937145644e-05,
"loss": 1.0602,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 4.992955223470575e-05,
"loss": 1.0232,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 4.9886359205009334e-05,
"loss": 1.0354,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 4.9832916637263665e-05,
"loss": 0.8403,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 4.976924654198569e-05,
"loss": 0.7628,
"step": 70
},
{
"epoch": 0.15,
"learning_rate": 4.9695375141937e-05,
"loss": 0.7421,
"step": 80
},
{
"epoch": 0.17,
"learning_rate": 4.9611332861323875e-05,
"loss": 0.7773,
"step": 90
},
{
"epoch": 0.19,
"learning_rate": 4.95171543132669e-05,
"loss": 0.7814,
"step": 100
},
{
"epoch": 0.21,
"learning_rate": 4.941287828554553e-05,
"loss": 0.7798,
"step": 110
},
{
"epoch": 0.23,
"learning_rate": 4.929854772462312e-05,
"loss": 0.7717,
"step": 120
},
{
"epoch": 0.25,
"learning_rate": 4.9174209717959294e-05,
"loss": 0.8173,
"step": 130
},
{
"epoch": 0.27,
"learning_rate": 4.9039915474616805e-05,
"loss": 0.8009,
"step": 140
},
{
"epoch": 0.29,
"learning_rate": 4.889572030417091e-05,
"loss": 0.7499,
"step": 150
},
{
"epoch": 0.31,
"learning_rate": 4.874168359392987e-05,
"loss": 0.6257,
"step": 160
},
{
"epoch": 0.33,
"learning_rate": 4.857786878447612e-05,
"loss": 0.7029,
"step": 170
},
{
"epoch": 0.35,
"learning_rate": 4.8404343343538014e-05,
"loss": 0.6505,
"step": 180
},
{
"epoch": 0.37,
"learning_rate": 4.822117873820301e-05,
"loss": 0.6647,
"step": 190
},
{
"epoch": 0.39,
"learning_rate": 4.802845040548363e-05,
"loss": 0.6787,
"step": 200
},
{
"epoch": 0.41,
"learning_rate": 4.782623772124855e-05,
"loss": 0.5937,
"step": 210
},
{
"epoch": 0.43,
"learning_rate": 4.7614623967531244e-05,
"loss": 0.649,
"step": 220
},
{
"epoch": 0.45,
"learning_rate": 4.7393696298230084e-05,
"loss": 0.729,
"step": 230
},
{
"epoch": 0.46,
"learning_rate": 4.716354570321361e-05,
"loss": 0.6169,
"step": 240
},
{
"epoch": 0.48,
"learning_rate": 4.692426697084605e-05,
"loss": 0.6484,
"step": 250
},
{
"epoch": 0.5,
"learning_rate": 4.6675958648948394e-05,
"loss": 0.6714,
"step": 260
},
{
"epoch": 0.52,
"learning_rate": 4.6418723004211075e-05,
"loss": 0.6608,
"step": 270
},
{
"epoch": 0.54,
"learning_rate": 4.615266598007512e-05,
"loss": 0.6982,
"step": 280
},
{
"epoch": 0.56,
"learning_rate": 4.587789715309888e-05,
"loss": 0.6304,
"step": 290
},
{
"epoch": 0.58,
"learning_rate": 4.559452968782861e-05,
"loss": 0.6261,
"step": 300
},
{
"epoch": 0.6,
"learning_rate": 4.530268029019117e-05,
"loss": 0.6385,
"step": 310
},
{
"epoch": 0.62,
"learning_rate": 4.500246915942827e-05,
"loss": 0.6703,
"step": 320
},
{
"epoch": 0.64,
"learning_rate": 4.469401993859201e-05,
"loss": 0.62,
"step": 330
},
{
"epoch": 0.66,
"learning_rate": 4.437745966362201e-05,
"loss": 0.7172,
"step": 340
},
{
"epoch": 0.68,
"learning_rate": 4.4052918711025194e-05,
"loss": 0.5989,
"step": 350
},
{
"epoch": 0.7,
"learning_rate": 4.372053074417975e-05,
"loss": 0.6586,
"step": 360
},
{
"epoch": 0.72,
"learning_rate": 4.3380432658285367e-05,
"loss": 0.6459,
"step": 370
},
{
"epoch": 0.74,
"learning_rate": 4.3032764523982496e-05,
"loss": 0.6918,
"step": 380
},
{
"epoch": 0.75,
"learning_rate": 4.267766952966369e-05,
"loss": 0.6366,
"step": 390
},
{
"epoch": 0.77,
"learning_rate": 4.231529392250095e-05,
"loss": 0.6692,
"step": 400
},
{
"epoch": 0.79,
"learning_rate": 4.194578694821332e-05,
"loss": 0.715,
"step": 410
},
{
"epoch": 0.81,
"learning_rate": 4.156930078959946e-05,
"loss": 0.6242,
"step": 420
},
{
"epoch": 0.83,
"learning_rate": 4.1185990503860626e-05,
"loss": 0.7372,
"step": 430
},
{
"epoch": 0.85,
"learning_rate": 4.079601395873979e-05,
"loss": 0.6203,
"step": 440
},
{
"epoch": 0.87,
"learning_rate": 4.0399531767503204e-05,
"loss": 0.7101,
"step": 450
},
{
"epoch": 0.89,
"learning_rate": 3.999670722279131e-05,
"loss": 0.6866,
"step": 460
},
{
"epoch": 0.91,
"learning_rate": 3.958770622936599e-05,
"loss": 0.5917,
"step": 470
},
{
"epoch": 0.93,
"learning_rate": 3.917269723578212e-05,
"loss": 0.664,
"step": 480
},
{
"epoch": 0.95,
"learning_rate": 3.8751851165011413e-05,
"loss": 0.6763,
"step": 490
},
{
"epoch": 0.97,
"learning_rate": 3.8325341344047174e-05,
"loss": 0.615,
"step": 500
},
{
"epoch": 0.99,
"learning_rate": 3.7893343432518946e-05,
"loss": 0.5707,
"step": 510
},
{
"epoch": 1.01,
"learning_rate": 3.745603535034641e-05,
"loss": 0.6939,
"step": 520
},
{
"epoch": 1.03,
"learning_rate": 3.701359720446249e-05,
"loss": 0.6863,
"step": 530
},
{
"epoch": 1.04,
"learning_rate": 3.656621121463557e-05,
"loss": 0.6671,
"step": 540
},
{
"epoch": 1.06,
"learning_rate": 3.611406163842168e-05,
"loss": 0.518,
"step": 550
},
{
"epoch": 1.08,
"learning_rate": 3.565733469527731e-05,
"loss": 0.6747,
"step": 560
},
{
"epoch": 1.1,
"learning_rate": 3.519621848986428e-05,
"loss": 0.5754,
"step": 570
},
{
"epoch": 1.12,
"learning_rate": 3.473090293457811e-05,
"loss": 0.6061,
"step": 580
},
{
"epoch": 1.14,
"learning_rate": 3.426157967133192e-05,
"loss": 0.5859,
"step": 590
},
{
"epoch": 1.16,
"learning_rate": 3.3788441992628026e-05,
"loss": 0.546,
"step": 600
},
{
"epoch": 1.18,
"learning_rate": 3.331168476194969e-05,
"loss": 0.6333,
"step": 610
},
{
"epoch": 1.2,
"learning_rate": 3.283150433350589e-05,
"loss": 0.6332,
"step": 620
},
{
"epoch": 1.22,
"learning_rate": 3.234809847136213e-05,
"loss": 0.5709,
"step": 630
},
{
"epoch": 1.24,
"learning_rate": 3.1861666267990566e-05,
"loss": 0.578,
"step": 640
},
{
"epoch": 1.26,
"learning_rate": 3.137240806227306e-05,
"loss": 0.537,
"step": 650
},
{
"epoch": 1.28,
"learning_rate": 3.08805253569909e-05,
"loss": 0.5905,
"step": 660
},
{
"epoch": 1.3,
"learning_rate": 3.038622073583507e-05,
"loss": 0.6015,
"step": 670
},
{
"epoch": 1.32,
"learning_rate": 2.9889697779971504e-05,
"loss": 0.5735,
"step": 680
},
{
"epoch": 1.34,
"learning_rate": 2.9391160984195382e-05,
"loss": 0.5791,
"step": 690
},
{
"epoch": 1.35,
"learning_rate": 2.8890815672709225e-05,
"loss": 0.5683,
"step": 700
},
{
"epoch": 1.37,
"learning_rate": 2.8388867914559347e-05,
"loss": 0.6583,
"step": 710
},
{
"epoch": 1.39,
"learning_rate": 2.7885524438765603e-05,
"loss": 0.6091,
"step": 720
},
{
"epoch": 1.41,
"learning_rate": 2.7380992549179235e-05,
"loss": 0.5614,
"step": 730
},
{
"epoch": 1.43,
"learning_rate": 2.6875480039104085e-05,
"loss": 0.6208,
"step": 740
},
{
"epoch": 1.45,
"learning_rate": 2.636919510571609e-05,
"loss": 0.5686,
"step": 750
},
{
"epoch": 1.47,
"learning_rate": 2.5862346264316605e-05,
"loss": 0.5565,
"step": 760
},
{
"epoch": 1.49,
"learning_rate": 2.5355142262454506e-05,
"loss": 0.5936,
"step": 770
},
{
"epoch": 1.51,
"learning_rate": 2.484779199395285e-05,
"loss": 0.6469,
"step": 780
},
{
"epoch": 1.53,
"learning_rate": 2.4340504412875113e-05,
"loss": 0.5551,
"step": 790
},
{
"epoch": 1.55,
"learning_rate": 2.3833488447466746e-05,
"loss": 0.6454,
"step": 800
},
{
"epoch": 1.57,
"learning_rate": 2.3326952914107268e-05,
"loss": 0.61,
"step": 810
},
{
"epoch": 1.59,
"learning_rate": 2.2821106431308544e-05,
"loss": 0.6171,
"step": 820
},
{
"epoch": 1.61,
"learning_rate": 2.2316157333794414e-05,
"loss": 0.5636,
"step": 830
},
{
"epoch": 1.63,
"learning_rate": 2.1812313586697307e-05,
"loss": 0.5094,
"step": 840
},
{
"epoch": 1.64,
"learning_rate": 2.1309782699907042e-05,
"loss": 0.6278,
"step": 850
},
{
"epoch": 1.66,
"learning_rate": 2.0808771642607146e-05,
"loss": 0.5556,
"step": 860
},
{
"epoch": 1.68,
"learning_rate": 2.0309486758033773e-05,
"loss": 0.5103,
"step": 870
},
{
"epoch": 1.7,
"learning_rate": 1.9812133678492554e-05,
"loss": 0.5002,
"step": 880
},
{
"epoch": 1.72,
"learning_rate": 1.9316917240668133e-05,
"loss": 0.626,
"step": 890
},
{
"epoch": 1.74,
"learning_rate": 1.8824041401261462e-05,
"loss": 0.5432,
"step": 900
},
{
"epoch": 1.76,
"learning_rate": 1.833370915298948e-05,
"loss": 0.5423,
"step": 910
},
{
"epoch": 1.78,
"learning_rate": 1.784612244098181e-05,
"loss": 0.5179,
"step": 920
},
{
"epoch": 1.8,
"learning_rate": 1.7361482079608914e-05,
"loss": 0.5541,
"step": 930
},
{
"epoch": 1.82,
"learning_rate": 1.687998766977597e-05,
"loss": 0.6554,
"step": 940
},
{
"epoch": 1.84,
"learning_rate": 1.6401837516716546e-05,
"loss": 0.601,
"step": 950
},
{
"epoch": 1.86,
"learning_rate": 1.5927228548319767e-05,
"loss": 0.5267,
"step": 960
},
{
"epoch": 1.88,
"learning_rate": 1.545635623402496e-05,
"loss": 0.5281,
"step": 970
},
{
"epoch": 1.9,
"learning_rate": 1.4989414504316748e-05,
"loss": 0.5823,
"step": 980
},
{
"epoch": 1.92,
"learning_rate": 1.4526595670854159e-05,
"loss": 0.5789,
"step": 990
},
{
"epoch": 1.94,
"learning_rate": 1.4068090347266311e-05,
"loss": 0.6653,
"step": 1000
},
{
"epoch": 1.95,
"learning_rate": 1.3614087370647479e-05,
"loss": 0.5199,
"step": 1010
},
{
"epoch": 1.97,
"learning_rate": 1.3164773723783918e-05,
"loss": 0.6317,
"step": 1020
},
{
"epoch": 1.99,
"learning_rate": 1.2720334458144235e-05,
"loss": 0.4948,
"step": 1030
},
{
"epoch": 2.01,
"learning_rate": 1.2280952617665334e-05,
"loss": 0.6401,
"step": 1040
},
{
"epoch": 2.03,
"learning_rate": 1.1846809163365052e-05,
"loss": 0.5579,
"step": 1050
},
{
"epoch": 2.05,
"learning_rate": 1.1418082898812721e-05,
"loss": 0.4654,
"step": 1060
},
{
"epoch": 2.07,
"learning_rate": 1.0994950396488275e-05,
"loss": 0.5219,
"step": 1070
},
{
"epoch": 2.09,
"learning_rate": 1.057758592506022e-05,
"loss": 0.6016,
"step": 1080
},
{
"epoch": 2.11,
"learning_rate": 1.0166161377612437e-05,
"loss": 0.515,
"step": 1090
},
{
"epoch": 2.13,
"learning_rate": 9.760846200849388e-06,
"loss": 0.5687,
"step": 1100
},
{
"epoch": 2.15,
"learning_rate": 9.361807325308861e-06,
"loss": 0.5587,
"step": 1110
},
{
"epoch": 2.17,
"learning_rate": 8.969209096611092e-06,
"loss": 0.5365,
"step": 1120
},
{
"epoch": 2.19,
"learning_rate": 8.5832132077723e-06,
"loss": 0.514,
"step": 1130
},
{
"epoch": 2.21,
"learning_rate": 8.203978632610915e-06,
"loss": 0.5671,
"step": 1140
},
{
"epoch": 2.23,
"learning_rate": 7.831661560273585e-06,
"loss": 0.5803,
"step": 1150
},
{
"epoch": 2.24,
"learning_rate": 7.466415330908147e-06,
"loss": 0.6003,
"step": 1160
},
{
"epoch": 2.26,
"learning_rate": 7.108390372509893e-06,
"loss": 0.59,
"step": 1170
},
{
"epoch": 2.28,
"learning_rate": 6.757734138967248e-06,
"loss": 0.6142,
"step": 1180
},
{
"epoch": 2.3,
"learning_rate": 6.414591049332366e-06,
"loss": 0.5202,
"step": 1190
},
{
"epoch": 2.32,
"learning_rate": 6.079102428341588e-06,
"loss": 0.5333,
"step": 1200
},
{
"epoch": 2.34,
"learning_rate": 5.7514064482104e-06,
"loss": 0.5676,
"step": 1210
},
{
"epoch": 2.36,
"learning_rate": 5.431638071726602e-06,
"loss": 0.5741,
"step": 1220
},
{
"epoch": 2.38,
"learning_rate": 5.11992899666546e-06,
"loss": 0.5904,
"step": 1230
},
{
"epoch": 2.4,
"learning_rate": 4.8164076015494695e-06,
"loss": 0.5816,
"step": 1240
},
{
"epoch": 2.42,
"learning_rate": 4.521198892775203e-06,
"loss": 0.5774,
"step": 1250
},
{
"epoch": 2.44,
"learning_rate": 4.234424453128974e-06,
"loss": 0.5844,
"step": 1260
},
{
"epoch": 2.46,
"learning_rate": 3.9562023917124905e-06,
"loss": 0.5407,
"step": 1270
},
{
"epoch": 2.48,
"learning_rate": 3.6866472952992226e-06,
"loss": 0.5709,
"step": 1280
},
{
"epoch": 2.5,
"learning_rate": 3.425870181141394e-06,
"loss": 0.5139,
"step": 1290
},
{
"epoch": 2.52,
"learning_rate": 3.173978451247153e-06,
"loss": 0.5911,
"step": 1300
},
{
"epoch": 2.54,
"learning_rate": 2.931075848146647e-06,
"loss": 0.5552,
"step": 1310
},
{
"epoch": 2.55,
"learning_rate": 2.697262412165261e-06,
"loss": 0.5725,
"step": 1320
},
{
"epoch": 2.57,
"learning_rate": 2.4726344402216662e-06,
"loss": 0.5334,
"step": 1330
},
{
"epoch": 2.59,
"learning_rate": 2.2572844461675902e-06,
"loss": 0.4746,
"step": 1340
},
{
"epoch": 2.61,
"learning_rate": 2.051301122685634e-06,
"loss": 0.5849,
"step": 1350
},
{
"epoch": 2.63,
"learning_rate": 1.8547693047608588e-06,
"loss": 0.5648,
"step": 1360
},
{
"epoch": 2.65,
"learning_rate": 1.6677699347412035e-06,
"loss": 0.5596,
"step": 1370
},
{
"epoch": 2.67,
"learning_rate": 1.4903800290010817e-06,
"loss": 0.508,
"step": 1380
},
{
"epoch": 2.69,
"learning_rate": 1.3226726462218897e-06,
"loss": 0.5818,
"step": 1390
},
{
"epoch": 2.71,
"learning_rate": 1.1647168573025474e-06,
"loss": 0.5936,
"step": 1400
},
{
"epoch": 2.73,
"learning_rate": 1.0165777169123703e-06,
"loss": 0.5606,
"step": 1410
},
{
"epoch": 2.75,
"learning_rate": 8.783162366980763e-07,
"loss": 0.6648,
"step": 1420
},
{
"epoch": 2.77,
"learning_rate": 7.499893601559255e-07,
"loss": 0.6249,
"step": 1430
},
{
"epoch": 2.79,
"learning_rate": 6.316499391793212e-07,
"loss": 0.5482,
"step": 1440
},
{
"epoch": 2.81,
"learning_rate": 5.233467122915642e-07,
"loss": 0.4884,
"step": 1450
},
{
"epoch": 2.83,
"learning_rate": 4.2512428457271435e-07,
"loss": 0.6196,
"step": 1460
},
{
"epoch": 2.84,
"learning_rate": 3.370231092888365e-07,
"loss": 0.5377,
"step": 1470
},
{
"epoch": 2.86,
"learning_rate": 2.590794712311606e-07,
"loss": 0.5291,
"step": 1480
},
{
"epoch": 2.88,
"learning_rate": 1.913254717720664e-07,
"loss": 0.5918,
"step": 1490
},
{
"epoch": 2.9,
"learning_rate": 1.3378901564400636e-07,
"loss": 0.5625,
"step": 1500
},
{
"epoch": 2.92,
"learning_rate": 8.649379944685732e-08,
"loss": 0.4924,
"step": 1510
},
{
"epoch": 2.94,
"learning_rate": 4.9459301888366004e-08,
"loss": 0.5611,
"step": 1520
},
{
"epoch": 2.96,
"learning_rate": 2.2700775761791416e-08,
"loss": 0.5466,
"step": 1530
},
{
"epoch": 2.98,
"learning_rate": 6.229241663974206e-09,
"loss": 0.5867,
"step": 1540
},
{
"epoch": 3.0,
"step": 1548,
"total_flos": 5.030540427342643e+17,
"train_loss": 0.6278958536241713,
"train_runtime": 4523.4188,
"train_samples_per_second": 5.483,
"train_steps_per_second": 0.342
}
],
"logging_steps": 10,
"max_steps": 1548,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 5.030540427342643e+17,
"trial_name": null,
"trial_params": null
}