{ "best_global_step": 350, "best_metric": 1.0208680629730225, "best_model_checkpoint": "./llama2-medical-lora/checkpoint-300", "epoch": 1.992947813822285, "eval_steps": 50, "global_step": 354, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.028208744710860368, "grad_norm": 0.802484393119812, "learning_rate": 8.000000000000001e-06, "loss": 2.5988, "step": 5 }, { "epoch": 0.056417489421720736, "grad_norm": 0.6854377388954163, "learning_rate": 1.8e-05, "loss": 2.5458, "step": 10 }, { "epoch": 0.0846262341325811, "grad_norm": 0.7429786920547485, "learning_rate": 2.8000000000000003e-05, "loss": 2.7827, "step": 15 }, { "epoch": 0.11283497884344147, "grad_norm": 0.5219557881355286, "learning_rate": 3.8e-05, "loss": 2.4165, "step": 20 }, { "epoch": 0.14104372355430184, "grad_norm": 0.5999312996864319, "learning_rate": 4.8e-05, "loss": 2.4447, "step": 25 }, { "epoch": 0.1692524682651622, "grad_norm": 0.4902834892272949, "learning_rate": 5.8e-05, "loss": 2.4914, "step": 30 }, { "epoch": 0.19746121297602257, "grad_norm": 0.5148080587387085, "learning_rate": 6.800000000000001e-05, "loss": 2.228, "step": 35 }, { "epoch": 0.22566995768688294, "grad_norm": 0.4651883542537689, "learning_rate": 7.800000000000001e-05, "loss": 2.1752, "step": 40 }, { "epoch": 0.2538787023977433, "grad_norm": 0.5549948215484619, "learning_rate": 8.800000000000001e-05, "loss": 2.2121, "step": 45 }, { "epoch": 0.2820874471086037, "grad_norm": 0.5938742160797119, "learning_rate": 9.8e-05, "loss": 2.0736, "step": 50 }, { "epoch": 0.2820874471086037, "eval_loss": 1.4600411653518677, "eval_runtime": 22.9644, "eval_samples_per_second": 3.832, "eval_steps_per_second": 3.832, "step": 50 }, { "epoch": 0.31029619181946405, "grad_norm": 0.5836663842201233, "learning_rate": 9.868421052631579e-05, "loss": 1.9917, "step": 55 }, { "epoch": 0.3385049365303244, "grad_norm": 0.5286921262741089, "learning_rate": 9.703947368421054e-05, "loss": 1.6407, "step": 60 }, { "epoch": 0.36671368124118475, "grad_norm": 0.6719670295715332, "learning_rate": 9.539473684210526e-05, "loss": 1.783, "step": 65 }, { "epoch": 0.39492242595204513, "grad_norm": 0.5518754720687866, "learning_rate": 9.375e-05, "loss": 1.552, "step": 70 }, { "epoch": 0.4231311706629055, "grad_norm": 0.6786110997200012, "learning_rate": 9.210526315789474e-05, "loss": 1.8268, "step": 75 }, { "epoch": 0.4513399153737659, "grad_norm": 0.6912865042686462, "learning_rate": 9.046052631578948e-05, "loss": 1.6622, "step": 80 }, { "epoch": 0.4795486600846262, "grad_norm": 0.5231357216835022, "learning_rate": 8.881578947368422e-05, "loss": 1.4071, "step": 85 }, { "epoch": 0.5077574047954866, "grad_norm": 0.6691134572029114, "learning_rate": 8.717105263157895e-05, "loss": 1.7379, "step": 90 }, { "epoch": 0.535966149506347, "grad_norm": 0.7257916331291199, "learning_rate": 8.552631578947369e-05, "loss": 1.4458, "step": 95 }, { "epoch": 0.5641748942172073, "grad_norm": 0.6908120512962341, "learning_rate": 8.388157894736842e-05, "loss": 1.6261, "step": 100 }, { "epoch": 0.5641748942172073, "eval_loss": 1.1523932218551636, "eval_runtime": 22.9742, "eval_samples_per_second": 3.83, "eval_steps_per_second": 3.83, "step": 100 }, { "epoch": 0.5923836389280677, "grad_norm": 0.7155871391296387, "learning_rate": 8.223684210526316e-05, "loss": 1.6057, "step": 105 }, { "epoch": 0.6205923836389281, "grad_norm": 0.6426169276237488, "learning_rate": 8.059210526315791e-05, "loss": 1.5596, "step": 110 }, { "epoch": 0.6488011283497884, "grad_norm": 0.7796515226364136, "learning_rate": 7.894736842105263e-05, "loss": 1.507, "step": 115 }, { "epoch": 0.6770098730606487, "grad_norm": 0.671275794506073, "learning_rate": 7.730263157894737e-05, "loss": 1.6426, "step": 120 }, { "epoch": 0.7052186177715092, "grad_norm": 0.6995854377746582, "learning_rate": 7.565789473684211e-05, "loss": 1.5737, "step": 125 }, { "epoch": 0.7334273624823695, "grad_norm": 0.8593846559524536, "learning_rate": 7.401315789473685e-05, "loss": 1.5536, "step": 130 }, { "epoch": 0.7616361071932299, "grad_norm": 0.8717703223228455, "learning_rate": 7.236842105263159e-05, "loss": 1.6078, "step": 135 }, { "epoch": 0.7898448519040903, "grad_norm": 0.8219364881515503, "learning_rate": 7.072368421052632e-05, "loss": 1.7053, "step": 140 }, { "epoch": 0.8180535966149506, "grad_norm": 0.7495922446250916, "learning_rate": 6.907894736842105e-05, "loss": 1.4436, "step": 145 }, { "epoch": 0.846262341325811, "grad_norm": 0.8867738246917725, "learning_rate": 6.743421052631579e-05, "loss": 1.6826, "step": 150 }, { "epoch": 0.846262341325811, "eval_loss": 1.091098427772522, "eval_runtime": 22.9641, "eval_samples_per_second": 3.832, "eval_steps_per_second": 3.832, "step": 150 }, { "epoch": 0.8744710860366713, "grad_norm": 0.8017202615737915, "learning_rate": 6.578947368421054e-05, "loss": 1.4847, "step": 155 }, { "epoch": 0.9026798307475318, "grad_norm": 0.8118647336959839, "learning_rate": 6.414473684210526e-05, "loss": 1.5564, "step": 160 }, { "epoch": 0.9308885754583921, "grad_norm": 0.7750623822212219, "learning_rate": 6.25e-05, "loss": 1.6639, "step": 165 }, { "epoch": 0.9590973201692524, "grad_norm": 0.8271228075027466, "learning_rate": 6.085526315789474e-05, "loss": 1.7569, "step": 170 }, { "epoch": 0.9873060648801129, "grad_norm": 0.8436787128448486, "learning_rate": 5.921052631578947e-05, "loss": 1.592, "step": 175 }, { "epoch": 1.0112834978843441, "grad_norm": 0.7751876711845398, "learning_rate": 5.7565789473684216e-05, "loss": 1.5432, "step": 180 }, { "epoch": 1.0394922425952045, "grad_norm": 0.752131998538971, "learning_rate": 5.5921052631578954e-05, "loss": 1.48, "step": 185 }, { "epoch": 1.0677009873060648, "grad_norm": 0.886309027671814, "learning_rate": 5.4276315789473686e-05, "loss": 1.4321, "step": 190 }, { "epoch": 1.0959097320169253, "grad_norm": 0.810002326965332, "learning_rate": 5.2631578947368424e-05, "loss": 1.3895, "step": 195 }, { "epoch": 1.1241184767277856, "grad_norm": 0.8746829032897949, "learning_rate": 5.0986842105263155e-05, "loss": 1.4329, "step": 200 }, { "epoch": 1.1241184767277856, "eval_loss": 1.063501238822937, "eval_runtime": 22.947, "eval_samples_per_second": 3.835, "eval_steps_per_second": 3.835, "step": 200 }, { "epoch": 1.152327221438646, "grad_norm": 0.7560333013534546, "learning_rate": 4.9342105263157894e-05, "loss": 1.3735, "step": 205 }, { "epoch": 1.1805359661495063, "grad_norm": 0.9235308766365051, "learning_rate": 4.769736842105263e-05, "loss": 1.3622, "step": 210 }, { "epoch": 1.2087447108603668, "grad_norm": 0.8749310374259949, "learning_rate": 4.605263157894737e-05, "loss": 1.3775, "step": 215 }, { "epoch": 1.2369534555712272, "grad_norm": 0.8778985738754272, "learning_rate": 4.440789473684211e-05, "loss": 1.382, "step": 220 }, { "epoch": 1.2651622002820875, "grad_norm": 1.1433746814727783, "learning_rate": 4.2763157894736847e-05, "loss": 1.4517, "step": 225 }, { "epoch": 1.2933709449929478, "grad_norm": 0.9843802452087402, "learning_rate": 4.111842105263158e-05, "loss": 1.5194, "step": 230 }, { "epoch": 1.3215796897038081, "grad_norm": 0.8270325064659119, "learning_rate": 3.9473684210526316e-05, "loss": 1.4335, "step": 235 }, { "epoch": 1.3497884344146684, "grad_norm": 0.8765392303466797, "learning_rate": 3.7828947368421054e-05, "loss": 1.3667, "step": 240 }, { "epoch": 1.377997179125529, "grad_norm": 0.9477415084838867, "learning_rate": 3.618421052631579e-05, "loss": 1.438, "step": 245 }, { "epoch": 1.4062059238363893, "grad_norm": 0.7957858443260193, "learning_rate": 3.4539473684210524e-05, "loss": 1.3859, "step": 250 }, { "epoch": 1.4062059238363893, "eval_loss": 1.0414153337478638, "eval_runtime": 22.9463, "eval_samples_per_second": 3.835, "eval_steps_per_second": 3.835, "step": 250 }, { "epoch": 1.4344146685472496, "grad_norm": 0.9236720204353333, "learning_rate": 3.289473684210527e-05, "loss": 1.4527, "step": 255 }, { "epoch": 1.46262341325811, "grad_norm": 0.9670230150222778, "learning_rate": 3.125e-05, "loss": 1.5064, "step": 260 }, { "epoch": 1.4908321579689705, "grad_norm": 0.8780084252357483, "learning_rate": 2.9605263157894735e-05, "loss": 1.4092, "step": 265 }, { "epoch": 1.5190409026798308, "grad_norm": 0.8973761200904846, "learning_rate": 2.7960526315789477e-05, "loss": 1.472, "step": 270 }, { "epoch": 1.5472496473906912, "grad_norm": 1.020290732383728, "learning_rate": 2.6315789473684212e-05, "loss": 1.4181, "step": 275 }, { "epoch": 1.5754583921015515, "grad_norm": 0.9510458111763, "learning_rate": 2.4671052631578947e-05, "loss": 1.3838, "step": 280 }, { "epoch": 1.6036671368124118, "grad_norm": 0.856960654258728, "learning_rate": 2.3026315789473685e-05, "loss": 1.3249, "step": 285 }, { "epoch": 1.6318758815232721, "grad_norm": 0.9089232683181763, "learning_rate": 2.1381578947368423e-05, "loss": 1.4036, "step": 290 }, { "epoch": 1.6600846262341324, "grad_norm": 0.9417166709899902, "learning_rate": 1.9736842105263158e-05, "loss": 1.3667, "step": 295 }, { "epoch": 1.688293370944993, "grad_norm": 0.8981354832649231, "learning_rate": 1.8092105263157896e-05, "loss": 1.3101, "step": 300 }, { "epoch": 1.688293370944993, "eval_loss": 1.0266820192337036, "eval_runtime": 22.9326, "eval_samples_per_second": 3.837, "eval_steps_per_second": 3.837, "step": 300 }, { "epoch": 1.7165021156558533, "grad_norm": 0.9740147590637207, "learning_rate": 1.6447368421052635e-05, "loss": 1.4924, "step": 305 }, { "epoch": 1.7447108603667136, "grad_norm": 0.8645333647727966, "learning_rate": 1.4802631578947368e-05, "loss": 1.3228, "step": 310 }, { "epoch": 1.7729196050775742, "grad_norm": 1.0288525819778442, "learning_rate": 1.3157894736842106e-05, "loss": 1.357, "step": 315 }, { "epoch": 1.8011283497884345, "grad_norm": 1.038477897644043, "learning_rate": 1.1513157894736843e-05, "loss": 1.4585, "step": 320 }, { "epoch": 1.8293370944992948, "grad_norm": 0.9444319009780884, "learning_rate": 9.868421052631579e-06, "loss": 1.3472, "step": 325 }, { "epoch": 1.8575458392101551, "grad_norm": 0.9182987809181213, "learning_rate": 8.223684210526317e-06, "loss": 1.5454, "step": 330 }, { "epoch": 1.8857545839210155, "grad_norm": 1.0083339214324951, "learning_rate": 6.578947368421053e-06, "loss": 1.6271, "step": 335 }, { "epoch": 1.9139633286318758, "grad_norm": 0.9220558404922485, "learning_rate": 4.9342105263157895e-06, "loss": 1.4036, "step": 340 }, { "epoch": 1.9421720733427361, "grad_norm": 0.997721254825592, "learning_rate": 3.2894736842105265e-06, "loss": 1.4065, "step": 345 }, { "epoch": 1.9703808180535967, "grad_norm": 0.9783635139465332, "learning_rate": 1.6447368421052632e-06, "loss": 1.3533, "step": 350 }, { "epoch": 1.9703808180535967, "eval_loss": 1.0208680629730225, "eval_runtime": 22.9356, "eval_samples_per_second": 3.837, "eval_steps_per_second": 3.837, "step": 350 } ], "logging_steps": 5, "max_steps": 354, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.078311570721997e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }