| { |
| "best_global_step": 350, |
| "best_metric": 1.0208680629730225, |
| "best_model_checkpoint": "./llama2-medical-lora/checkpoint-300", |
| "epoch": 1.992947813822285, |
| "eval_steps": 50, |
| "global_step": 354, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028208744710860368, |
| "grad_norm": 0.802484393119812, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.5988, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.056417489421720736, |
| "grad_norm": 0.6854377388954163, |
| "learning_rate": 1.8e-05, |
| "loss": 2.5458, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0846262341325811, |
| "grad_norm": 0.7429786920547485, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 2.7827, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.11283497884344147, |
| "grad_norm": 0.5219557881355286, |
| "learning_rate": 3.8e-05, |
| "loss": 2.4165, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.14104372355430184, |
| "grad_norm": 0.5999312996864319, |
| "learning_rate": 4.8e-05, |
| "loss": 2.4447, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1692524682651622, |
| "grad_norm": 0.4902834892272949, |
| "learning_rate": 5.8e-05, |
| "loss": 2.4914, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.19746121297602257, |
| "grad_norm": 0.5148080587387085, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 2.228, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.22566995768688294, |
| "grad_norm": 0.4651883542537689, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 2.1752, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2538787023977433, |
| "grad_norm": 0.5549948215484619, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 2.2121, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2820874471086037, |
| "grad_norm": 0.5938742160797119, |
| "learning_rate": 9.8e-05, |
| "loss": 2.0736, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2820874471086037, |
| "eval_loss": 1.4600411653518677, |
| "eval_runtime": 22.9644, |
| "eval_samples_per_second": 3.832, |
| "eval_steps_per_second": 3.832, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.31029619181946405, |
| "grad_norm": 0.5836663842201233, |
| "learning_rate": 9.868421052631579e-05, |
| "loss": 1.9917, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3385049365303244, |
| "grad_norm": 0.5286921262741089, |
| "learning_rate": 9.703947368421054e-05, |
| "loss": 1.6407, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.36671368124118475, |
| "grad_norm": 0.6719670295715332, |
| "learning_rate": 9.539473684210526e-05, |
| "loss": 1.783, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.39492242595204513, |
| "grad_norm": 0.5518754720687866, |
| "learning_rate": 9.375e-05, |
| "loss": 1.552, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4231311706629055, |
| "grad_norm": 0.6786110997200012, |
| "learning_rate": 9.210526315789474e-05, |
| "loss": 1.8268, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4513399153737659, |
| "grad_norm": 0.6912865042686462, |
| "learning_rate": 9.046052631578948e-05, |
| "loss": 1.6622, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4795486600846262, |
| "grad_norm": 0.5231357216835022, |
| "learning_rate": 8.881578947368422e-05, |
| "loss": 1.4071, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5077574047954866, |
| "grad_norm": 0.6691134572029114, |
| "learning_rate": 8.717105263157895e-05, |
| "loss": 1.7379, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.535966149506347, |
| "grad_norm": 0.7257916331291199, |
| "learning_rate": 8.552631578947369e-05, |
| "loss": 1.4458, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5641748942172073, |
| "grad_norm": 0.6908120512962341, |
| "learning_rate": 8.388157894736842e-05, |
| "loss": 1.6261, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5641748942172073, |
| "eval_loss": 1.1523932218551636, |
| "eval_runtime": 22.9742, |
| "eval_samples_per_second": 3.83, |
| "eval_steps_per_second": 3.83, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5923836389280677, |
| "grad_norm": 0.7155871391296387, |
| "learning_rate": 8.223684210526316e-05, |
| "loss": 1.6057, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6205923836389281, |
| "grad_norm": 0.6426169276237488, |
| "learning_rate": 8.059210526315791e-05, |
| "loss": 1.5596, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6488011283497884, |
| "grad_norm": 0.7796515226364136, |
| "learning_rate": 7.894736842105263e-05, |
| "loss": 1.507, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6770098730606487, |
| "grad_norm": 0.671275794506073, |
| "learning_rate": 7.730263157894737e-05, |
| "loss": 1.6426, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7052186177715092, |
| "grad_norm": 0.6995854377746582, |
| "learning_rate": 7.565789473684211e-05, |
| "loss": 1.5737, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7334273624823695, |
| "grad_norm": 0.8593846559524536, |
| "learning_rate": 7.401315789473685e-05, |
| "loss": 1.5536, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7616361071932299, |
| "grad_norm": 0.8717703223228455, |
| "learning_rate": 7.236842105263159e-05, |
| "loss": 1.6078, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7898448519040903, |
| "grad_norm": 0.8219364881515503, |
| "learning_rate": 7.072368421052632e-05, |
| "loss": 1.7053, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8180535966149506, |
| "grad_norm": 0.7495922446250916, |
| "learning_rate": 6.907894736842105e-05, |
| "loss": 1.4436, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.846262341325811, |
| "grad_norm": 0.8867738246917725, |
| "learning_rate": 6.743421052631579e-05, |
| "loss": 1.6826, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.846262341325811, |
| "eval_loss": 1.091098427772522, |
| "eval_runtime": 22.9641, |
| "eval_samples_per_second": 3.832, |
| "eval_steps_per_second": 3.832, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8744710860366713, |
| "grad_norm": 0.8017202615737915, |
| "learning_rate": 6.578947368421054e-05, |
| "loss": 1.4847, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9026798307475318, |
| "grad_norm": 0.8118647336959839, |
| "learning_rate": 6.414473684210526e-05, |
| "loss": 1.5564, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9308885754583921, |
| "grad_norm": 0.7750623822212219, |
| "learning_rate": 6.25e-05, |
| "loss": 1.6639, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9590973201692524, |
| "grad_norm": 0.8271228075027466, |
| "learning_rate": 6.085526315789474e-05, |
| "loss": 1.7569, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9873060648801129, |
| "grad_norm": 0.8436787128448486, |
| "learning_rate": 5.921052631578947e-05, |
| "loss": 1.592, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0112834978843441, |
| "grad_norm": 0.7751876711845398, |
| "learning_rate": 5.7565789473684216e-05, |
| "loss": 1.5432, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0394922425952045, |
| "grad_norm": 0.752131998538971, |
| "learning_rate": 5.5921052631578954e-05, |
| "loss": 1.48, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.0677009873060648, |
| "grad_norm": 0.886309027671814, |
| "learning_rate": 5.4276315789473686e-05, |
| "loss": 1.4321, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0959097320169253, |
| "grad_norm": 0.810002326965332, |
| "learning_rate": 5.2631578947368424e-05, |
| "loss": 1.3895, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.1241184767277856, |
| "grad_norm": 0.8746829032897949, |
| "learning_rate": 5.0986842105263155e-05, |
| "loss": 1.4329, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1241184767277856, |
| "eval_loss": 1.063501238822937, |
| "eval_runtime": 22.947, |
| "eval_samples_per_second": 3.835, |
| "eval_steps_per_second": 3.835, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.152327221438646, |
| "grad_norm": 0.7560333013534546, |
| "learning_rate": 4.9342105263157894e-05, |
| "loss": 1.3735, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.1805359661495063, |
| "grad_norm": 0.9235308766365051, |
| "learning_rate": 4.769736842105263e-05, |
| "loss": 1.3622, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.2087447108603668, |
| "grad_norm": 0.8749310374259949, |
| "learning_rate": 4.605263157894737e-05, |
| "loss": 1.3775, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.2369534555712272, |
| "grad_norm": 0.8778985738754272, |
| "learning_rate": 4.440789473684211e-05, |
| "loss": 1.382, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2651622002820875, |
| "grad_norm": 1.1433746814727783, |
| "learning_rate": 4.2763157894736847e-05, |
| "loss": 1.4517, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.2933709449929478, |
| "grad_norm": 0.9843802452087402, |
| "learning_rate": 4.111842105263158e-05, |
| "loss": 1.5194, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.3215796897038081, |
| "grad_norm": 0.8270325064659119, |
| "learning_rate": 3.9473684210526316e-05, |
| "loss": 1.4335, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.3497884344146684, |
| "grad_norm": 0.8765392303466797, |
| "learning_rate": 3.7828947368421054e-05, |
| "loss": 1.3667, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.377997179125529, |
| "grad_norm": 0.9477415084838867, |
| "learning_rate": 3.618421052631579e-05, |
| "loss": 1.438, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.4062059238363893, |
| "grad_norm": 0.7957858443260193, |
| "learning_rate": 3.4539473684210524e-05, |
| "loss": 1.3859, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4062059238363893, |
| "eval_loss": 1.0414153337478638, |
| "eval_runtime": 22.9463, |
| "eval_samples_per_second": 3.835, |
| "eval_steps_per_second": 3.835, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4344146685472496, |
| "grad_norm": 0.9236720204353333, |
| "learning_rate": 3.289473684210527e-05, |
| "loss": 1.4527, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.46262341325811, |
| "grad_norm": 0.9670230150222778, |
| "learning_rate": 3.125e-05, |
| "loss": 1.5064, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4908321579689705, |
| "grad_norm": 0.8780084252357483, |
| "learning_rate": 2.9605263157894735e-05, |
| "loss": 1.4092, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.5190409026798308, |
| "grad_norm": 0.8973761200904846, |
| "learning_rate": 2.7960526315789477e-05, |
| "loss": 1.472, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.5472496473906912, |
| "grad_norm": 1.020290732383728, |
| "learning_rate": 2.6315789473684212e-05, |
| "loss": 1.4181, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5754583921015515, |
| "grad_norm": 0.9510458111763, |
| "learning_rate": 2.4671052631578947e-05, |
| "loss": 1.3838, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.6036671368124118, |
| "grad_norm": 0.856960654258728, |
| "learning_rate": 2.3026315789473685e-05, |
| "loss": 1.3249, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.6318758815232721, |
| "grad_norm": 0.9089232683181763, |
| "learning_rate": 2.1381578947368423e-05, |
| "loss": 1.4036, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.6600846262341324, |
| "grad_norm": 0.9417166709899902, |
| "learning_rate": 1.9736842105263158e-05, |
| "loss": 1.3667, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.688293370944993, |
| "grad_norm": 0.8981354832649231, |
| "learning_rate": 1.8092105263157896e-05, |
| "loss": 1.3101, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.688293370944993, |
| "eval_loss": 1.0266820192337036, |
| "eval_runtime": 22.9326, |
| "eval_samples_per_second": 3.837, |
| "eval_steps_per_second": 3.837, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7165021156558533, |
| "grad_norm": 0.9740147590637207, |
| "learning_rate": 1.6447368421052635e-05, |
| "loss": 1.4924, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.7447108603667136, |
| "grad_norm": 0.8645333647727966, |
| "learning_rate": 1.4802631578947368e-05, |
| "loss": 1.3228, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.7729196050775742, |
| "grad_norm": 1.0288525819778442, |
| "learning_rate": 1.3157894736842106e-05, |
| "loss": 1.357, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.8011283497884345, |
| "grad_norm": 1.038477897644043, |
| "learning_rate": 1.1513157894736843e-05, |
| "loss": 1.4585, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.8293370944992948, |
| "grad_norm": 0.9444319009780884, |
| "learning_rate": 9.868421052631579e-06, |
| "loss": 1.3472, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.8575458392101551, |
| "grad_norm": 0.9182987809181213, |
| "learning_rate": 8.223684210526317e-06, |
| "loss": 1.5454, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.8857545839210155, |
| "grad_norm": 1.0083339214324951, |
| "learning_rate": 6.578947368421053e-06, |
| "loss": 1.6271, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.9139633286318758, |
| "grad_norm": 0.9220558404922485, |
| "learning_rate": 4.9342105263157895e-06, |
| "loss": 1.4036, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.9421720733427361, |
| "grad_norm": 0.997721254825592, |
| "learning_rate": 3.2894736842105265e-06, |
| "loss": 1.4065, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.9703808180535967, |
| "grad_norm": 0.9783635139465332, |
| "learning_rate": 1.6447368421052632e-06, |
| "loss": 1.3533, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.9703808180535967, |
| "eval_loss": 1.0208680629730225, |
| "eval_runtime": 22.9356, |
| "eval_samples_per_second": 3.837, |
| "eval_steps_per_second": 3.837, |
| "step": 350 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 354, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.078311570721997e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|