| { |
| "best_global_step": 6000, |
| "best_metric": 0.5394141674041748, |
| "best_model_checkpoint": "./banking-sms-parser-v9/checkpoint-6000", |
| "epoch": 2.132954141485958, |
| "eval_steps": 500, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07109847138286526, |
| "grad_norm": 0.3580895662307739, |
| "learning_rate": 0.0002929257020974049, |
| "loss": 1.6773, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14219694276573053, |
| "grad_norm": 0.4506950378417969, |
| "learning_rate": 0.00028581585495911834, |
| "loss": 1.0596, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.17774617845716317, |
| "eval_loss": 0.9089033007621765, |
| "eval_runtime": 20.1054, |
| "eval_samples_per_second": 124.345, |
| "eval_steps_per_second": 15.568, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.21329541414859582, |
| "grad_norm": 0.45709463953971863, |
| "learning_rate": 0.0002787060078208318, |
| "loss": 0.9841, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.28439388553146105, |
| "grad_norm": 0.44296860694885254, |
| "learning_rate": 0.0002715961606825453, |
| "loss": 0.8851, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.35549235691432635, |
| "grad_norm": 0.36072883009910583, |
| "learning_rate": 0.0002644863135442588, |
| "loss": 0.8392, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.35549235691432635, |
| "eval_loss": 0.7634525299072266, |
| "eval_runtime": 20.0775, |
| "eval_samples_per_second": 124.518, |
| "eval_steps_per_second": 15.59, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.42659082829719164, |
| "grad_norm": 0.3250260055065155, |
| "learning_rate": 0.00025737646640597224, |
| "loss": 0.8145, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4976892996800569, |
| "grad_norm": 0.43198713660240173, |
| "learning_rate": 0.0002502666192676857, |
| "loss": 0.7876, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5332385353714895, |
| "eval_loss": 0.6990019679069519, |
| "eval_runtime": 19.8584, |
| "eval_samples_per_second": 125.892, |
| "eval_steps_per_second": 15.762, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5687877710629221, |
| "grad_norm": 0.35759472846984863, |
| "learning_rate": 0.0002431567721293992, |
| "loss": 0.7508, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6398862424457874, |
| "grad_norm": 0.40281543135643005, |
| "learning_rate": 0.00023604692499111269, |
| "loss": 0.749, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7109847138286527, |
| "grad_norm": 0.35377171635627747, |
| "learning_rate": 0.00022893707785282615, |
| "loss": 0.735, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7109847138286527, |
| "eval_loss": 0.6533255577087402, |
| "eval_runtime": 19.9971, |
| "eval_samples_per_second": 125.018, |
| "eval_steps_per_second": 15.652, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.782083185211518, |
| "grad_norm": 0.433393269777298, |
| "learning_rate": 0.0002218272307145396, |
| "loss": 0.7081, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8531816565943833, |
| "grad_norm": 0.5624399185180664, |
| "learning_rate": 0.0002147173835762531, |
| "loss": 0.6873, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8887308922858158, |
| "eval_loss": 0.6280441880226135, |
| "eval_runtime": 19.9698, |
| "eval_samples_per_second": 125.189, |
| "eval_steps_per_second": 15.674, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9242801279772485, |
| "grad_norm": 0.3414687216281891, |
| "learning_rate": 0.00020760753643796656, |
| "loss": 0.6907, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9953785993601137, |
| "grad_norm": 0.34478306770324707, |
| "learning_rate": 0.00020049768929968003, |
| "loss": 0.6684, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.066477070742979, |
| "grad_norm": 0.4737894833087921, |
| "learning_rate": 0.00019338784216139352, |
| "loss": 0.6766, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.066477070742979, |
| "eval_loss": 0.6048450469970703, |
| "eval_runtime": 19.8742, |
| "eval_samples_per_second": 125.791, |
| "eval_steps_per_second": 15.749, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1375755421258442, |
| "grad_norm": 0.33612045645713806, |
| "learning_rate": 0.00018627799502310698, |
| "loss": 0.6582, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.2086740135087095, |
| "grad_norm": 0.42905059456825256, |
| "learning_rate": 0.00017916814788482044, |
| "loss": 0.644, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.2442232492001422, |
| "eval_loss": 0.5869821310043335, |
| "eval_runtime": 20.0276, |
| "eval_samples_per_second": 124.828, |
| "eval_steps_per_second": 15.628, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.2797724848915748, |
| "grad_norm": 0.4481838345527649, |
| "learning_rate": 0.00017205830074653396, |
| "loss": 0.6494, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.35087095627444, |
| "grad_norm": 0.6211143136024475, |
| "learning_rate": 0.00016494845360824742, |
| "loss": 0.6213, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.4219694276573054, |
| "grad_norm": 0.671023428440094, |
| "learning_rate": 0.00015783860646996088, |
| "loss": 0.6584, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.4219694276573054, |
| "eval_loss": 0.5725142359733582, |
| "eval_runtime": 19.8593, |
| "eval_samples_per_second": 125.886, |
| "eval_steps_per_second": 15.761, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.4930678990401707, |
| "grad_norm": 0.3733653426170349, |
| "learning_rate": 0.00015072875933167437, |
| "loss": 0.6068, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.564166370423036, |
| "grad_norm": 0.47719132900238037, |
| "learning_rate": 0.00014361891219338783, |
| "loss": 0.6331, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.5997156061144686, |
| "eval_loss": 0.5628250241279602, |
| "eval_runtime": 20.0049, |
| "eval_samples_per_second": 124.97, |
| "eval_steps_per_second": 15.646, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.6352648418059013, |
| "grad_norm": 0.398542195558548, |
| "learning_rate": 0.0001365090650551013, |
| "loss": 0.6192, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.7063633131887666, |
| "grad_norm": 0.3857918977737427, |
| "learning_rate": 0.0001293992179168148, |
| "loss": 0.6101, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.7774617845716318, |
| "grad_norm": 0.47959432005882263, |
| "learning_rate": 0.00012228937077852825, |
| "loss": 0.6024, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.7774617845716318, |
| "eval_loss": 0.5542768239974976, |
| "eval_runtime": 19.9822, |
| "eval_samples_per_second": 125.112, |
| "eval_steps_per_second": 15.664, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.8485602559544971, |
| "grad_norm": 0.5109241604804993, |
| "learning_rate": 0.00011517952364024173, |
| "loss": 0.6037, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.9196587273373622, |
| "grad_norm": 0.5760204792022705, |
| "learning_rate": 0.00010806967650195519, |
| "loss": 0.6116, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.9552079630287948, |
| "eval_loss": 0.5471554398536682, |
| "eval_runtime": 19.7954, |
| "eval_samples_per_second": 126.292, |
| "eval_steps_per_second": 15.812, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.9907571987202275, |
| "grad_norm": 0.4118503928184509, |
| "learning_rate": 0.00010095982936366866, |
| "loss": 0.597, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.0618556701030926, |
| "grad_norm": 0.4242200255393982, |
| "learning_rate": 9.384998222538215e-05, |
| "loss": 0.6024, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.132954141485958, |
| "grad_norm": 0.4946908950805664, |
| "learning_rate": 8.674013508709563e-05, |
| "loss": 0.6123, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.132954141485958, |
| "eval_loss": 0.5394141674041748, |
| "eval_runtime": 20.0087, |
| "eval_samples_per_second": 124.945, |
| "eval_steps_per_second": 15.643, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 8439, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3194822445760512.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|