{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1092896174863388,
      "grad_norm": 7.8006248254154436,
      "learning_rate": 1.6071428571428574e-06,
      "loss": 1.2223,
      "step": 10
    },
    {
      "epoch": 0.2185792349726776,
      "grad_norm": 4.844388222783676,
      "learning_rate": 3.3928571428571435e-06,
      "loss": 1.0903,
      "step": 20
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 3.0957110922340183,
      "learning_rate": 4.999799414013322e-06,
      "loss": 0.9481,
      "step": 30
    },
    {
      "epoch": 0.4371584699453552,
      "grad_norm": 2.680281559117405,
      "learning_rate": 4.975768018471877e-06,
      "loss": 0.8858,
      "step": 40
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 2.7043491664434707,
      "learning_rate": 4.912060841339536e-06,
      "loss": 0.8931,
      "step": 50
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 2.806617891201655,
      "learning_rate": 4.809698831278217e-06,
      "loss": 0.8647,
      "step": 60
    },
    {
      "epoch": 0.7650273224043715,
      "grad_norm": 2.988877065894127,
      "learning_rate": 4.670322405614621e-06,
      "loss": 0.8276,
      "step": 70
    },
    {
      "epoch": 0.8743169398907104,
      "grad_norm": 3.224849095250869,
      "learning_rate": 4.4961651615930344e-06,
      "loss": 0.8418,
      "step": 80
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 2.8043508949300717,
      "learning_rate": 4.290018081536807e-06,
      "loss": 0.829,
      "step": 90
    },
    {
      "epoch": 1.0874316939890711,
      "grad_norm": 3.097062688827004,
      "learning_rate": 4.0551848055539345e-06,
      "loss": 0.6056,
      "step": 100
    },
    {
      "epoch": 1.1967213114754098,
      "grad_norm": 2.7435175097760536,
      "learning_rate": 3.795428688570505e-06,
      "loss": 0.5148,
      "step": 110
    },
    {
      "epoch": 1.3060109289617485,
      "grad_norm": 2.8323835125718384,
      "learning_rate": 3.514912490137268e-06,
      "loss": 0.478,
      "step": 120
    },
    {
      "epoch": 1.4153005464480874,
      "grad_norm": 2.8362148177995863,
      "learning_rate": 3.2181316635191125e-06,
      "loss": 0.4782,
      "step": 130
    },
    {
      "epoch": 1.5245901639344264,
      "grad_norm": 2.6159666839429807,
      "learning_rate": 2.909842313152888e-06,
      "loss": 0.4866,
      "step": 140
    },
    {
      "epoch": 1.633879781420765,
      "grad_norm": 2.6862167736013034,
      "learning_rate": 2.5949849750018486e-06,
      "loss": 0.4925,
      "step": 150
    },
    {
      "epoch": 1.7431693989071038,
      "grad_norm": 2.9005042165979877,
      "learning_rate": 2.27860544127575e-06,
      "loss": 0.4672,
      "step": 160
    },
    {
      "epoch": 1.8524590163934427,
      "grad_norm": 2.8760498712590588,
      "learning_rate": 1.9657738983516227e-06,
      "loss": 0.4581,
      "step": 170
    },
    {
      "epoch": 1.9617486338797814,
      "grad_norm": 2.8992839199365017,
      "learning_rate": 1.6615036737622574e-06,
      "loss": 0.4614,
      "step": 180
    },
    {
      "epoch": 2.0655737704918034,
      "grad_norm": 2.6886295405633924,
      "learning_rate": 1.3706708943843822e-06,
      "loss": 0.348,
      "step": 190
    },
    {
      "epoch": 2.1748633879781423,
      "grad_norm": 2.841014116152583,
      "learning_rate": 1.0979363433559892e-06,
      "loss": 0.2581,
      "step": 200
    },
    {
      "epoch": 2.2841530054644807,
      "grad_norm": 2.5452200095961905,
      "learning_rate": 8.476707680161486e-07,
      "loss": 0.2529,
      "step": 210
    },
    {
      "epoch": 2.3934426229508197,
      "grad_norm": 2.7836057891970567,
      "learning_rate": 6.238848358558439e-07,
      "loss": 0.2509,
      "step": 220
    },
    {
      "epoch": 2.5027322404371586,
      "grad_norm": 2.8036652568167026,
      "learning_rate": 4.3016486098094667e-07,
      "loss": 0.2387,
      "step": 230
    },
    {
      "epoch": 2.612021857923497,
      "grad_norm": 2.5075873714323333,
      "learning_rate": 2.696153311122704e-07,
      "loss": 0.2508,
      "step": 240
    },
    {
      "epoch": 2.721311475409836,
      "grad_norm": 2.5704796097332814,
      "learning_rate": 1.448091561646628e-07,
      "loss": 0.2736,
      "step": 250
    },
    {
      "epoch": 2.830601092896175,
      "grad_norm": 2.639131043723551,
      "learning_rate": 5.774643570378296e-08,
      "loss": 0.273,
      "step": 260
    },
    {
      "epoch": 2.939890710382514,
      "grad_norm": 2.499486520403044,
      "learning_rate": 9.822406058697665e-09,
      "loss": 0.2575,
      "step": 270
    },
    {
      "epoch": 3.0,
      "step": 276,
      "total_flos": 14707949568000.0,
      "train_loss": 0.5563258748987446,
      "train_runtime": 4077.0893,
      "train_samples_per_second": 2.145,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 10,
  "max_steps": 276,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 14707949568000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}