{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4721435316336166,
  "eval_steps": 500,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018885741265344664,
      "grad_norm": 0.4920569062232971,
      "learning_rate": 0.00019936113105200085,
      "loss": 0.7296,
      "step": 10
    },
    {
      "epoch": 0.03777148253068933,
      "grad_norm": 0.16350923478603363,
      "learning_rate": 0.0001971631732914674,
      "loss": 0.1418,
      "step": 20
    },
    {
      "epoch": 0.056657223796033995,
      "grad_norm": 0.137547567486763,
      "learning_rate": 0.00019343289424566122,
      "loss": 0.1158,
      "step": 30
    },
    {
      "epoch": 0.07554296506137866,
      "grad_norm": 0.1265052706003189,
      "learning_rate": 0.00018822912264349534,
      "loss": 0.1129,
      "step": 40
    },
    {
      "epoch": 0.09442870632672333,
      "grad_norm": 0.12020522356033325,
      "learning_rate": 0.00018163392507171842,
      "loss": 0.109,
      "step": 50
    },
    {
      "epoch": 0.11331444759206799,
      "grad_norm": 0.19054941833019257,
      "learning_rate": 0.0001737513117358174,
      "loss": 0.1082,
      "step": 60
    },
    {
      "epoch": 0.13220018885741266,
      "grad_norm": 0.10867941379547119,
      "learning_rate": 0.00016470559615694446,
      "loss": 0.1073,
      "step": 70
    },
    {
      "epoch": 0.1510859301227573,
      "grad_norm": 0.13128109276294708,
      "learning_rate": 0.00015463943467342693,
      "loss": 0.1036,
      "step": 80
    },
    {
      "epoch": 0.16997167138810199,
      "grad_norm": 0.09215914458036423,
      "learning_rate": 0.0001437115766650933,
      "loss": 0.0999,
      "step": 90
    },
    {
      "epoch": 0.18885741265344666,
      "grad_norm": 0.15978454053401947,
      "learning_rate": 0.00013209436098072095,
      "loss": 0.1009,
      "step": 100
    },
    {
      "epoch": 0.2077431539187913,
      "grad_norm": 0.11780431866645813,
      "learning_rate": 0.00011997099805144069,
      "loss": 0.0996,
      "step": 110
    },
    {
      "epoch": 0.22662889518413598,
      "grad_norm": 0.1909545511007309,
      "learning_rate": 0.00010753268055279329,
      "loss": 0.1015,
      "step": 120
    },
    {
      "epoch": 0.24551463644948066,
      "grad_norm": 0.12635937333106995,
      "learning_rate": 9.497556818202306e-05,
      "loss": 0.1023,
      "step": 130
    },
    {
      "epoch": 0.26440037771482533,
      "grad_norm": 0.12850093841552734,
      "learning_rate": 8.249769410247239e-05,
      "loss": 0.1005,
      "step": 140
    },
    {
      "epoch": 0.28328611898017,
      "grad_norm": 0.10055939108133316,
      "learning_rate": 7.029584184229653e-05,
      "loss": 0.0958,
      "step": 150
    },
    {
      "epoch": 0.3021718602455146,
      "grad_norm": 0.13055960834026337,
      "learning_rate": 5.856244190067159e-05,
      "loss": 0.0972,
      "step": 160
    },
    {
      "epoch": 0.3210576015108593,
      "grad_norm": 0.11343678832054138,
      "learning_rate": 4.748253700387042e-05,
      "loss": 0.0977,
      "step": 170
    },
    {
      "epoch": 0.33994334277620397,
      "grad_norm": 0.12275710701942444,
      "learning_rate": 3.7230863870929964e-05,
      "loss": 0.0975,
      "step": 180
    },
    {
      "epoch": 0.3588290840415486,
      "grad_norm": 0.12053252756595612,
      "learning_rate": 2.7969097511209308e-05,
      "loss": 0.0975,
      "step": 190
    },
    {
      "epoch": 0.3777148253068933,
      "grad_norm": 0.0943133682012558,
      "learning_rate": 1.9843301512912327e-05,
      "loss": 0.0968,
      "step": 200
    },
    {
      "epoch": 0.39660056657223797,
      "grad_norm": 0.10614251345396042,
      "learning_rate": 1.2981624533047432e-05,
      "loss": 0.0974,
      "step": 210
    },
    {
      "epoch": 0.4154863078375826,
      "grad_norm": 0.11574462801218033,
      "learning_rate": 7.492279316554207e-06,
      "loss": 0.0979,
      "step": 220
    },
    {
      "epoch": 0.4343720491029273,
      "grad_norm": 0.12150143086910248,
      "learning_rate": 3.461836116672612e-06,
      "loss": 0.0998,
      "step": 230
    },
    {
      "epoch": 0.45325779036827196,
      "grad_norm": 0.11058085411787033,
      "learning_rate": 9.538574303348813e-07,
      "loss": 0.096,
      "step": 240
    },
    {
      "epoch": 0.4721435316336166,
      "grad_norm": 0.11206520348787308,
      "learning_rate": 7.895579618388827e-09,
      "loss": 0.0987,
      "step": 250
    },
    {
      "epoch": 0.4721435316336166,
      "step": 250,
      "total_flos": 1.875195005553869e+16,
      "train_loss": 0.12820044946670534,
      "train_runtime": 1672.7519,
      "train_samples_per_second": 9.565,
      "train_steps_per_second": 0.149
    }
  ],
  "logging_steps": 10,
  "max_steps": 250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.875195005553869e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}