{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4721435316336166, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018885741265344664, "grad_norm": 0.4920569062232971, "learning_rate": 0.00019936113105200085, "loss": 0.7296, "step": 10 }, { "epoch": 0.03777148253068933, "grad_norm": 0.16350923478603363, "learning_rate": 0.0001971631732914674, "loss": 0.1418, "step": 20 }, { "epoch": 0.056657223796033995, "grad_norm": 0.137547567486763, "learning_rate": 0.00019343289424566122, "loss": 0.1158, "step": 30 }, { "epoch": 0.07554296506137866, "grad_norm": 0.1265052706003189, "learning_rate": 0.00018822912264349534, "loss": 0.1129, "step": 40 }, { "epoch": 0.09442870632672333, "grad_norm": 0.12020522356033325, "learning_rate": 0.00018163392507171842, "loss": 0.109, "step": 50 }, { "epoch": 0.11331444759206799, "grad_norm": 0.19054941833019257, "learning_rate": 0.0001737513117358174, "loss": 0.1082, "step": 60 }, { "epoch": 0.13220018885741266, "grad_norm": 0.10867941379547119, "learning_rate": 0.00016470559615694446, "loss": 0.1073, "step": 70 }, { "epoch": 0.1510859301227573, "grad_norm": 0.13128109276294708, "learning_rate": 0.00015463943467342693, "loss": 0.1036, "step": 80 }, { "epoch": 0.16997167138810199, "grad_norm": 0.09215914458036423, "learning_rate": 0.0001437115766650933, "loss": 0.0999, "step": 90 }, { "epoch": 0.18885741265344666, "grad_norm": 0.15978454053401947, "learning_rate": 0.00013209436098072095, "loss": 0.1009, "step": 100 }, { "epoch": 0.2077431539187913, "grad_norm": 0.11780431866645813, "learning_rate": 0.00011997099805144069, "loss": 0.0996, "step": 110 }, { "epoch": 0.22662889518413598, "grad_norm": 0.1909545511007309, "learning_rate": 0.00010753268055279329, "loss": 0.1015, "step": 120 }, { "epoch": 0.24551463644948066, "grad_norm": 0.12635937333106995, "learning_rate": 9.497556818202306e-05, "loss": 0.1023, "step": 130 }, { "epoch": 0.26440037771482533, "grad_norm": 0.12850093841552734, "learning_rate": 8.249769410247239e-05, "loss": 0.1005, "step": 140 }, { "epoch": 0.28328611898017, "grad_norm": 0.10055939108133316, "learning_rate": 7.029584184229653e-05, "loss": 0.0958, "step": 150 }, { "epoch": 0.3021718602455146, "grad_norm": 0.13055960834026337, "learning_rate": 5.856244190067159e-05, "loss": 0.0972, "step": 160 }, { "epoch": 0.3210576015108593, "grad_norm": 0.11343678832054138, "learning_rate": 4.748253700387042e-05, "loss": 0.0977, "step": 170 }, { "epoch": 0.33994334277620397, "grad_norm": 0.12275710701942444, "learning_rate": 3.7230863870929964e-05, "loss": 0.0975, "step": 180 }, { "epoch": 0.3588290840415486, "grad_norm": 0.12053252756595612, "learning_rate": 2.7969097511209308e-05, "loss": 0.0975, "step": 190 }, { "epoch": 0.3777148253068933, "grad_norm": 0.0943133682012558, "learning_rate": 1.9843301512912327e-05, "loss": 0.0968, "step": 200 }, { "epoch": 0.39660056657223797, "grad_norm": 0.10614251345396042, "learning_rate": 1.2981624533047432e-05, "loss": 0.0974, "step": 210 }, { "epoch": 0.4154863078375826, "grad_norm": 0.11574462801218033, "learning_rate": 7.492279316554207e-06, "loss": 0.0979, "step": 220 }, { "epoch": 0.4343720491029273, "grad_norm": 0.12150143086910248, "learning_rate": 3.461836116672612e-06, "loss": 0.0998, "step": 230 }, { "epoch": 0.45325779036827196, "grad_norm": 0.11058085411787033, "learning_rate": 9.538574303348813e-07, "loss": 0.096, "step": 240 }, { "epoch": 0.4721435316336166, "grad_norm": 0.11206520348787308, "learning_rate": 7.895579618388827e-09, "loss": 0.0987, "step": 250 }, { "epoch": 0.4721435316336166, "step": 250, "total_flos": 1.875195005553869e+16, "train_loss": 0.12820044946670534, "train_runtime": 1672.7519, "train_samples_per_second": 9.565, "train_steps_per_second": 0.149 } ], "logging_steps": 10, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.875195005553869e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }