{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9920692141312184, "eval_steps": 500, "global_step": 43, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023071377072819033, "grad_norm": 11.529925678238175, "learning_rate": 0.0, "loss": 2.0516, "step": 1 }, { "epoch": 0.046142754145638065, "grad_norm": 11.988806479251192, "learning_rate": 2.0000000000000003e-06, "loss": 2.1984, "step": 2 }, { "epoch": 0.0692141312184571, "grad_norm": 10.48865159335969, "learning_rate": 4.000000000000001e-06, "loss": 2.0262, "step": 3 }, { "epoch": 0.09228550829127613, "grad_norm": 7.872336296393583, "learning_rate": 6e-06, "loss": 1.9887, "step": 4 }, { "epoch": 0.11535688536409516, "grad_norm": 5.600464723177604, "learning_rate": 8.000000000000001e-06, "loss": 1.8539, "step": 5 }, { "epoch": 0.1384282624369142, "grad_norm": 5.379982813408392, "learning_rate": 1e-05, "loss": 1.6958, "step": 6 }, { "epoch": 0.16149963950973323, "grad_norm": 6.150171510754621, "learning_rate": 9.98292246503335e-06, "loss": 1.5919, "step": 7 }, { "epoch": 0.18457101658255226, "grad_norm": 7.730500186977511, "learning_rate": 9.931806517013612e-06, "loss": 1.4645, "step": 8 }, { "epoch": 0.2076423936553713, "grad_norm": 5.657841206609976, "learning_rate": 9.847001329696653e-06, "loss": 1.4254, "step": 9 }, { "epoch": 0.23071377072819033, "grad_norm": 4.330967629257847, "learning_rate": 9.729086208503174e-06, "loss": 1.3279, "step": 10 }, { "epoch": 0.25378514780100936, "grad_norm": 3.4335170991549617, "learning_rate": 9.578866633275289e-06, "loss": 1.2615, "step": 11 }, { "epoch": 0.2768565248738284, "grad_norm": 3.5158187786127737, "learning_rate": 9.397368756032445e-06, "loss": 1.198, "step": 12 }, { "epoch": 0.2999279019466474, "grad_norm": 2.8923651000012804, "learning_rate": 9.185832391312644e-06, "loss": 1.2127, "step": 13 }, { "epoch": 0.32299927901946646, "grad_norm": 2.7678712156217045, "learning_rate": 8.94570254698197e-06, "loss": 1.192, "step": 14 }, { "epoch": 0.3460706560922855, "grad_norm": 2.660124872631909, "learning_rate": 8.67861955336566e-06, "loss": 1.1819, "step": 15 }, { "epoch": 0.3691420331651045, "grad_norm": 2.599566946174234, "learning_rate": 8.386407858128707e-06, "loss": 1.1531, "step": 16 }, { "epoch": 0.39221341023792355, "grad_norm": 2.4817341082971156, "learning_rate": 8.071063563448341e-06, "loss": 1.2092, "step": 17 }, { "epoch": 0.4152847873107426, "grad_norm": 2.5122202204515136, "learning_rate": 7.734740790612137e-06, "loss": 1.1634, "step": 18 }, { "epoch": 0.4383561643835616, "grad_norm": 2.3856245897153676, "learning_rate": 7.379736965185369e-06, "loss": 1.1431, "step": 19 }, { "epoch": 0.46142754145638065, "grad_norm": 2.3564683980893326, "learning_rate": 7.008477123264849e-06, "loss": 1.1209, "step": 20 }, { "epoch": 0.4844989185291997, "grad_norm": 2.330304260395665, "learning_rate": 6.6234973460234184e-06, "loss": 1.1877, "step": 21 }, { "epoch": 0.5075702956020187, "grad_norm": 2.1638793690371845, "learning_rate": 6.227427435703997e-06, "loss": 1.0835, "step": 22 }, { "epoch": 0.5306416726748377, "grad_norm": 2.121227470925116, "learning_rate": 5.82297295140367e-06, "loss": 1.0847, "step": 23 }, { "epoch": 0.5537130497476568, "grad_norm": 2.18566259900884, "learning_rate": 5.412896727361663e-06, "loss": 1.1755, "step": 24 }, { "epoch": 0.5767844268204758, "grad_norm": 2.2506433459611905, "learning_rate": 5e-06, "loss": 1.1454, "step": 25 }, { "epoch": 0.5998558038932948, "grad_norm": 1.9935766338338359, "learning_rate": 4.587103272638339e-06, "loss": 1.0624, "step": 26 }, { "epoch": 0.6229271809661139, "grad_norm": 2.024598244784711, "learning_rate": 4.17702704859633e-06, "loss": 1.1401, "step": 27 }, { "epoch": 0.6459985580389329, "grad_norm": 1.9014864665100077, "learning_rate": 3.7725725642960047e-06, "loss": 1.0402, "step": 28 }, { "epoch": 0.669069935111752, "grad_norm": 1.9082403800768388, "learning_rate": 3.3765026539765832e-06, "loss": 1.1484, "step": 29 }, { "epoch": 0.692141312184571, "grad_norm": 1.9140993299277556, "learning_rate": 2.991522876735154e-06, "loss": 1.0909, "step": 30 }, { "epoch": 0.71521268925739, "grad_norm": 2.0056513408575634, "learning_rate": 2.6202630348146323e-06, "loss": 1.118, "step": 31 }, { "epoch": 0.738284066330209, "grad_norm": 2.0600105157689463, "learning_rate": 2.265259209387867e-06, "loss": 1.0666, "step": 32 }, { "epoch": 0.7613554434030281, "grad_norm": 1.8062623897997554, "learning_rate": 1.928936436551661e-06, "loss": 1.086, "step": 33 }, { "epoch": 0.7844268204758471, "grad_norm": 1.9039511452967517, "learning_rate": 1.6135921418712959e-06, "loss": 1.0922, "step": 34 }, { "epoch": 0.8074981975486661, "grad_norm": 1.796397313791375, "learning_rate": 1.321380446634342e-06, "loss": 1.0186, "step": 35 }, { "epoch": 0.8305695746214852, "grad_norm": 1.9645645345105642, "learning_rate": 1.0542974530180327e-06, "loss": 1.131, "step": 36 }, { "epoch": 0.8536409516943042, "grad_norm": 1.8741132808199075, "learning_rate": 8.141676086873574e-07, "loss": 1.0346, "step": 37 }, { "epoch": 0.8767123287671232, "grad_norm": 1.871976097374707, "learning_rate": 6.026312439675553e-07, "loss": 1.0757, "step": 38 }, { "epoch": 0.8997837058399423, "grad_norm": 1.9322927512032353, "learning_rate": 4.211333667247125e-07, "loss": 1.1638, "step": 39 }, { "epoch": 0.9228550829127613, "grad_norm": 1.811353999287456, "learning_rate": 2.7091379149682683e-07, "loss": 1.0145, "step": 40 }, { "epoch": 0.9459264599855803, "grad_norm": 1.836091748570849, "learning_rate": 1.5299867030334815e-07, "loss": 1.0958, "step": 41 }, { "epoch": 0.9689978370583994, "grad_norm": 1.8619728224137473, "learning_rate": 6.819348298638839e-08, "loss": 1.0899, "step": 42 }, { "epoch": 0.9920692141312184, "grad_norm": 1.859646123275278, "learning_rate": 1.7077534966650767e-08, "loss": 1.0737, "step": 43 }, { "epoch": 0.9920692141312184, "step": 43, "total_flos": 5518750777344.0, "train_loss": 1.2716188818909402, "train_runtime": 679.939, "train_samples_per_second": 2.04, "train_steps_per_second": 0.063 } ], "logging_steps": 1, "max_steps": 43, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5518750777344.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }