{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9920692141312184,
  "eval_steps": 500,
  "global_step": 43,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.023071377072819033,
      "grad_norm": 11.529925678238175,
      "learning_rate": 0.0,
      "loss": 2.0516,
      "step": 1
    },
    {
      "epoch": 0.046142754145638065,
      "grad_norm": 11.988806479251192,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.1984,
      "step": 2
    },
    {
      "epoch": 0.0692141312184571,
      "grad_norm": 10.48865159335969,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.0262,
      "step": 3
    },
    {
      "epoch": 0.09228550829127613,
      "grad_norm": 7.872336296393583,
      "learning_rate": 6e-06,
      "loss": 1.9887,
      "step": 4
    },
    {
      "epoch": 0.11535688536409516,
      "grad_norm": 5.600464723177604,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.8539,
      "step": 5
    },
    {
      "epoch": 0.1384282624369142,
      "grad_norm": 5.379982813408392,
      "learning_rate": 1e-05,
      "loss": 1.6958,
      "step": 6
    },
    {
      "epoch": 0.16149963950973323,
      "grad_norm": 6.150171510754621,
      "learning_rate": 9.98292246503335e-06,
      "loss": 1.5919,
      "step": 7
    },
    {
      "epoch": 0.18457101658255226,
      "grad_norm": 7.730500186977511,
      "learning_rate": 9.931806517013612e-06,
      "loss": 1.4645,
      "step": 8
    },
    {
      "epoch": 0.2076423936553713,
      "grad_norm": 5.657841206609976,
      "learning_rate": 9.847001329696653e-06,
      "loss": 1.4254,
      "step": 9
    },
    {
      "epoch": 0.23071377072819033,
      "grad_norm": 4.330967629257847,
      "learning_rate": 9.729086208503174e-06,
      "loss": 1.3279,
      "step": 10
    },
    {
      "epoch": 0.25378514780100936,
      "grad_norm": 3.4335170991549617,
      "learning_rate": 9.578866633275289e-06,
      "loss": 1.2615,
      "step": 11
    },
    {
      "epoch": 0.2768565248738284,
      "grad_norm": 3.5158187786127737,
      "learning_rate": 9.397368756032445e-06,
      "loss": 1.198,
      "step": 12
    },
    {
      "epoch": 0.2999279019466474,
      "grad_norm": 2.8923651000012804,
      "learning_rate": 9.185832391312644e-06,
      "loss": 1.2127,
      "step": 13
    },
    {
      "epoch": 0.32299927901946646,
      "grad_norm": 2.7678712156217045,
      "learning_rate": 8.94570254698197e-06,
      "loss": 1.192,
      "step": 14
    },
    {
      "epoch": 0.3460706560922855,
      "grad_norm": 2.660124872631909,
      "learning_rate": 8.67861955336566e-06,
      "loss": 1.1819,
      "step": 15
    },
    {
      "epoch": 0.3691420331651045,
      "grad_norm": 2.599566946174234,
      "learning_rate": 8.386407858128707e-06,
      "loss": 1.1531,
      "step": 16
    },
    {
      "epoch": 0.39221341023792355,
      "grad_norm": 2.4817341082971156,
      "learning_rate": 8.071063563448341e-06,
      "loss": 1.2092,
      "step": 17
    },
    {
      "epoch": 0.4152847873107426,
      "grad_norm": 2.5122202204515136,
      "learning_rate": 7.734740790612137e-06,
      "loss": 1.1634,
      "step": 18
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 2.3856245897153676,
      "learning_rate": 7.379736965185369e-06,
      "loss": 1.1431,
      "step": 19
    },
    {
      "epoch": 0.46142754145638065,
      "grad_norm": 2.3564683980893326,
      "learning_rate": 7.008477123264849e-06,
      "loss": 1.1209,
      "step": 20
    },
    {
      "epoch": 0.4844989185291997,
      "grad_norm": 2.330304260395665,
      "learning_rate": 6.6234973460234184e-06,
      "loss": 1.1877,
      "step": 21
    },
    {
      "epoch": 0.5075702956020187,
      "grad_norm": 2.1638793690371845,
      "learning_rate": 6.227427435703997e-06,
      "loss": 1.0835,
      "step": 22
    },
    {
      "epoch": 0.5306416726748377,
      "grad_norm": 2.121227470925116,
      "learning_rate": 5.82297295140367e-06,
      "loss": 1.0847,
      "step": 23
    },
    {
      "epoch": 0.5537130497476568,
      "grad_norm": 2.18566259900884,
      "learning_rate": 5.412896727361663e-06,
      "loss": 1.1755,
      "step": 24
    },
    {
      "epoch": 0.5767844268204758,
      "grad_norm": 2.2506433459611905,
      "learning_rate": 5e-06,
      "loss": 1.1454,
      "step": 25
    },
    {
      "epoch": 0.5998558038932948,
      "grad_norm": 1.9935766338338359,
      "learning_rate": 4.587103272638339e-06,
      "loss": 1.0624,
      "step": 26
    },
    {
      "epoch": 0.6229271809661139,
      "grad_norm": 2.024598244784711,
      "learning_rate": 4.17702704859633e-06,
      "loss": 1.1401,
      "step": 27
    },
    {
      "epoch": 0.6459985580389329,
      "grad_norm": 1.9014864665100077,
      "learning_rate": 3.7725725642960047e-06,
      "loss": 1.0402,
      "step": 28
    },
    {
      "epoch": 0.669069935111752,
      "grad_norm": 1.9082403800768388,
      "learning_rate": 3.3765026539765832e-06,
      "loss": 1.1484,
      "step": 29
    },
    {
      "epoch": 0.692141312184571,
      "grad_norm": 1.9140993299277556,
      "learning_rate": 2.991522876735154e-06,
      "loss": 1.0909,
      "step": 30
    },
    {
      "epoch": 0.71521268925739,
      "grad_norm": 2.0056513408575634,
      "learning_rate": 2.6202630348146323e-06,
      "loss": 1.118,
      "step": 31
    },
    {
      "epoch": 0.738284066330209,
      "grad_norm": 2.0600105157689463,
      "learning_rate": 2.265259209387867e-06,
      "loss": 1.0666,
      "step": 32
    },
    {
      "epoch": 0.7613554434030281,
      "grad_norm": 1.8062623897997554,
      "learning_rate": 1.928936436551661e-06,
      "loss": 1.086,
      "step": 33
    },
    {
      "epoch": 0.7844268204758471,
      "grad_norm": 1.9039511452967517,
      "learning_rate": 1.6135921418712959e-06,
      "loss": 1.0922,
      "step": 34
    },
    {
      "epoch": 0.8074981975486661,
      "grad_norm": 1.796397313791375,
      "learning_rate": 1.321380446634342e-06,
      "loss": 1.0186,
      "step": 35
    },
    {
      "epoch": 0.8305695746214852,
      "grad_norm": 1.9645645345105642,
      "learning_rate": 1.0542974530180327e-06,
      "loss": 1.131,
      "step": 36
    },
    {
      "epoch": 0.8536409516943042,
      "grad_norm": 1.8741132808199075,
      "learning_rate": 8.141676086873574e-07,
      "loss": 1.0346,
      "step": 37
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 1.871976097374707,
      "learning_rate": 6.026312439675553e-07,
      "loss": 1.0757,
      "step": 38
    },
    {
      "epoch": 0.8997837058399423,
      "grad_norm": 1.9322927512032353,
      "learning_rate": 4.211333667247125e-07,
      "loss": 1.1638,
      "step": 39
    },
    {
      "epoch": 0.9228550829127613,
      "grad_norm": 1.811353999287456,
      "learning_rate": 2.7091379149682683e-07,
      "loss": 1.0145,
      "step": 40
    },
    {
      "epoch": 0.9459264599855803,
      "grad_norm": 1.836091748570849,
      "learning_rate": 1.5299867030334815e-07,
      "loss": 1.0958,
      "step": 41
    },
    {
      "epoch": 0.9689978370583994,
      "grad_norm": 1.8619728224137473,
      "learning_rate": 6.819348298638839e-08,
      "loss": 1.0899,
      "step": 42
    },
    {
      "epoch": 0.9920692141312184,
      "grad_norm": 1.859646123275278,
      "learning_rate": 1.7077534966650767e-08,
      "loss": 1.0737,
      "step": 43
    },
    {
      "epoch": 0.9920692141312184,
      "step": 43,
      "total_flos": 5518750777344.0,
      "train_loss": 1.2716188818909402,
      "train_runtime": 679.939,
      "train_samples_per_second": 2.04,
      "train_steps_per_second": 0.063
    }
  ],
  "logging_steps": 1,
  "max_steps": 43,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5518750777344.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}