{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 207,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.072992700729927,
      "grad_norm": 0.5574292540550232,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.9145,
      "step": 5
    },
    {
      "epoch": 0.145985401459854,
      "grad_norm": 0.7793935537338257,
      "learning_rate": 1.5e-05,
      "loss": 1.8681,
      "step": 10
    },
    {
      "epoch": 0.21897810218978103,
      "grad_norm": 0.5158766508102417,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 1.8109,
      "step": 15
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 0.45905911922454834,
      "learning_rate": 2.999930775127745e-05,
      "loss": 1.781,
      "step": 20
    },
    {
      "epoch": 0.36496350364963503,
      "grad_norm": 0.4622262120246887,
      "learning_rate": 2.997508575424375e-05,
      "loss": 1.6989,
      "step": 25
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 0.45384281873703003,
      "learning_rate": 2.9916315189017684e-05,
      "loss": 1.647,
      "step": 30
    },
    {
      "epoch": 0.5109489051094891,
      "grad_norm": 0.44557806849479675,
      "learning_rate": 2.9823131643395782e-05,
      "loss": 1.6472,
      "step": 35
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 0.5212520360946655,
      "learning_rate": 2.9695750098322613e-05,
      "loss": 1.5885,
      "step": 40
    },
    {
      "epoch": 0.656934306569343,
      "grad_norm": 0.5191179513931274,
      "learning_rate": 2.9534464431914712e-05,
      "loss": 1.4624,
      "step": 45
    },
    {
      "epoch": 0.7299270072992701,
      "grad_norm": 0.5629550814628601,
      "learning_rate": 2.9339646741463255e-05,
      "loss": 1.4562,
      "step": 50
    },
    {
      "epoch": 0.8029197080291971,
      "grad_norm": 0.6789025068283081,
      "learning_rate": 2.911174648497964e-05,
      "loss": 1.4566,
      "step": 55
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 0.660894513130188,
      "learning_rate": 2.885128944426449e-05,
      "loss": 1.3949,
      "step": 60
    },
    {
      "epoch": 0.948905109489051,
      "grad_norm": 0.7208005785942078,
      "learning_rate": 2.85588765118923e-05,
      "loss": 1.3413,
      "step": 65
    },
    {
      "epoch": 1.0145985401459854,
      "grad_norm": 0.8066607713699341,
      "learning_rate": 2.8235182304910364e-05,
      "loss": 1.2873,
      "step": 70
    },
    {
      "epoch": 1.0875912408759123,
      "grad_norm": 0.8854206204414368,
      "learning_rate": 2.7880953608450127e-05,
      "loss": 1.1724,
      "step": 75
    },
    {
      "epoch": 1.1605839416058394,
      "grad_norm": 0.932307243347168,
      "learning_rate": 2.7497007652841757e-05,
      "loss": 1.1631,
      "step": 80
    },
    {
      "epoch": 1.2335766423357664,
      "grad_norm": 1.0038235187530518,
      "learning_rate": 2.708423022820675e-05,
      "loss": 1.0906,
      "step": 85
    },
    {
      "epoch": 1.3065693430656935,
      "grad_norm": 1.1178096532821655,
      "learning_rate": 2.664357364087825e-05,
      "loss": 1.0563,
      "step": 90
    },
    {
      "epoch": 1.3795620437956204,
      "grad_norm": 1.1691752672195435,
      "learning_rate": 2.6176054516363894e-05,
      "loss": 1.0173,
      "step": 95
    },
    {
      "epoch": 1.4525547445255476,
      "grad_norm": 1.2345852851867676,
      "learning_rate": 2.568275145391978e-05,
      "loss": 1.0038,
      "step": 100
    },
    {
      "epoch": 1.5255474452554745,
      "grad_norm": 1.3198442459106445,
      "learning_rate": 2.5164802538146695e-05,
      "loss": 0.9339,
      "step": 105
    },
    {
      "epoch": 1.5985401459854014,
      "grad_norm": 1.176343321800232,
      "learning_rate": 2.462340271334949e-05,
      "loss": 0.9511,
      "step": 110
    },
    {
      "epoch": 1.6715328467153285,
      "grad_norm": 1.2280197143554688,
      "learning_rate": 2.4059801026717166e-05,
      "loss": 0.9082,
      "step": 115
    },
    {
      "epoch": 1.7445255474452555,
      "grad_norm": 1.3133600950241089,
      "learning_rate": 2.3475297746683803e-05,
      "loss": 0.8563,
      "step": 120
    },
    {
      "epoch": 1.8175182481751824,
      "grad_norm": 1.5432037115097046,
      "learning_rate": 2.2871241363118536e-05,
      "loss": 0.8363,
      "step": 125
    },
    {
      "epoch": 1.8905109489051095,
      "grad_norm": 1.9158047437667847,
      "learning_rate": 2.2249025476265262e-05,
      "loss": 0.7977,
      "step": 130
    },
    {
      "epoch": 1.9635036496350367,
      "grad_norm": 1.5664846897125244,
      "learning_rate": 2.161008558160963e-05,
      "loss": 0.7834,
      "step": 135
    },
    {
      "epoch": 2.0291970802919708,
      "grad_norm": 1.4847023487091064,
      "learning_rate": 2.0955895758090724e-05,
      "loss": 0.709,
      "step": 140
    },
    {
      "epoch": 2.102189781021898,
      "grad_norm": 1.427018642425537,
      "learning_rate": 2.028796526729806e-05,
      "loss": 0.6542,
      "step": 145
    },
    {
      "epoch": 2.1751824817518246,
      "grad_norm": 1.4234408140182495,
      "learning_rate": 1.9607835071499746e-05,
      "loss": 0.6478,
      "step": 150
    },
    {
      "epoch": 2.2481751824817517,
      "grad_norm": 1.8172283172607422,
      "learning_rate": 1.8917074278534948e-05,
      "loss": 0.6285,
      "step": 155
    },
    {
      "epoch": 2.321167883211679,
      "grad_norm": 1.7156193256378174,
      "learning_rate": 1.8217276521772584e-05,
      "loss": 0.6131,
      "step": 160
    },
    {
      "epoch": 2.394160583941606,
      "grad_norm": 1.572896957397461,
      "learning_rate": 1.7510056283487887e-05,
      "loss": 0.5831,
      "step": 165
    },
    {
      "epoch": 2.4671532846715327,
      "grad_norm": 1.6992086172103882,
      "learning_rate": 1.6797045170139112e-05,
      "loss": 0.6198,
      "step": 170
    },
    {
      "epoch": 2.54014598540146,
      "grad_norm": 1.53786039352417,
      "learning_rate": 1.6079888148137506e-05,
      "loss": 0.563,
      "step": 175
    },
    {
      "epoch": 2.613138686131387,
      "grad_norm": 1.7264128923416138,
      "learning_rate": 1.536023974879501e-05,
      "loss": 0.5706,
      "step": 180
    },
    {
      "epoch": 2.686131386861314,
      "grad_norm": 1.9326459169387817,
      "learning_rate": 1.4639760251204992e-05,
      "loss": 0.5418,
      "step": 185
    },
    {
      "epoch": 2.759124087591241,
      "grad_norm": 1.5844260454177856,
      "learning_rate": 1.3920111851862495e-05,
      "loss": 0.5393,
      "step": 190
    },
    {
      "epoch": 2.832116788321168,
      "grad_norm": 1.6592376232147217,
      "learning_rate": 1.3202954829860894e-05,
      "loss": 0.5291,
      "step": 195
    },
    {
      "epoch": 2.905109489051095,
      "grad_norm": 1.6008892059326172,
      "learning_rate": 1.248994371651211e-05,
      "loss": 0.5163,
      "step": 200
    },
    {
      "epoch": 2.978102189781022,
      "grad_norm": 1.6370270252227783,
      "learning_rate": 1.1782723478227419e-05,
      "loss": 0.4952,
      "step": 205
    }
  ],
  "logging_steps": 5,
  "max_steps": 345,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.748885598876467e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}