| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 50, |
| "global_step": 222, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04509582863585118, |
| "grad_norm": 4.648494720458984, |
| "learning_rate": 1.173913043478261e-06, |
| "loss": 1.6578, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09019165727170236, |
| "grad_norm": 7.20250129699707, |
| "learning_rate": 2.4782608695652173e-06, |
| "loss": 1.6865, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13528748590755355, |
| "grad_norm": 3.1192681789398193, |
| "learning_rate": 2.993275923901659e-06, |
| "loss": 1.5591, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18038331454340473, |
| "grad_norm": 1.3946388959884644, |
| "learning_rate": 2.952402458097761e-06, |
| "loss": 1.6427, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2254791431792559, |
| "grad_norm": 1.1817127466201782, |
| "learning_rate": 2.8754064422891833e-06, |
| "loss": 1.2641, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2254791431792559, |
| "eval_loss": 1.4092315435409546, |
| "eval_runtime": 14.0932, |
| "eval_samples_per_second": 13.269, |
| "eval_steps_per_second": 6.67, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2705749718151071, |
| "grad_norm": 0.6758719086647034, |
| "learning_rate": 2.764202836352026e-06, |
| "loss": 1.2384, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3156708004509583, |
| "grad_norm": 0.7143831253051758, |
| "learning_rate": 2.621557373567564e-06, |
| "loss": 1.3275, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.36076662908680945, |
| "grad_norm": 0.6760919094085693, |
| "learning_rate": 2.4510177743583583e-06, |
| "loss": 1.2616, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.40586245772266066, |
| "grad_norm": 0.5790563225746155, |
| "learning_rate": 2.256825511303766e-06, |
| "loss": 1.0877, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4509582863585118, |
| "grad_norm": 0.5748745799064636, |
| "learning_rate": 2.043810319917937e-06, |
| "loss": 1.2241, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4509582863585118, |
| "eval_loss": 1.2807611227035522, |
| "eval_runtime": 13.4863, |
| "eval_samples_per_second": 13.866, |
| "eval_steps_per_second": 6.97, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.496054114994363, |
| "grad_norm": 0.5320383310317993, |
| "learning_rate": 1.8172700788017286e-06, |
| "loss": 1.1341, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5411499436302142, |
| "grad_norm": 0.5653139352798462, |
| "learning_rate": 1.5828390466567621e-06, |
| "loss": 1.1733, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5862457722660653, |
| "grad_norm": 0.3555745780467987, |
| "learning_rate": 1.3463477332250803e-06, |
| "loss": 1.1083, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6313416009019166, |
| "grad_norm": 0.5427935719490051, |
| "learning_rate": 1.1136778892896554e-06, |
| "loss": 1.1287, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6764374295377678, |
| "grad_norm": 0.518344521522522, |
| "learning_rate": 8.906162222643167e-07, |
| "loss": 1.191, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6764374295377678, |
| "eval_loss": 1.2335149049758911, |
| "eval_runtime": 13.2304, |
| "eval_samples_per_second": 14.134, |
| "eval_steps_per_second": 7.105, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7215332581736189, |
| "grad_norm": 0.5353164076805115, |
| "learning_rate": 6.827104755973948e-07, |
| "loss": 1.0559, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7666290868094702, |
| "grad_norm": 0.5589386820793152, |
| "learning_rate": 4.951314514232176e-07, |
| "loss": 1.1292, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8117249154453213, |
| "grad_norm": 0.4542039632797241, |
| "learning_rate": 3.325444080817054e-07, |
| "loss": 1.1606, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8568207440811725, |
| "grad_norm": 0.4739130437374115, |
| "learning_rate": 1.9899303096492822e-07, |
| "loss": 1.2275, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9019165727170236, |
| "grad_norm": 0.4491319954395294, |
| "learning_rate": 9.77988624396025e-08, |
| "loss": 1.1409, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9019165727170236, |
| "eval_loss": 1.2203494310379028, |
| "eval_runtime": 13.2982, |
| "eval_samples_per_second": 14.062, |
| "eval_steps_per_second": 7.069, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9470124013528749, |
| "grad_norm": 0.4336180090904236, |
| "learning_rate": 3.147869211342819e-08, |
| "loss": 1.042, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.992108229988726, |
| "grad_norm": 0.5455783605575562, |
| "learning_rate": 1.681962023334649e-09, |
| "loss": 1.1968, |
| "step": 220 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 222, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.4466834382336e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|