| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 210.0, |
| "global_step": 210, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 10.0, |
| "learning_rate": 1.9989834093992945e-05, |
| "loss": 0.2542, |
| "step": 10 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 1.9809681293474693e-05, |
| "loss": 0.2436, |
| "step": 20 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 1.9408298407861045e-05, |
| "loss": 0.2287, |
| "step": 30 |
| }, |
| { |
| "epoch": 40.0, |
| "learning_rate": 1.879473751206489e-05, |
| "loss": 0.2096, |
| "step": 40 |
| }, |
| { |
| "epoch": 50.0, |
| "learning_rate": 1.798283576604356e-05, |
| "loss": 0.1882, |
| "step": 50 |
| }, |
| { |
| "epoch": 60.0, |
| "learning_rate": 1.6990903356131125e-05, |
| "loss": 0.1687, |
| "step": 60 |
| }, |
| { |
| "epoch": 70.0, |
| "learning_rate": 1.5841310559698346e-05, |
| "loss": 0.1537, |
| "step": 70 |
| }, |
| { |
| "epoch": 80.0, |
| "learning_rate": 1.4559983245748639e-05, |
| "loss": 0.1427, |
| "step": 80 |
| }, |
| { |
| "epoch": 90.0, |
| "learning_rate": 1.3175818189031326e-05, |
| "loss": 0.1343, |
| "step": 90 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 1.1720031383636585e-05, |
| "loss": 0.1261, |
| "step": 100 |
| }, |
| { |
| "epoch": 110.0, |
| "learning_rate": 1.0225454053046922e-05, |
| "loss": 0.12, |
| "step": 110 |
| }, |
| { |
| "epoch": 120.0, |
| "learning_rate": 8.72579223318095e-06, |
| "loss": 0.1166, |
| "step": 120 |
| }, |
| { |
| "epoch": 130.0, |
| "learning_rate": 7.254866626475152e-06, |
| "loss": 0.1139, |
| "step": 130 |
| }, |
| { |
| "epoch": 140.0, |
| "learning_rate": 5.845849869981137e-06, |
| "loss": 0.1116, |
| "step": 140 |
| }, |
| { |
| "epoch": 150.0, |
| "learning_rate": 4.530518418775734e-06, |
| "loss": 0.1096, |
| "step": 150 |
| }, |
| { |
| "epoch": 160.0, |
| "learning_rate": 3.338535916373267e-06, |
| "loss": 0.108, |
| "step": 160 |
| }, |
| { |
| "epoch": 170.0, |
| "learning_rate": 2.2967842137278706e-06, |
| "loss": 0.1068, |
| "step": 170 |
| }, |
| { |
| "epoch": 180.0, |
| "learning_rate": 1.4287571238320053e-06, |
| "loss": 0.1059, |
| "step": 180 |
| }, |
| { |
| "epoch": 190.0, |
| "learning_rate": 7.540305840905371e-07, |
| "loss": 0.1053, |
| "step": 190 |
| }, |
| { |
| "epoch": 200.0, |
| "learning_rate": 2.878211754847926e-07, |
| "loss": 0.105, |
| "step": 200 |
| }, |
| { |
| "epoch": 210.0, |
| "learning_rate": 4.0642954899238196e-08, |
| "loss": 0.1048, |
| "step": 210 |
| } |
| ], |
| "max_steps": 216, |
| "num_train_epochs": 216, |
| "total_flos": 3.04771919929344e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|