{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 360.0,
  "global_step": 360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 10.0,
      "learning_rate": 9.090909090909091e-06,
      "loss": 1.9474,
      "step": 10
    },
    {
      "epoch": 20.0,
      "learning_rate": 9.983600282541213e-06,
      "loss": 1.9169,
      "step": 20
    },
    {
      "epoch": 30.0,
      "learning_rate": 9.927048004251748e-06,
      "loss": 1.8596,
      "step": 30
    },
    {
      "epoch": 40.0,
      "learning_rate": 9.830598527058083e-06,
      "loss": 1.7779,
      "step": 40
    },
    {
      "epoch": 50.0,
      "learning_rate": 9.695032858974042e-06,
      "loss": 1.6849,
      "step": 50
    },
    {
      "epoch": 60.0,
      "learning_rate": 9.52144875476464e-06,
      "loss": 1.5859,
      "step": 60
    },
    {
      "epoch": 70.0,
      "learning_rate": 9.311251826781587e-06,
      "loss": 1.4716,
      "step": 70
    },
    {
      "epoch": 80.0,
      "learning_rate": 9.066144162895259e-06,
      "loss": 1.3578,
      "step": 80
    },
    {
      "epoch": 90.0,
      "learning_rate": 8.788110543690415e-06,
      "loss": 1.2642,
      "step": 90
    },
    {
      "epoch": 100.0,
      "learning_rate": 8.479402370533127e-06,
      "loss": 1.1801,
      "step": 100
    },
    {
      "epoch": 110.0,
      "learning_rate": 8.142519434652782e-06,
      "loss": 1.1065,
      "step": 110
    },
    {
      "epoch": 120.0,
      "learning_rate": 7.780189674865617e-06,
      "loss": 1.0596,
      "step": 120
    },
    {
      "epoch": 130.0,
      "learning_rate": 7.395347087853349e-06,
      "loss": 1.0319,
      "step": 130
    },
    {
      "epoch": 140.0,
      "learning_rate": 6.991107969870363e-06,
      "loss": 1.0072,
      "step": 140
    },
    {
      "epoch": 150.0,
      "learning_rate": 6.570745682264288e-06,
      "loss": 0.9865,
      "step": 150
    },
    {
      "epoch": 160.0,
      "learning_rate": 6.137664145148339e-06,
      "loss": 0.9688,
      "step": 160
    },
    {
      "epoch": 170.0,
      "learning_rate": 5.6953702738627215e-06,
      "loss": 0.9521,
      "step": 170
    },
    {
      "epoch": 180.0,
      "learning_rate": 5.247445581423257e-06,
      "loss": 0.9354,
      "step": 180
    },
    {
      "epoch": 190.0,
      "learning_rate": 4.7975171769088366e-06,
      "loss": 0.9188,
      "step": 190
    },
    {
      "epoch": 200.0,
      "learning_rate": 4.349228394630808e-06,
      "loss": 0.9018,
      "step": 200
    },
    {
      "epoch": 210.0,
      "learning_rate": 3.906209291917141e-06,
      "loss": 0.8837,
      "step": 210
    },
    {
      "epoch": 220.0,
      "learning_rate": 3.472047254408091e-06,
      "loss": 0.8648,
      "step": 220
    },
    {
      "epoch": 230.0,
      "learning_rate": 3.050257946889594e-06,
      "loss": 0.8495,
      "step": 230
    },
    {
      "epoch": 240.0,
      "learning_rate": 2.6442568448924754e-06,
      "loss": 0.8387,
      "step": 240
    },
    {
      "epoch": 250.0,
      "learning_rate": 2.2573315775828655e-06,
      "loss": 0.8302,
      "step": 250
    },
    {
      "epoch": 260.0,
      "learning_rate": 1.892615305899645e-06,
      "loss": 0.823,
      "step": 260
    },
    {
      "epoch": 270.0,
      "learning_rate": 1.5530613515117721e-06,
      "loss": 0.8171,
      "step": 270
    },
    {
      "epoch": 280.0,
      "learning_rate": 1.2414192820396987e-06,
      "loss": 0.8121,
      "step": 280
    },
    {
      "epoch": 290.0,
      "learning_rate": 9.602126461929002e-07,
      "loss": 0.8081,
      "step": 290
    },
    {
      "epoch": 300.0,
      "learning_rate": 7.117185391151371e-07,
      "loss": 0.805,
      "step": 300
    },
    {
      "epoch": 310.0,
      "learning_rate": 4.979491634088712e-07,
      "loss": 0.8027,
      "step": 310
    },
    {
      "epoch": 320.0,
      "learning_rate": 3.206355351500184e-07,
      "loss": 0.8011,
      "step": 320
    },
    {
      "epoch": 330.0,
      "learning_rate": 1.8121346683502183e-07,
      "loss": 0.8,
      "step": 330
    },
    {
      "epoch": 340.0,
      "learning_rate": 8.081194076451749e-08,
      "loss": 0.7994,
      "step": 340
    },
    {
      "epoch": 350.0,
      "learning_rate": 2.0243967011164267e-08,
      "loss": 0.7991,
      "step": 350
    },
    {
      "epoch": 360.0,
      "learning_rate": 0.0,
      "loss": 0.799,
      "step": 360
    }
  ],
  "max_steps": 360,
  "num_train_epochs": 360,
  "total_flos": 1.00894355288064e+17,
  "trial_name": null,
  "trial_params": null
}