{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999800466907437,
  "global_step": 25058,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.900830074227792e-05,
      "loss": 3.3225,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.801061537233618e-05,
      "loss": 2.3912,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.701293000239445e-05,
      "loss": 2.1243,
      "step": 1500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6015244632452715e-05,
      "loss": 1.989,
      "step": 2000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5017559262510976e-05,
      "loss": 1.889,
      "step": 2500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.401987389256924e-05,
      "loss": 1.808,
      "step": 3000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3022188522627504e-05,
      "loss": 1.7416,
      "step": 3500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.202649852342566e-05,
      "loss": 1.7027,
      "step": 4000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.102881315348392e-05,
      "loss": 1.6679,
      "step": 4500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.003112778354218e-05,
      "loss": 1.6188,
      "step": 5000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.903344241360045e-05,
      "loss": 1.5844,
      "step": 5500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8035757043658716e-05,
      "loss": 1.5613,
      "step": 6000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7038071673716977e-05,
      "loss": 1.5266,
      "step": 6500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6040386303775244e-05,
      "loss": 1.5123,
      "step": 7000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5042700933833505e-05,
      "loss": 1.472,
      "step": 7500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.404501556389177e-05,
      "loss": 1.4397,
      "step": 8000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.304733019395004e-05,
      "loss": 1.4177,
      "step": 8500
    },
    {
      "epoch": 0.36,
      "learning_rate": 3.20496448240083e-05,
      "loss": 1.4125,
      "step": 9000
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.105195945406657e-05,
      "loss": 1.3836,
      "step": 9500
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.0054274084124832e-05,
      "loss": 1.368,
      "step": 10000
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.9056588714183096e-05,
      "loss": 1.3567,
      "step": 10500
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.805890334424136e-05,
      "loss": 1.3294,
      "step": 11000
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.7063213345039512e-05,
      "loss": 1.3221,
      "step": 11500
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.6065527975097776e-05,
      "loss": 1.3013,
      "step": 12000
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.5067842605156037e-05,
      "loss": 1.3042,
      "step": 12500
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.4070157235214305e-05,
      "loss": 1.2593,
      "step": 13000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.307247186527257e-05,
      "loss": 1.2566,
      "step": 13500
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.2076781866070717e-05,
      "loss": 1.2441,
      "step": 14000
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.107909649612898e-05,
      "loss": 1.2255,
      "step": 14500
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.0081411126187246e-05,
      "loss": 1.2176,
      "step": 15000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.908372575624551e-05,
      "loss": 1.212,
      "step": 15500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8086040386303777e-05,
      "loss": 1.1993,
      "step": 16000
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.708835501636204e-05,
      "loss": 1.1604,
      "step": 16500
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.609266501716019e-05,
      "loss": 1.201,
      "step": 17000
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.5094979647218454e-05,
      "loss": 1.1614,
      "step": 17500
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.4097294277276718e-05,
      "loss": 1.1584,
      "step": 18000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.3099608907334984e-05,
      "loss": 1.1534,
      "step": 18500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.2101923537393248e-05,
      "loss": 1.1386,
      "step": 19000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.1104238167451512e-05,
      "loss": 1.1231,
      "step": 19500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.0106552797509778e-05,
      "loss": 1.1396,
      "step": 20000
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.108867427568042e-06,
      "loss": 1.1261,
      "step": 20500
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.11317742836619e-06,
      "loss": 1.1098,
      "step": 21000
    },
    {
      "epoch": 0.86,
      "learning_rate": 7.1154920584244555e-06,
      "loss": 1.1006,
      "step": 21500
    },
    {
      "epoch": 0.88,
      "learning_rate": 6.1178066884827205e-06,
      "loss": 1.099,
      "step": 22000
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.1201213185409855e-06,
      "loss": 1.1121,
      "step": 22500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.12243594859925e-06,
      "loss": 1.108,
      "step": 23000
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.124750578657515e-06,
      "loss": 1.0903,
      "step": 23500
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.129060579455663e-06,
      "loss": 1.0892,
      "step": 24000
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.1313752095139278e-06,
      "loss": 1.0597,
      "step": 24500
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3368983957219251e-07,
      "loss": 1.0854,
      "step": 25000
    },
    {
      "epoch": 1.0,
      "step": 25058,
      "total_flos": 7.377974265999974e+16,
      "train_loss": 1.4067512356623162,
      "train_runtime": 5393.9428,
      "train_samples_per_second": 148.66,
      "train_steps_per_second": 4.646
    }
  ],
  "max_steps": 25058,
  "num_train_epochs": 1,
  "total_flos": 7.377974265999974e+16,
  "trial_name": null,
  "trial_params": null
}