| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 102957, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9514360364035474e-05, |
| "loss": 1.3581, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9028720728070946e-05, |
| "loss": 0.6756, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.854308109210642e-05, |
| "loss": 0.5975, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.805744145614189e-05, |
| "loss": 0.5572, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.757180182017736e-05, |
| "loss": 0.5289, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.7086162184212826e-05, |
| "loss": 0.5127, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.66005225482483e-05, |
| "loss": 0.4999, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.611488291228377e-05, |
| "loss": 0.4843, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.562924327631924e-05, |
| "loss": 0.4701, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.5143603640354713e-05, |
| "loss": 0.4619, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.4657964004390185e-05, |
| "loss": 0.4581, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.417232436842566e-05, |
| "loss": 0.4538, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.368668473246112e-05, |
| "loss": 0.4452, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.3201045096496594e-05, |
| "loss": 0.4426, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.271540546053207e-05, |
| "loss": 0.4381, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.2229765824567544e-05, |
| "loss": 0.4302, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.174412618860301e-05, |
| "loss": 0.4299, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.125848655263848e-05, |
| "loss": 0.4218, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.077284691667395e-05, |
| "loss": 0.4219, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.0287207280709424e-05, |
| "loss": 0.4178, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.9801567644744896e-05, |
| "loss": 0.4169, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.931592800878037e-05, |
| "loss": 0.4161, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.883028837281584e-05, |
| "loss": 0.409, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.8344648736851305e-05, |
| "loss": 0.4057, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.7859009100886777e-05, |
| "loss": 0.4045, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.7373369464922255e-05, |
| "loss": 0.4049, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.688772982895772e-05, |
| "loss": 0.399, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.640209019299319e-05, |
| "loss": 0.3971, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.5916450557028664e-05, |
| "loss": 0.3951, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.5430810921064135e-05, |
| "loss": 0.3963, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.49451712850996e-05, |
| "loss": 0.3933, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.445953164913508e-05, |
| "loss": 0.3924, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.397389201317055e-05, |
| "loss": 0.3889, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.348825237720602e-05, |
| "loss": 0.3868, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.300261274124149e-05, |
| "loss": 0.3844, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.251697310527696e-05, |
| "loss": 0.3802, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.203133346931244e-05, |
| "loss": 0.3825, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.15456938333479e-05, |
| "loss": 0.3802, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.1060054197383375e-05, |
| "loss": 0.378, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.0574414561418846e-05, |
| "loss": 0.3768, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.0088774925454315e-05, |
| "loss": 0.3749, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.9603135289489787e-05, |
| "loss": 0.3758, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.9117495653525262e-05, |
| "loss": 0.3733, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8631856017560734e-05, |
| "loss": 0.3694, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2.8146216381596202e-05, |
| "loss": 0.3689, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.7660576745631674e-05, |
| "loss": 0.3693, |
| "step": 46000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.7174937109667142e-05, |
| "loss": 0.3703, |
| "step": 47000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6689297473702614e-05, |
| "loss": 0.366, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.620365783773809e-05, |
| "loss": 0.3664, |
| "step": 49000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.5718018201773557e-05, |
| "loss": 0.365, |
| "step": 50000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.523237856580903e-05, |
| "loss": 0.3636, |
| "step": 51000 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.4746738929844498e-05, |
| "loss": 0.3611, |
| "step": 52000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.4261099293879973e-05, |
| "loss": 0.3626, |
| "step": 53000 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.377545965791544e-05, |
| "loss": 0.3588, |
| "step": 54000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3289820021950913e-05, |
| "loss": 0.3618, |
| "step": 55000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.2804180385986385e-05, |
| "loss": 0.3589, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2318540750021853e-05, |
| "loss": 0.356, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.1832901114057325e-05, |
| "loss": 0.3545, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1347261478092797e-05, |
| "loss": 0.3561, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.086162184212827e-05, |
| "loss": 0.3534, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.0375982206163737e-05, |
| "loss": 0.3514, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9890342570199212e-05, |
| "loss": 0.3526, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.940470293423468e-05, |
| "loss": 0.3512, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.8919063298270152e-05, |
| "loss": 0.3509, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8433423662305624e-05, |
| "loss": 0.3484, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.7947784026341092e-05, |
| "loss": 0.3478, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7462144390376567e-05, |
| "loss": 0.3449, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.6976504754412036e-05, |
| "loss": 0.3483, |
| "step": 68000 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.6490865118447508e-05, |
| "loss": 0.3455, |
| "step": 69000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.600522548248298e-05, |
| "loss": 0.3441, |
| "step": 70000 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.551958584651845e-05, |
| "loss": 0.3446, |
| "step": 71000 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.503394621055392e-05, |
| "loss": 0.343, |
| "step": 72000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.4548306574589393e-05, |
| "loss": 0.3405, |
| "step": 73000 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.4062666938624863e-05, |
| "loss": 0.3401, |
| "step": 74000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.3577027302660333e-05, |
| "loss": 0.3385, |
| "step": 75000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3091387666695807e-05, |
| "loss": 0.3378, |
| "step": 76000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2605748030731277e-05, |
| "loss": 0.337, |
| "step": 77000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2120108394766749e-05, |
| "loss": 0.3361, |
| "step": 78000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1634468758802219e-05, |
| "loss": 0.338, |
| "step": 79000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1148829122837689e-05, |
| "loss": 0.3367, |
| "step": 80000 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.066318948687316e-05, |
| "loss": 0.3351, |
| "step": 81000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0177549850908632e-05, |
| "loss": 0.3363, |
| "step": 82000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.691910214944102e-06, |
| "loss": 0.3327, |
| "step": 83000 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.206270578979574e-06, |
| "loss": 0.3318, |
| "step": 84000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.720630943015046e-06, |
| "loss": 0.3307, |
| "step": 85000 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.234991307050518e-06, |
| "loss": 0.3337, |
| "step": 86000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.749351671085988e-06, |
| "loss": 0.3299, |
| "step": 87000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.263712035121459e-06, |
| "loss": 0.3291, |
| "step": 88000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.7780723991569305e-06, |
| "loss": 0.3328, |
| "step": 89000 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.292432763192401e-06, |
| "loss": 0.3279, |
| "step": 90000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.8067931272278715e-06, |
| "loss": 0.3331, |
| "step": 91000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.321153491263343e-06, |
| "loss": 0.3293, |
| "step": 92000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.835513855298814e-06, |
| "loss": 0.3274, |
| "step": 93000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.349874219334285e-06, |
| "loss": 0.3267, |
| "step": 94000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.864234583369757e-06, |
| "loss": 0.3261, |
| "step": 95000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.3785949474052275e-06, |
| "loss": 0.3238, |
| "step": 96000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.8929553114406984e-06, |
| "loss": 0.3237, |
| "step": 97000 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.4073156754761698e-06, |
| "loss": 0.3225, |
| "step": 98000 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.9216760395116407e-06, |
| "loss": 0.3242, |
| "step": 99000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.436036403547112e-06, |
| "loss": 0.3275, |
| "step": 100000 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 9.50396767582583e-07, |
| "loss": 0.3219, |
| "step": 101000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.6475713161805417e-07, |
| "loss": 0.3232, |
| "step": 102000 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 102957, |
| "total_flos": 7.554546670318524e+17, |
| "train_loss": 0.3894348529865434, |
| "train_runtime": 39614.9645, |
| "train_samples_per_second": 83.164, |
| "train_steps_per_second": 2.599 |
| } |
| ], |
| "max_steps": 102957, |
| "num_train_epochs": 3, |
| "total_flos": 7.554546670318524e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|