| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "global_step": 56860, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9824129440731624e-05, |
| "loss": 0.5315, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9648258881463246e-05, |
| "loss": 0.2968, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9472388322194867e-05, |
| "loss": 0.2756, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.929651776292649e-05, |
| "loss": 0.2673, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9120647203658108e-05, |
| "loss": 0.2521, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.894477664438973e-05, |
| "loss": 0.2405, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8768906085121352e-05, |
| "loss": 0.2264, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.8593035525852974e-05, |
| "loss": 0.2264, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.8417164966584596e-05, |
| "loss": 0.2231, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.8241294407316218e-05, |
| "loss": 0.6134, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.806542384804784e-05, |
| "loss": 0.6581, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.788955328877946e-05, |
| "loss": 0.6592, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.771368272951108e-05, |
| "loss": 0.6561, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.7537812170242702e-05, |
| "loss": 0.6599, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.7361941610974324e-05, |
| "loss": 0.6569, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.7186071051705946e-05, |
| "loss": 0.6531, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.7010200492437568e-05, |
| "loss": 0.2212, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.683432993316919e-05, |
| "loss": 0.4708, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.665845937390081e-05, |
| "loss": 0.2605, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.6482588814632434e-05, |
| "loss": 0.1987, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.6306718255364052e-05, |
| "loss": 0.1908, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.6130847696095674e-05, |
| "loss": 0.185, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.5954977136827296e-05, |
| "loss": 0.1767, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.5779106577558918e-05, |
| "loss": 0.1551, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.560323601829054e-05, |
| "loss": 0.1476, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.5427365459022162e-05, |
| "loss": 0.1449, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.5251494899753782e-05, |
| "loss": 0.1566, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.5075624340485404e-05, |
| "loss": 0.1586, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.4899753781217026e-05, |
| "loss": 0.1487, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 1.4723883221948648e-05, |
| "loss": 0.1493, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 1.4548012662680268e-05, |
| "loss": 0.1547, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.437214210341189e-05, |
| "loss": 0.1528, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.4196271544143512e-05, |
| "loss": 0.1447, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.4020400984875134e-05, |
| "loss": 0.1562, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 1.3844530425606754e-05, |
| "loss": 0.1022, |
| "step": 17500 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 1.3668659866338376e-05, |
| "loss": 0.099, |
| "step": 18000 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 1.3492789307069998e-05, |
| "loss": 0.0971, |
| "step": 18500 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 1.331691874780162e-05, |
| "loss": 0.1046, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 1.314104818853324e-05, |
| "loss": 0.1083, |
| "step": 19500 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 1.2965177629264862e-05, |
| "loss": 0.0998, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 1.2789307069996484e-05, |
| "loss": 0.1049, |
| "step": 20500 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 1.2613436510728106e-05, |
| "loss": 0.1052, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 1.2437565951459726e-05, |
| "loss": 0.108, |
| "step": 21500 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 1.2261695392191348e-05, |
| "loss": 0.0991, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 1.208582483292297e-05, |
| "loss": 0.1024, |
| "step": 22500 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 1.1909954273654592e-05, |
| "loss": 0.0822, |
| "step": 23000 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 1.1734083714386212e-05, |
| "loss": 0.069, |
| "step": 23500 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 1.1558213155117834e-05, |
| "loss": 0.0686, |
| "step": 24000 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 1.1382342595849456e-05, |
| "loss": 0.0709, |
| "step": 24500 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 1.1206472036581078e-05, |
| "loss": 0.0747, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 1.1030601477312698e-05, |
| "loss": 0.0658, |
| "step": 25500 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 1.085473091804432e-05, |
| "loss": 0.0732, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 1.0678860358775942e-05, |
| "loss": 0.0715, |
| "step": 26500 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 1.0502989799507564e-05, |
| "loss": 0.0778, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 1.0327119240239184e-05, |
| "loss": 0.0724, |
| "step": 27500 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 1.0151248680970806e-05, |
| "loss": 0.0722, |
| "step": 28000 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 9.975378121702428e-06, |
| "loss": 0.0691, |
| "step": 28500 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 9.79950756243405e-06, |
| "loss": 0.0485, |
| "step": 29000 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 9.62363700316567e-06, |
| "loss": 0.0503, |
| "step": 29500 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 9.447766443897292e-06, |
| "loss": 0.0536, |
| "step": 30000 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 9.271895884628914e-06, |
| "loss": 0.0503, |
| "step": 30500 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 9.096025325360536e-06, |
| "loss": 0.0452, |
| "step": 31000 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 8.920154766092157e-06, |
| "loss": 0.0473, |
| "step": 31500 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 8.744284206823778e-06, |
| "loss": 0.0488, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 8.5684136475554e-06, |
| "loss": 0.0507, |
| "step": 32500 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 8.392543088287022e-06, |
| "loss": 0.0467, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 8.216672529018643e-06, |
| "loss": 0.0556, |
| "step": 33500 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 8.040801969750265e-06, |
| "loss": 0.0481, |
| "step": 34000 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 7.864931410481886e-06, |
| "loss": 0.0367, |
| "step": 34500 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 7.689060851213508e-06, |
| "loss": 0.0329, |
| "step": 35000 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 7.5131902919451295e-06, |
| "loss": 0.0294, |
| "step": 35500 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 7.3373197326767506e-06, |
| "loss": 0.032, |
| "step": 36000 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 7.1614491734083725e-06, |
| "loss": 0.0331, |
| "step": 36500 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 6.985578614139994e-06, |
| "loss": 0.0274, |
| "step": 37000 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 6.8097080548716155e-06, |
| "loss": 0.0385, |
| "step": 37500 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 6.633837495603237e-06, |
| "loss": 0.0367, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 6.4579669363348586e-06, |
| "loss": 0.0447, |
| "step": 38500 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 6.28209637706648e-06, |
| "loss": 0.0433, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.95, |
| "learning_rate": 6.106225817798102e-06, |
| "loss": 0.034, |
| "step": 39500 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 5.930355258529723e-06, |
| "loss": 0.0311, |
| "step": 40000 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 5.754484699261345e-06, |
| "loss": 0.0189, |
| "step": 40500 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 5.578614139992966e-06, |
| "loss": 0.02, |
| "step": 41000 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 5.402743580724588e-06, |
| "loss": 0.0218, |
| "step": 41500 |
| }, |
| { |
| "epoch": 7.39, |
| "learning_rate": 5.226873021456209e-06, |
| "loss": 0.0217, |
| "step": 42000 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 5.051002462187831e-06, |
| "loss": 0.0282, |
| "step": 42500 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 4.875131902919452e-06, |
| "loss": 0.0267, |
| "step": 43000 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 4.699261343651073e-06, |
| "loss": 0.026, |
| "step": 43500 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 4.523390784382695e-06, |
| "loss": 0.0217, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 4.347520225114316e-06, |
| "loss": 0.0194, |
| "step": 44500 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 4.171649665845938e-06, |
| "loss": 0.026, |
| "step": 45000 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 3.995779106577559e-06, |
| "loss": 0.0274, |
| "step": 45500 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 3.819908547309181e-06, |
| "loss": 0.0107, |
| "step": 46000 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 3.6440379880408023e-06, |
| "loss": 0.0103, |
| "step": 46500 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 3.468167428772424e-06, |
| "loss": 0.0172, |
| "step": 47000 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 3.2922968695040454e-06, |
| "loss": 0.0129, |
| "step": 47500 |
| }, |
| { |
| "epoch": 8.44, |
| "learning_rate": 3.116426310235667e-06, |
| "loss": 0.017, |
| "step": 48000 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 2.9405557509672884e-06, |
| "loss": 0.0204, |
| "step": 48500 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 2.76468519169891e-06, |
| "loss": 0.0128, |
| "step": 49000 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 2.5888146324305314e-06, |
| "loss": 0.0213, |
| "step": 49500 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 2.412944073162153e-06, |
| "loss": 0.0125, |
| "step": 50000 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 2.2370735138937744e-06, |
| "loss": 0.0128, |
| "step": 50500 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 2.061202954625396e-06, |
| "loss": 0.0182, |
| "step": 51000 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 1.8853323953570175e-06, |
| "loss": 0.0116, |
| "step": 51500 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 1.709461836088639e-06, |
| "loss": 0.0081, |
| "step": 52000 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 1.5335912768202605e-06, |
| "loss": 0.0084, |
| "step": 52500 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 1.357720717551882e-06, |
| "loss": 0.0076, |
| "step": 53000 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 1.1818501582835035e-06, |
| "loss": 0.0089, |
| "step": 53500 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 1.005979599015125e-06, |
| "loss": 0.0067, |
| "step": 54000 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 8.301090397467465e-07, |
| "loss": 0.0066, |
| "step": 54500 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 6.542384804783681e-07, |
| "loss": 0.0097, |
| "step": 55000 |
| }, |
| { |
| "epoch": 9.76, |
| "learning_rate": 4.783679212099895e-07, |
| "loss": 0.0099, |
| "step": 55500 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 3.02497361941611e-07, |
| "loss": 0.0059, |
| "step": 56000 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 1.2662680267323252e-07, |
| "loss": 0.0075, |
| "step": 56500 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 56860, |
| "total_flos": 9.365103496606515e+17, |
| "train_runtime": 69350.928, |
| "train_samples_per_second": 52.464, |
| "train_steps_per_second": 0.82 |
| } |
| ], |
| "max_steps": 56860, |
| "num_train_epochs": 10, |
| "total_flos": 9.365103496606515e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|