{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 53830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 4.953836150845253e-05,
      "loss": 3.2037,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.9073936466654286e-05,
      "loss": 3.166,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.860951142485603e-05,
      "loss": 3.1434,
      "step": 1500
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.814508638305778e-05,
      "loss": 3.1104,
      "step": 2000
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.7680661341259524e-05,
      "loss": 3.0987,
      "step": 2500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.721623629946127e-05,
      "loss": 3.0677,
      "step": 3000
    },
    {
      "epoch": 0.65,
      "learning_rate": 4.6751811257663017e-05,
      "loss": 3.0787,
      "step": 3500
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.628738621586476e-05,
      "loss": 3.047,
      "step": 4000
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.582389002415011e-05,
      "loss": 3.0487,
      "step": 4500
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.5359464982351854e-05,
      "loss": 3.0396,
      "step": 5000
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.489596879063719e-05,
      "loss": 2.983,
      "step": 5500
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.443154374883894e-05,
      "loss": 2.8114,
      "step": 6000
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.3967118707040685e-05,
      "loss": 2.8242,
      "step": 6500
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.350269366524243e-05,
      "loss": 2.8101,
      "step": 7000
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.303826862344418e-05,
      "loss": 2.8055,
      "step": 7500
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.2573843581645924e-05,
      "loss": 2.821,
      "step": 8000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.210941853984767e-05,
      "loss": 2.8286,
      "step": 8500
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.1644993498049416e-05,
      "loss": 2.8026,
      "step": 9000
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.118056845625116e-05,
      "loss": 2.8078,
      "step": 9500
    },
    {
      "epoch": 1.86,
      "learning_rate": 4.0716143414452915e-05,
      "loss": 2.7948,
      "step": 10000
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.0251718372654655e-05,
      "loss": 2.8053,
      "step": 10500
    },
    {
      "epoch": 2.04,
      "learning_rate": 3.97872933308564e-05,
      "loss": 2.6906,
      "step": 11000
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.9323797139141746e-05,
      "loss": 2.5819,
      "step": 11500
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.8859372097343485e-05,
      "loss": 2.5768,
      "step": 12000
    },
    {
      "epoch": 2.32,
      "learning_rate": 3.839587590562883e-05,
      "loss": 2.597,
      "step": 12500
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.7931450863830584e-05,
      "loss": 2.5986,
      "step": 13000
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.746702582203232e-05,
      "loss": 2.6172,
      "step": 13500
    },
    {
      "epoch": 2.6,
      "learning_rate": 3.700260078023407e-05,
      "loss": 2.5917,
      "step": 14000
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.653817573843582e-05,
      "loss": 2.6019,
      "step": 14500
    },
    {
      "epoch": 2.79,
      "learning_rate": 3.607375069663756e-05,
      "loss": 2.598,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.560932565483931e-05,
      "loss": 2.5893,
      "step": 15500
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.514490061304106e-05,
      "loss": 2.589,
      "step": 16000
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.468047557124281e-05,
      "loss": 2.4718,
      "step": 16500
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.4216979379528146e-05,
      "loss": 2.41,
      "step": 17000
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.375255433772989e-05,
      "loss": 2.3767,
      "step": 17500
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.328812929593164e-05,
      "loss": 2.3999,
      "step": 18000
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.2823704254133384e-05,
      "loss": 2.4211,
      "step": 18500
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.236020806241873e-05,
      "loss": 2.4202,
      "step": 19000
    },
    {
      "epoch": 3.62,
      "learning_rate": 3.1895783020620476e-05,
      "loss": 2.4131,
      "step": 19500
    },
    {
      "epoch": 3.72,
      "learning_rate": 3.1431357978822215e-05,
      "loss": 2.4155,
      "step": 20000
    },
    {
      "epoch": 3.81,
      "learning_rate": 3.096693293702397e-05,
      "loss": 2.3968,
      "step": 20500
    },
    {
      "epoch": 3.9,
      "learning_rate": 3.050343674530931e-05,
      "loss": 2.4189,
      "step": 21000
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.0039011703511056e-05,
      "loss": 2.406,
      "step": 21500
    },
    {
      "epoch": 4.09,
      "learning_rate": 2.95745866617128e-05,
      "loss": 2.2482,
      "step": 22000
    },
    {
      "epoch": 4.18,
      "learning_rate": 2.911016161991455e-05,
      "loss": 2.2432,
      "step": 22500
    },
    {
      "epoch": 4.27,
      "learning_rate": 2.8646665428199894e-05,
      "loss": 2.2532,
      "step": 23000
    },
    {
      "epoch": 4.37,
      "learning_rate": 2.8182240386401637e-05,
      "loss": 2.2649,
      "step": 23500
    },
    {
      "epoch": 4.46,
      "learning_rate": 2.7717815344603383e-05,
      "loss": 2.2365,
      "step": 24000
    },
    {
      "epoch": 4.55,
      "learning_rate": 2.7253390302805126e-05,
      "loss": 2.233,
      "step": 24500
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.6788965261006875e-05,
      "loss": 2.2569,
      "step": 25000
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.632454021920862e-05,
      "loss": 2.2609,
      "step": 25500
    },
    {
      "epoch": 4.83,
      "learning_rate": 2.5860115177410364e-05,
      "loss": 2.256,
      "step": 26000
    },
    {
      "epoch": 4.92,
      "learning_rate": 2.5395690135612117e-05,
      "loss": 2.2553,
      "step": 26500
    },
    {
      "epoch": 5.02,
      "learning_rate": 2.4933122793981055e-05,
      "loss": 2.1951,
      "step": 27000
    },
    {
      "epoch": 5.11,
      "learning_rate": 2.4468697752182798e-05,
      "loss": 2.1061,
      "step": 27500
    },
    {
      "epoch": 5.2,
      "learning_rate": 2.4004272710384544e-05,
      "loss": 2.0723,
      "step": 28000
    },
    {
      "epoch": 5.29,
      "learning_rate": 2.353984766858629e-05,
      "loss": 2.0796,
      "step": 28500
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.3075422626788036e-05,
      "loss": 2.1206,
      "step": 29000
    },
    {
      "epoch": 5.48,
      "learning_rate": 2.261192643507338e-05,
      "loss": 2.1276,
      "step": 29500
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.2147501393275128e-05,
      "loss": 2.1108,
      "step": 30000
    },
    {
      "epoch": 5.67,
      "learning_rate": 2.168400520156047e-05,
      "loss": 2.1241,
      "step": 30500
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.1219580159762216e-05,
      "loss": 2.1392,
      "step": 31000
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.0755155117963962e-05,
      "loss": 2.1427,
      "step": 31500
    },
    {
      "epoch": 5.94,
      "learning_rate": 2.0291658926249304e-05,
      "loss": 2.0999,
      "step": 32000
    },
    {
      "epoch": 6.04,
      "learning_rate": 1.982723388445105e-05,
      "loss": 2.0637,
      "step": 32500
    },
    {
      "epoch": 6.13,
      "learning_rate": 1.9362808842652796e-05,
      "loss": 1.9977,
      "step": 33000
    },
    {
      "epoch": 6.22,
      "learning_rate": 1.8898383800854543e-05,
      "loss": 1.98,
      "step": 33500
    },
    {
      "epoch": 6.32,
      "learning_rate": 1.843395875905629e-05,
      "loss": 1.9836,
      "step": 34000
    },
    {
      "epoch": 6.41,
      "learning_rate": 1.7969533717258035e-05,
      "loss": 2.0058,
      "step": 34500
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.750510867545978e-05,
      "loss": 1.9843,
      "step": 35000
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.7040683633661527e-05,
      "loss": 1.9899,
      "step": 35500
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.6576258591863277e-05,
      "loss": 2.0094,
      "step": 36000
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.611183355006502e-05,
      "loss": 2.0092,
      "step": 36500
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.5647408508266766e-05,
      "loss": 2.0013,
      "step": 37000
    },
    {
      "epoch": 6.97,
      "learning_rate": 1.5182983466468512e-05,
      "loss": 2.0088,
      "step": 37500
    },
    {
      "epoch": 7.06,
      "learning_rate": 1.4719487274753854e-05,
      "loss": 1.9496,
      "step": 38000
    },
    {
      "epoch": 7.15,
      "learning_rate": 1.4255062232955602e-05,
      "loss": 1.8963,
      "step": 38500
    },
    {
      "epoch": 7.25,
      "learning_rate": 1.379063719115735e-05,
      "loss": 1.8895,
      "step": 39000
    },
    {
      "epoch": 7.34,
      "learning_rate": 1.3326212149359094e-05,
      "loss": 1.9075,
      "step": 39500
    },
    {
      "epoch": 7.43,
      "learning_rate": 1.2862715957644436e-05,
      "loss": 1.9191,
      "step": 40000
    },
    {
      "epoch": 7.52,
      "learning_rate": 1.2398290915846184e-05,
      "loss": 1.9075,
      "step": 40500
    },
    {
      "epoch": 7.62,
      "learning_rate": 1.193386587404793e-05,
      "loss": 1.9071,
      "step": 41000
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.1469440832249676e-05,
      "loss": 1.9168,
      "step": 41500
    },
    {
      "epoch": 7.8,
      "learning_rate": 1.100501579045142e-05,
      "loss": 1.9142,
      "step": 42000
    },
    {
      "epoch": 7.9,
      "learning_rate": 1.0540590748653167e-05,
      "loss": 1.8864,
      "step": 42500
    },
    {
      "epoch": 7.99,
      "learning_rate": 1.0076165706854915e-05,
      "loss": 1.9086,
      "step": 43000
    },
    {
      "epoch": 8.08,
      "learning_rate": 9.61174066505666e-06,
      "loss": 1.8377,
      "step": 43500
    },
    {
      "epoch": 8.17,
      "learning_rate": 9.148244473342003e-06,
      "loss": 1.8389,
      "step": 44000
    },
    {
      "epoch": 8.27,
      "learning_rate": 8.683819431543749e-06,
      "loss": 1.8276,
      "step": 44500
    },
    {
      "epoch": 8.36,
      "learning_rate": 8.219394389745495e-06,
      "loss": 1.8399,
      "step": 45000
    },
    {
      "epoch": 8.45,
      "learning_rate": 7.754969347947241e-06,
      "loss": 1.8389,
      "step": 45500
    },
    {
      "epoch": 8.55,
      "learning_rate": 7.29240200631618e-06,
      "loss": 1.8308,
      "step": 46000
    },
    {
      "epoch": 8.64,
      "learning_rate": 6.827976964517926e-06,
      "loss": 1.8329,
      "step": 46500
    },
    {
      "epoch": 8.73,
      "learning_rate": 6.363551922719674e-06,
      "loss": 1.8351,
      "step": 47000
    },
    {
      "epoch": 8.82,
      "learning_rate": 5.8991268809214195e-06,
      "loss": 1.8573,
      "step": 47500
    },
    {
      "epoch": 8.92,
      "learning_rate": 5.434701839123166e-06,
      "loss": 1.8221,
      "step": 48000
    },
    {
      "epoch": 9.01,
      "learning_rate": 4.9712056474085085e-06,
      "loss": 1.8384,
      "step": 48500
    },
    {
      "epoch": 9.1,
      "learning_rate": 4.506780605610255e-06,
      "loss": 1.7756,
      "step": 49000
    },
    {
      "epoch": 9.2,
      "learning_rate": 4.042355563812001e-06,
      "loss": 1.7839,
      "step": 49500
    },
    {
      "epoch": 9.29,
      "learning_rate": 3.577930522013747e-06,
      "loss": 1.7839,
      "step": 50000
    },
    {
      "epoch": 9.38,
      "learning_rate": 3.113505480215493e-06,
      "loss": 1.8112,
      "step": 50500
    },
    {
      "epoch": 9.47,
      "learning_rate": 2.6490804384172397e-06,
      "loss": 1.8078,
      "step": 51000
    },
    {
      "epoch": 9.57,
      "learning_rate": 2.184655396618986e-06,
      "loss": 1.7745,
      "step": 51500
    },
    {
      "epoch": 9.66,
      "learning_rate": 1.720230354820732e-06,
      "loss": 1.7955,
      "step": 52000
    },
    {
      "epoch": 9.75,
      "learning_rate": 1.2558053130224781e-06,
      "loss": 1.7978,
      "step": 52500
    },
    {
      "epoch": 9.85,
      "learning_rate": 7.923091213078211e-07,
      "loss": 1.8062,
      "step": 53000
    },
    {
      "epoch": 9.94,
      "learning_rate": 3.2788407950956715e-07,
      "loss": 1.7702,
      "step": 53500
    },
    {
      "epoch": 10.0,
      "step": 53830,
      "total_flos": 9.998391677288448e+16,
      "train_loss": 2.279561182284652,
      "train_runtime": 34779.9602,
      "train_samples_per_second": 1.548,
      "train_steps_per_second": 1.548
    }
  ],
  "max_steps": 53830,
  "num_train_epochs": 10,
  "total_flos": 9.998391677288448e+16,
  "trial_name": null,
  "trial_params": null
}