{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.224483333333333,
  "global_step": 60000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.002197932219132781,
      "loss": 3.6508,
      "step": 1000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.004419300239533186,
      "loss": 2.1932,
      "step": 2000
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.006713071372359991,
      "loss": 1.9369,
      "step": 3000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.009095062501728535,
      "loss": 1.6844,
      "step": 4000
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.011655605398118496,
      "loss": 1.557,
      "step": 5000
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.014309737831354141,
      "loss": 1.4849,
      "step": 6000
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.016971396282315254,
      "loss": 1.6145,
      "step": 7000
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.01942763663828373,
      "loss": 1.6191,
      "step": 8000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.022188229486346245,
      "loss": 1.5503,
      "step": 9000
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.025435281917452812,
      "loss": 1.5535,
      "step": 10000
    },
    {
      "epoch": 0.43,
      "eval_bleu": 11.554505529017694,
      "eval_loss": 1.8505867719650269,
      "eval_runtime": 81.2961,
      "eval_samples_per_second": 230.257,
      "eval_steps_per_second": 0.91,
      "step": 10000
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.024702614173293114,
      "loss": 1.4991,
      "step": 11000
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.024255122989416122,
      "loss": 1.4803,
      "step": 12000
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.02393551729619503,
      "loss": 1.4415,
      "step": 13000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.02371666394174099,
      "loss": 1.4454,
      "step": 14000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.02343085967004299,
      "loss": 1.4106,
      "step": 15000
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.02309497445821762,
      "loss": 1.3996,
      "step": 16000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.02294100448489189,
      "loss": 1.3815,
      "step": 17000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.022620119154453278,
      "loss": 1.3915,
      "step": 18000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.022044328972697258,
      "loss": 1.5557,
      "step": 19000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.02174384333193302,
      "loss": 1.2879,
      "step": 20000
    },
    {
      "epoch": 0.87,
      "eval_bleu": 15.736323415434928,
      "eval_loss": 1.4790531396865845,
      "eval_runtime": 81.7862,
      "eval_samples_per_second": 228.877,
      "eval_steps_per_second": 0.905,
      "step": 20000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.02188653126358986,
      "loss": 1.0018,
      "step": 21000
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.02189297415316105,
      "loss": 0.9382,
      "step": 22000
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.02184857614338398,
      "loss": 0.9229,
      "step": 23000
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.021777568385004997,
      "loss": 1.2887,
      "step": 24000
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.021719269454479218,
      "loss": 1.3278,
      "step": 24500
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.021626409143209457,
      "loss": 1.3052,
      "step": 25000
    },
    {
      "epoch": 1.06,
      "eval_bleu": 15.689633530325342,
      "eval_loss": 1.344283103942871,
      "eval_runtime": 81.5254,
      "eval_samples_per_second": 229.61,
      "eval_steps_per_second": 0.908,
      "step": 25000
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.02152758091688156,
      "loss": 1.2788,
      "step": 25500
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.021510712802410126,
      "loss": 1.369,
      "step": 26000
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.021486839279532433,
      "loss": 1.3663,
      "step": 26500
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.021426061168313026,
      "loss": 1.2913,
      "step": 27000
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.021364226937294006,
      "loss": 1.2785,
      "step": 27500
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.021301250904798508,
      "loss": 1.2471,
      "step": 28000
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.021244272589683533,
      "loss": 1.2191,
      "step": 28500
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.021167641505599022,
      "loss": 1.2128,
      "step": 29000
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.021093450486660004,
      "loss": 1.1959,
      "step": 29500
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.02111968770623207,
      "loss": 1.3139,
      "step": 30000
    },
    {
      "epoch": 1.22,
      "eval_bleu": 16.118544297247887,
      "eval_loss": 1.2931314706802368,
      "eval_runtime": 81.4634,
      "eval_samples_per_second": 229.784,
      "eval_steps_per_second": 0.908,
      "step": 30000
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.021165331825613976,
      "loss": 1.3204,
      "step": 30500
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.021186990663409233,
      "loss": 1.2925,
      "step": 31000
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.021194208413362503,
      "loss": 1.3002,
      "step": 31500
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.021185804158449173,
      "loss": 1.2473,
      "step": 32000
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.021163810044527054,
      "loss": 1.2455,
      "step": 32500
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.021145079284906387,
      "loss": 1.2411,
      "step": 33000
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.021122504025697708,
      "loss": 1.2214,
      "step": 33500
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.021091610193252563,
      "loss": 1.2239,
      "step": 34000
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.021048951894044876,
      "loss": 1.228,
      "step": 34500
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.02102178893983364,
      "loss": 1.2119,
      "step": 35000
    },
    {
      "epoch": 1.29,
      "eval_bleu": 15.491337721835558,
      "eval_loss": 1.3143055438995361,
      "eval_runtime": 81.4828,
      "eval_samples_per_second": 229.73,
      "eval_steps_per_second": 0.908,
      "step": 35000
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.020987574011087418,
      "loss": 1.1862,
      "step": 35500
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.02096775360405445,
      "loss": 1.2127,
      "step": 36000
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.02094915322959423,
      "loss": 1.228,
      "step": 36500
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.02091248333454132,
      "loss": 1.2456,
      "step": 37000
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.020875241607427597,
      "loss": 1.2494,
      "step": 37500
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.020842362195253372,
      "loss": 1.1684,
      "step": 38000
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.02081664651632309,
      "loss": 1.2389,
      "step": 38500
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.020784636959433556,
      "loss": 1.1898,
      "step": 39000
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.020755469799041748,
      "loss": 1.1915,
      "step": 39500
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.02073347009718418,
      "loss": 1.1929,
      "step": 40000
    },
    {
      "epoch": 1.42,
      "eval_bleu": 15.669781616522553,
      "eval_loss": 1.3155781030654907,
      "eval_runtime": 81.5967,
      "eval_samples_per_second": 229.409,
      "eval_steps_per_second": 0.907,
      "step": 40000
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.020710671320557594,
      "loss": 1.2275,
      "step": 40500
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.020680107176303864,
      "loss": 1.1966,
      "step": 41000
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.020645136013627052,
      "loss": 1.2111,
      "step": 41500
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.020648332312703133,
      "loss": 1.3908,
      "step": 42000
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.02063341997563839,
      "loss": 1.3234,
      "step": 42500
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.02064467966556549,
      "loss": 0.9668,
      "step": 43000
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.020663069561123848,
      "loss": 0.8413,
      "step": 43500
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.0206832867115736,
      "loss": 0.8273,
      "step": 44000
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.020704049617052078,
      "loss": 0.7956,
      "step": 44500
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.020729372277855873,
      "loss": 0.7875,
      "step": 45000
    },
    {
      "epoch": 1.36,
      "eval_bleu": 17.782492667022247,
      "eval_loss": 1.2672479152679443,
      "eval_runtime": 81.5877,
      "eval_samples_per_second": 229.434,
      "eval_steps_per_second": 0.907,
      "step": 45000
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.020742451772093773,
      "loss": 0.7773,
      "step": 45500
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.020756695419549942,
      "loss": 0.7604,
      "step": 46000
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.020774947479367256,
      "loss": 0.8933,
      "step": 46500
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.02075868472456932,
      "loss": 1.2112,
      "step": 47000
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.020726632326841354,
      "loss": 1.1824,
      "step": 47500
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.020695462822914124,
      "loss": 1.1919,
      "step": 48000
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.020659752190113068,
      "loss": 1.1771,
      "step": 48500
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.02061435766518116,
      "loss": 1.1045,
      "step": 49000
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.02060552127659321,
      "loss": 1.2627,
      "step": 49500
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.020578160881996155,
      "loss": 1.1741,
      "step": 50000
    },
    {
      "epoch": 2.06,
      "eval_bleu": 17.817437675612407,
      "eval_loss": 1.2213643789291382,
      "eval_runtime": 81.6429,
      "eval_samples_per_second": 229.279,
      "eval_steps_per_second": 0.906,
      "step": 50000
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.02054954506456852,
      "loss": 1.1366,
      "step": 50500
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.020517559722065926,
      "loss": 1.1321,
      "step": 51000
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.020486876368522644,
      "loss": 1.1128,
      "step": 51500
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.020454248413443565,
      "loss": 1.0992,
      "step": 52000
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.020409852266311646,
      "loss": 1.0874,
      "step": 52500
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.020372800529003143,
      "loss": 1.0964,
      "step": 53000
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.020410917699337006,
      "loss": 1.2197,
      "step": 53500
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.020421497523784637,
      "loss": 1.1628,
      "step": 54000
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.02043827436864376,
      "loss": 1.1952,
      "step": 54500
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.020448317751288414,
      "loss": 1.1603,
      "step": 55000
    },
    {
      "epoch": 2.14,
      "eval_bleu": 17.473470393567123,
      "eval_loss": 1.1755515336990356,
      "eval_runtime": 81.7968,
      "eval_samples_per_second": 228.848,
      "eval_steps_per_second": 0.905,
      "step": 55000
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.020445365458726883,
      "loss": 1.1201,
      "step": 55500
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.020447757095098495,
      "loss": 1.1362,
      "step": 56000
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.020444199442863464,
      "loss": 1.151,
      "step": 56500
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.020440472289919853,
      "loss": 1.125,
      "step": 57000
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.020431550219655037,
      "loss": 1.1041,
      "step": 57500
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0204232819378376,
      "loss": 1.1338,
      "step": 58000
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.02041398547589779,
      "loss": 1.1279,
      "step": 58500
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.02040201798081398,
      "loss": 1.0957,
      "step": 59000
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.020392674952745438,
      "loss": 1.1315,
      "step": 59500
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.020384889096021652,
      "loss": 1.146,
      "step": 60000
    },
    {
      "epoch": 2.22,
      "eval_bleu": 17.10106003319275,
      "eval_loss": 1.2006129026412964,
      "eval_runtime": 81.8911,
      "eval_samples_per_second": 228.584,
      "eval_steps_per_second": 0.904,
      "step": 60000
    }
  ],
  "max_steps": 60000,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 4.6881481692662784e+17,
  "trial_name": null,
  "trial_params": null
}