| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5649, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.017702248185519562, |
| "grad_norm": 0.04682525247335434, |
| "learning_rate": 4.4267374944665786e-07, |
| "loss": 2.7706, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.035404496371039124, |
| "grad_norm": 0.04805780574679375, |
| "learning_rate": 8.853474988933157e-07, |
| "loss": 2.7666, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.053106744556558685, |
| "grad_norm": 0.0528537780046463, |
| "learning_rate": 1.3280212483399734e-06, |
| "loss": 2.7509, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07080899274207825, |
| "grad_norm": 0.07866832613945007, |
| "learning_rate": 1.7706949977866315e-06, |
| "loss": 2.7561, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08851124092759781, |
| "grad_norm": 0.0911114439368248, |
| "learning_rate": 2.2133687472332895e-06, |
| "loss": 2.7683, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.10621348911311737, |
| "grad_norm": 0.09670838713645935, |
| "learning_rate": 2.656042496679947e-06, |
| "loss": 2.7376, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12391573729863693, |
| "grad_norm": 0.10767289996147156, |
| "learning_rate": 3.098716246126605e-06, |
| "loss": 2.7265, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1416179854841565, |
| "grad_norm": 0.11828861385583878, |
| "learning_rate": 3.541389995573263e-06, |
| "loss": 2.7201, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.15932023366967604, |
| "grad_norm": 0.13154348731040955, |
| "learning_rate": 3.98406374501992e-06, |
| "loss": 2.7123, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.17702248185519562, |
| "grad_norm": 0.14556218683719635, |
| "learning_rate": 4.426737494466579e-06, |
| "loss": 2.7015, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.19472473004071517, |
| "grad_norm": 0.15351223945617676, |
| "learning_rate": 4.869411243913236e-06, |
| "loss": 2.6959, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.21242697822623474, |
| "grad_norm": 0.1721131056547165, |
| "learning_rate": 5.312084993359894e-06, |
| "loss": 2.6947, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2301292264117543, |
| "grad_norm": 0.18679605424404144, |
| "learning_rate": 5.754758742806552e-06, |
| "loss": 2.6706, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.24783147459727387, |
| "grad_norm": 0.17619894444942474, |
| "learning_rate": 6.19743249225321e-06, |
| "loss": 2.6608, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.2655337227827934, |
| "grad_norm": 0.1907527893781662, |
| "learning_rate": 6.640106241699867e-06, |
| "loss": 2.6539, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.283235970968313, |
| "grad_norm": 0.20181308686733246, |
| "learning_rate": 7.082779991146526e-06, |
| "loss": 2.6598, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3009382191538325, |
| "grad_norm": 0.21578721702098846, |
| "learning_rate": 7.525453740593184e-06, |
| "loss": 2.6435, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3186404673393521, |
| "grad_norm": 0.20917312800884247, |
| "learning_rate": 7.96812749003984e-06, |
| "loss": 2.6344, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.33634271552487166, |
| "grad_norm": 0.23269931972026825, |
| "learning_rate": 8.4108012394865e-06, |
| "loss": 2.6365, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.35404496371039124, |
| "grad_norm": 0.2302529662847519, |
| "learning_rate": 8.853474988933158e-06, |
| "loss": 2.6323, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.37174721189591076, |
| "grad_norm": 0.25494036078453064, |
| "learning_rate": 9.296148738379815e-06, |
| "loss": 2.6171, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.38944946008143033, |
| "grad_norm": 0.2526334822177887, |
| "learning_rate": 9.738822487826472e-06, |
| "loss": 2.6152, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.4071517082669499, |
| "grad_norm": 0.2761424779891968, |
| "learning_rate": 1.0181496237273129e-05, |
| "loss": 2.6144, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.4248539564524695, |
| "grad_norm": 0.2666952908039093, |
| "learning_rate": 1.0624169986719787e-05, |
| "loss": 2.596, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.442556204637989, |
| "grad_norm": 0.2540760338306427, |
| "learning_rate": 1.1066843736166446e-05, |
| "loss": 2.6047, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.4602584528235086, |
| "grad_norm": 0.28952619433403015, |
| "learning_rate": 1.1509517485613105e-05, |
| "loss": 2.584, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.47796070100902815, |
| "grad_norm": 0.25384747982025146, |
| "learning_rate": 1.1952191235059762e-05, |
| "loss": 2.5861, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.49566294919454773, |
| "grad_norm": 0.27219265699386597, |
| "learning_rate": 1.239486498450642e-05, |
| "loss": 2.5773, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.5133651973800673, |
| "grad_norm": 0.27737173438072205, |
| "learning_rate": 1.2837538733953077e-05, |
| "loss": 2.5735, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.5310674455655868, |
| "grad_norm": 0.2793057858943939, |
| "learning_rate": 1.3280212483399734e-05, |
| "loss": 2.5717, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5487696937511064, |
| "grad_norm": 0.2786986231803894, |
| "learning_rate": 1.3722886232846393e-05, |
| "loss": 2.5779, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.566471941936626, |
| "grad_norm": 0.267103910446167, |
| "learning_rate": 1.4165559982293052e-05, |
| "loss": 2.5569, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5841741901221456, |
| "grad_norm": 0.30275553464889526, |
| "learning_rate": 1.4608233731739709e-05, |
| "loss": 2.5662, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.601876438307665, |
| "grad_norm": 0.2926766276359558, |
| "learning_rate": 1.5050907481186367e-05, |
| "loss": 2.5539, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.6195786864931846, |
| "grad_norm": 0.2998274862766266, |
| "learning_rate": 1.5493581230633026e-05, |
| "loss": 2.5592, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6372809346787042, |
| "grad_norm": 0.2824733853340149, |
| "learning_rate": 1.593625498007968e-05, |
| "loss": 2.5652, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6549831828642237, |
| "grad_norm": 0.33311328291893005, |
| "learning_rate": 1.637892872952634e-05, |
| "loss": 2.551, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.6726854310497433, |
| "grad_norm": 0.321186363697052, |
| "learning_rate": 1.6821602478973e-05, |
| "loss": 2.55, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.6903876792352629, |
| "grad_norm": 0.33593156933784485, |
| "learning_rate": 1.7264276228419657e-05, |
| "loss": 2.5468, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.7080899274207825, |
| "grad_norm": 0.32341769337654114, |
| "learning_rate": 1.7706949977866316e-05, |
| "loss": 2.5465, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.725792175606302, |
| "grad_norm": 0.3142276108264923, |
| "learning_rate": 1.814962372731297e-05, |
| "loss": 2.5291, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.7434944237918215, |
| "grad_norm": 0.3352969288825989, |
| "learning_rate": 1.859229747675963e-05, |
| "loss": 2.5193, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.7611966719773411, |
| "grad_norm": 0.31670665740966797, |
| "learning_rate": 1.903497122620629e-05, |
| "loss": 2.5263, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.7788989201628607, |
| "grad_norm": 0.336976021528244, |
| "learning_rate": 1.9477644975652944e-05, |
| "loss": 2.5253, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.7966011683483802, |
| "grad_norm": 0.3269643783569336, |
| "learning_rate": 1.9920318725099602e-05, |
| "loss": 2.5182, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.8143034165338998, |
| "grad_norm": 0.3271143436431885, |
| "learning_rate": 1.9741718189189488e-05, |
| "loss": 2.5278, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.8320056647194194, |
| "grad_norm": 0.3091905415058136, |
| "learning_rate": 1.8749113254181498e-05, |
| "loss": 2.5208, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.849707912904939, |
| "grad_norm": 0.33515864610671997, |
| "learning_rate": 1.7085783500963825e-05, |
| "loss": 2.5139, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.8674101610904585, |
| "grad_norm": 0.33183321356773376, |
| "learning_rate": 1.487924317171598e-05, |
| "loss": 2.5108, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.885112409275978, |
| "grad_norm": 0.32752835750579834, |
| "learning_rate": 1.2298650136294059e-05, |
| "loss": 2.5113, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.9028146574614976, |
| "grad_norm": 0.3689501881599426, |
| "learning_rate": 9.541837905851817e-06, |
| "loss": 2.5141, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.9205169056470172, |
| "grad_norm": 0.3316275179386139, |
| "learning_rate": 6.8201493134721215e-06, |
| "loss": 2.5211, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.9382191538325367, |
| "grad_norm": 0.36550265550613403, |
| "learning_rate": 4.342234542700692e-06, |
| "loss": 2.5125, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.9559214020180563, |
| "grad_norm": 0.30869239568710327, |
| "learning_rate": 2.2980555800703273e-06, |
| "loss": 2.4942, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.9736236502035759, |
| "grad_norm": 0.32581713795661926, |
| "learning_rate": 8.443233428209019e-07, |
| "loss": 2.5019, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9913258983890955, |
| "grad_norm": 0.3429378569126129, |
| "learning_rate": 9.248390122572615e-08, |
| "loss": 2.5082, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 5649, |
| "total_flos": 8.231609035317576e+17, |
| "train_loss": 2.6019921150011687, |
| "train_runtime": 1598.815, |
| "train_samples_per_second": 56.525, |
| "train_steps_per_second": 3.533 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5649, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.231609035317576e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|