{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 472, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0211864406779661, "grad_norm": 4.637205123901367, "learning_rate": 4.904661016949153e-05, "loss": 3.0019, "step": 10 }, { "epoch": 0.0423728813559322, "grad_norm": 4.062796115875244, "learning_rate": 4.7987288135593225e-05, "loss": 2.6502, "step": 20 }, { "epoch": 0.0635593220338983, "grad_norm": 3.6472411155700684, "learning_rate": 4.692796610169492e-05, "loss": 2.4454, "step": 30 }, { "epoch": 0.0847457627118644, "grad_norm": 3.438002347946167, "learning_rate": 4.5868644067796616e-05, "loss": 2.2664, "step": 40 }, { "epoch": 0.1059322033898305, "grad_norm": 3.674481153488159, "learning_rate": 4.480932203389831e-05, "loss": 2.3392, "step": 50 }, { "epoch": 0.1271186440677966, "grad_norm": 4.97629976272583, "learning_rate": 4.375e-05, "loss": 2.192, "step": 60 }, { "epoch": 0.1483050847457627, "grad_norm": 3.4427733421325684, "learning_rate": 4.2690677966101695e-05, "loss": 2.1182, "step": 70 }, { "epoch": 0.1694915254237288, "grad_norm": 3.9113576412200928, "learning_rate": 4.163135593220339e-05, "loss": 2.1282, "step": 80 }, { "epoch": 0.1906779661016949, "grad_norm": 3.6016507148742676, "learning_rate": 4.0572033898305086e-05, "loss": 2.1691, "step": 90 }, { "epoch": 0.211864406779661, "grad_norm": 3.363358974456787, "learning_rate": 3.951271186440678e-05, "loss": 2.0797, "step": 100 }, { "epoch": 0.2330508474576271, "grad_norm": 3.2457995414733887, "learning_rate": 3.8453389830508476e-05, "loss": 2.219, "step": 110 }, { "epoch": 0.2542372881355932, "grad_norm": 3.339585065841675, "learning_rate": 3.739406779661017e-05, "loss": 2.1114, "step": 120 }, { "epoch": 0.2754237288135593, "grad_norm": 3.4949917793273926, "learning_rate": 3.633474576271187e-05, "loss": 1.966, "step": 130 }, { "epoch": 0.2966101694915254, "grad_norm": 3.223611354827881, "learning_rate": 3.527542372881356e-05, "loss": 2.1414, "step": 140 }, { "epoch": 0.3177966101694915, "grad_norm": 3.4801230430603027, "learning_rate": 3.421610169491525e-05, "loss": 2.0981, "step": 150 }, { "epoch": 0.3389830508474576, "grad_norm": 3.30033540725708, "learning_rate": 3.315677966101695e-05, "loss": 1.9982, "step": 160 }, { "epoch": 0.3601694915254237, "grad_norm": 3.305997610092163, "learning_rate": 3.209745762711864e-05, "loss": 1.9845, "step": 170 }, { "epoch": 0.3813559322033898, "grad_norm": 3.4289534091949463, "learning_rate": 3.1038135593220344e-05, "loss": 2.0987, "step": 180 }, { "epoch": 0.4025423728813559, "grad_norm": 3.018153190612793, "learning_rate": 2.9978813559322032e-05, "loss": 2.0208, "step": 190 }, { "epoch": 0.423728813559322, "grad_norm": 3.609093427658081, "learning_rate": 2.891949152542373e-05, "loss": 1.996, "step": 200 }, { "epoch": 0.4449152542372881, "grad_norm": 4.0159220695495605, "learning_rate": 2.7860169491525423e-05, "loss": 1.9612, "step": 210 }, { "epoch": 0.4661016949152542, "grad_norm": 3.264458179473877, "learning_rate": 2.6800847457627122e-05, "loss": 2.0203, "step": 220 }, { "epoch": 0.4872881355932203, "grad_norm": 3.696259021759033, "learning_rate": 2.5741525423728814e-05, "loss": 2.0867, "step": 230 }, { "epoch": 0.5084745762711864, "grad_norm": 4.3933796882629395, "learning_rate": 2.468220338983051e-05, "loss": 1.9675, "step": 240 }, { "epoch": 0.5296610169491526, "grad_norm": 3.876000165939331, "learning_rate": 2.3622881355932204e-05, "loss": 2.0325, "step": 250 }, { "epoch": 0.5508474576271186, "grad_norm": 3.8333613872528076, "learning_rate": 2.25635593220339e-05, "loss": 1.9674, "step": 260 }, { "epoch": 0.5720338983050848, "grad_norm": 3.398927688598633, "learning_rate": 2.1504237288135595e-05, "loss": 1.9873, "step": 270 }, { "epoch": 0.5932203389830508, "grad_norm": 2.980912446975708, "learning_rate": 2.044491525423729e-05, "loss": 1.9447, "step": 280 }, { "epoch": 0.614406779661017, "grad_norm": 4.0269246101379395, "learning_rate": 1.9385593220338986e-05, "loss": 1.9553, "step": 290 }, { "epoch": 0.635593220338983, "grad_norm": 3.15983247756958, "learning_rate": 1.832627118644068e-05, "loss": 1.9081, "step": 300 }, { "epoch": 0.6567796610169492, "grad_norm": 4.2125935554504395, "learning_rate": 1.7266949152542373e-05, "loss": 1.9373, "step": 310 }, { "epoch": 0.6779661016949152, "grad_norm": 3.3405373096466064, "learning_rate": 1.620762711864407e-05, "loss": 1.8637, "step": 320 }, { "epoch": 0.6991525423728814, "grad_norm": 3.782801628112793, "learning_rate": 1.5148305084745764e-05, "loss": 2.029, "step": 330 }, { "epoch": 0.7203389830508474, "grad_norm": 3.6767022609710693, "learning_rate": 1.408898305084746e-05, "loss": 1.7768, "step": 340 }, { "epoch": 0.7415254237288136, "grad_norm": 3.3231544494628906, "learning_rate": 1.3029661016949155e-05, "loss": 1.9421, "step": 350 }, { "epoch": 0.7627118644067796, "grad_norm": 3.579037666320801, "learning_rate": 1.1970338983050848e-05, "loss": 1.8764, "step": 360 }, { "epoch": 0.7838983050847458, "grad_norm": 3.5025410652160645, "learning_rate": 1.0911016949152544e-05, "loss": 1.8414, "step": 370 }, { "epoch": 0.8050847457627118, "grad_norm": 4.006308078765869, "learning_rate": 9.851694915254237e-06, "loss": 1.8852, "step": 380 }, { "epoch": 0.826271186440678, "grad_norm": 3.3026678562164307, "learning_rate": 8.792372881355933e-06, "loss": 1.9024, "step": 390 }, { "epoch": 0.847457627118644, "grad_norm": 3.574389696121216, "learning_rate": 7.733050847457628e-06, "loss": 1.8935, "step": 400 }, { "epoch": 0.8686440677966102, "grad_norm": 3.5349714756011963, "learning_rate": 6.6737288135593225e-06, "loss": 1.8763, "step": 410 }, { "epoch": 0.8898305084745762, "grad_norm": 3.3055741786956787, "learning_rate": 5.614406779661018e-06, "loss": 1.9133, "step": 420 }, { "epoch": 0.9110169491525424, "grad_norm": 5.87731409072876, "learning_rate": 4.5550847457627115e-06, "loss": 1.9598, "step": 430 }, { "epoch": 0.9322033898305084, "grad_norm": 3.8937673568725586, "learning_rate": 3.495762711864407e-06, "loss": 1.8699, "step": 440 }, { "epoch": 0.9533898305084746, "grad_norm": 3.4998295307159424, "learning_rate": 2.436440677966102e-06, "loss": 1.9595, "step": 450 }, { "epoch": 0.9745762711864406, "grad_norm": 3.4720184803009033, "learning_rate": 1.3771186440677967e-06, "loss": 1.8245, "step": 460 }, { "epoch": 0.9957627118644068, "grad_norm": 3.557469367980957, "learning_rate": 3.1779661016949154e-07, "loss": 2.0716, "step": 470 } ], "logging_steps": 10, "max_steps": 472, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1823897464012800.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }