{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 472,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0211864406779661,
      "grad_norm": 4.637205123901367,
      "learning_rate": 4.904661016949153e-05,
      "loss": 3.0019,
      "step": 10
    },
    {
      "epoch": 0.0423728813559322,
      "grad_norm": 4.062796115875244,
      "learning_rate": 4.7987288135593225e-05,
      "loss": 2.6502,
      "step": 20
    },
    {
      "epoch": 0.0635593220338983,
      "grad_norm": 3.6472411155700684,
      "learning_rate": 4.692796610169492e-05,
      "loss": 2.4454,
      "step": 30
    },
    {
      "epoch": 0.0847457627118644,
      "grad_norm": 3.438002347946167,
      "learning_rate": 4.5868644067796616e-05,
      "loss": 2.2664,
      "step": 40
    },
    {
      "epoch": 0.1059322033898305,
      "grad_norm": 3.674481153488159,
      "learning_rate": 4.480932203389831e-05,
      "loss": 2.3392,
      "step": 50
    },
    {
      "epoch": 0.1271186440677966,
      "grad_norm": 4.97629976272583,
      "learning_rate": 4.375e-05,
      "loss": 2.192,
      "step": 60
    },
    {
      "epoch": 0.1483050847457627,
      "grad_norm": 3.4427733421325684,
      "learning_rate": 4.2690677966101695e-05,
      "loss": 2.1182,
      "step": 70
    },
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 3.9113576412200928,
      "learning_rate": 4.163135593220339e-05,
      "loss": 2.1282,
      "step": 80
    },
    {
      "epoch": 0.1906779661016949,
      "grad_norm": 3.6016507148742676,
      "learning_rate": 4.0572033898305086e-05,
      "loss": 2.1691,
      "step": 90
    },
    {
      "epoch": 0.211864406779661,
      "grad_norm": 3.363358974456787,
      "learning_rate": 3.951271186440678e-05,
      "loss": 2.0797,
      "step": 100
    },
    {
      "epoch": 0.2330508474576271,
      "grad_norm": 3.2457995414733887,
      "learning_rate": 3.8453389830508476e-05,
      "loss": 2.219,
      "step": 110
    },
    {
      "epoch": 0.2542372881355932,
      "grad_norm": 3.339585065841675,
      "learning_rate": 3.739406779661017e-05,
      "loss": 2.1114,
      "step": 120
    },
    {
      "epoch": 0.2754237288135593,
      "grad_norm": 3.4949917793273926,
      "learning_rate": 3.633474576271187e-05,
      "loss": 1.966,
      "step": 130
    },
    {
      "epoch": 0.2966101694915254,
      "grad_norm": 3.223611354827881,
      "learning_rate": 3.527542372881356e-05,
      "loss": 2.1414,
      "step": 140
    },
    {
      "epoch": 0.3177966101694915,
      "grad_norm": 3.4801230430603027,
      "learning_rate": 3.421610169491525e-05,
      "loss": 2.0981,
      "step": 150
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 3.30033540725708,
      "learning_rate": 3.315677966101695e-05,
      "loss": 1.9982,
      "step": 160
    },
    {
      "epoch": 0.3601694915254237,
      "grad_norm": 3.305997610092163,
      "learning_rate": 3.209745762711864e-05,
      "loss": 1.9845,
      "step": 170
    },
    {
      "epoch": 0.3813559322033898,
      "grad_norm": 3.4289534091949463,
      "learning_rate": 3.1038135593220344e-05,
      "loss": 2.0987,
      "step": 180
    },
    {
      "epoch": 0.4025423728813559,
      "grad_norm": 3.018153190612793,
      "learning_rate": 2.9978813559322032e-05,
      "loss": 2.0208,
      "step": 190
    },
    {
      "epoch": 0.423728813559322,
      "grad_norm": 3.609093427658081,
      "learning_rate": 2.891949152542373e-05,
      "loss": 1.996,
      "step": 200
    },
    {
      "epoch": 0.4449152542372881,
      "grad_norm": 4.0159220695495605,
      "learning_rate": 2.7860169491525423e-05,
      "loss": 1.9612,
      "step": 210
    },
    {
      "epoch": 0.4661016949152542,
      "grad_norm": 3.264458179473877,
      "learning_rate": 2.6800847457627122e-05,
      "loss": 2.0203,
      "step": 220
    },
    {
      "epoch": 0.4872881355932203,
      "grad_norm": 3.696259021759033,
      "learning_rate": 2.5741525423728814e-05,
      "loss": 2.0867,
      "step": 230
    },
    {
      "epoch": 0.5084745762711864,
      "grad_norm": 4.3933796882629395,
      "learning_rate": 2.468220338983051e-05,
      "loss": 1.9675,
      "step": 240
    },
    {
      "epoch": 0.5296610169491526,
      "grad_norm": 3.876000165939331,
      "learning_rate": 2.3622881355932204e-05,
      "loss": 2.0325,
      "step": 250
    },
    {
      "epoch": 0.5508474576271186,
      "grad_norm": 3.8333613872528076,
      "learning_rate": 2.25635593220339e-05,
      "loss": 1.9674,
      "step": 260
    },
    {
      "epoch": 0.5720338983050848,
      "grad_norm": 3.398927688598633,
      "learning_rate": 2.1504237288135595e-05,
      "loss": 1.9873,
      "step": 270
    },
    {
      "epoch": 0.5932203389830508,
      "grad_norm": 2.980912446975708,
      "learning_rate": 2.044491525423729e-05,
      "loss": 1.9447,
      "step": 280
    },
    {
      "epoch": 0.614406779661017,
      "grad_norm": 4.0269246101379395,
      "learning_rate": 1.9385593220338986e-05,
      "loss": 1.9553,
      "step": 290
    },
    {
      "epoch": 0.635593220338983,
      "grad_norm": 3.15983247756958,
      "learning_rate": 1.832627118644068e-05,
      "loss": 1.9081,
      "step": 300
    },
    {
      "epoch": 0.6567796610169492,
      "grad_norm": 4.2125935554504395,
      "learning_rate": 1.7266949152542373e-05,
      "loss": 1.9373,
      "step": 310
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 3.3405373096466064,
      "learning_rate": 1.620762711864407e-05,
      "loss": 1.8637,
      "step": 320
    },
    {
      "epoch": 0.6991525423728814,
      "grad_norm": 3.782801628112793,
      "learning_rate": 1.5148305084745764e-05,
      "loss": 2.029,
      "step": 330
    },
    {
      "epoch": 0.7203389830508474,
      "grad_norm": 3.6767022609710693,
      "learning_rate": 1.408898305084746e-05,
      "loss": 1.7768,
      "step": 340
    },
    {
      "epoch": 0.7415254237288136,
      "grad_norm": 3.3231544494628906,
      "learning_rate": 1.3029661016949155e-05,
      "loss": 1.9421,
      "step": 350
    },
    {
      "epoch": 0.7627118644067796,
      "grad_norm": 3.579037666320801,
      "learning_rate": 1.1970338983050848e-05,
      "loss": 1.8764,
      "step": 360
    },
    {
      "epoch": 0.7838983050847458,
      "grad_norm": 3.5025410652160645,
      "learning_rate": 1.0911016949152544e-05,
      "loss": 1.8414,
      "step": 370
    },
    {
      "epoch": 0.8050847457627118,
      "grad_norm": 4.006308078765869,
      "learning_rate": 9.851694915254237e-06,
      "loss": 1.8852,
      "step": 380
    },
    {
      "epoch": 0.826271186440678,
      "grad_norm": 3.3026678562164307,
      "learning_rate": 8.792372881355933e-06,
      "loss": 1.9024,
      "step": 390
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 3.574389696121216,
      "learning_rate": 7.733050847457628e-06,
      "loss": 1.8935,
      "step": 400
    },
    {
      "epoch": 0.8686440677966102,
      "grad_norm": 3.5349714756011963,
      "learning_rate": 6.6737288135593225e-06,
      "loss": 1.8763,
      "step": 410
    },
    {
      "epoch": 0.8898305084745762,
      "grad_norm": 3.3055741786956787,
      "learning_rate": 5.614406779661018e-06,
      "loss": 1.9133,
      "step": 420
    },
    {
      "epoch": 0.9110169491525424,
      "grad_norm": 5.87731409072876,
      "learning_rate": 4.5550847457627115e-06,
      "loss": 1.9598,
      "step": 430
    },
    {
      "epoch": 0.9322033898305084,
      "grad_norm": 3.8937673568725586,
      "learning_rate": 3.495762711864407e-06,
      "loss": 1.8699,
      "step": 440
    },
    {
      "epoch": 0.9533898305084746,
      "grad_norm": 3.4998295307159424,
      "learning_rate": 2.436440677966102e-06,
      "loss": 1.9595,
      "step": 450
    },
    {
      "epoch": 0.9745762711864406,
      "grad_norm": 3.4720184803009033,
      "learning_rate": 1.3771186440677967e-06,
      "loss": 1.8245,
      "step": 460
    },
    {
      "epoch": 0.9957627118644068,
      "grad_norm": 3.557469367980957,
      "learning_rate": 3.1779661016949154e-07,
      "loss": 2.0716,
      "step": 470
    }
  ],
  "logging_steps": 10,
  "max_steps": 472,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1823897464012800.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}