{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 235, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02127659574468085, "grad_norm": 1.2565886974334717, "learning_rate": 2.033898305084746e-06, "loss": 1.2616, "step": 5 }, { "epoch": 0.0425531914893617, "grad_norm": 0.6704114675521851, "learning_rate": 4.576271186440678e-06, "loss": 1.3138, "step": 10 }, { "epoch": 0.06382978723404255, "grad_norm": 0.6389040946960449, "learning_rate": 7.1186440677966106e-06, "loss": 1.2464, "step": 15 }, { "epoch": 0.0851063829787234, "grad_norm": 0.5411674976348877, "learning_rate": 9.661016949152542e-06, "loss": 1.1941, "step": 20 }, { "epoch": 0.10638297872340426, "grad_norm": 0.5558146238327026, "learning_rate": 1.2203389830508475e-05, "loss": 1.2202, "step": 25 }, { "epoch": 0.1276595744680851, "grad_norm": 0.6425330638885498, "learning_rate": 1.4745762711864408e-05, "loss": 1.2116, "step": 30 }, { "epoch": 0.14893617021276595, "grad_norm": 0.5123224258422852, "learning_rate": 1.728813559322034e-05, "loss": 1.1589, "step": 35 }, { "epoch": 0.1702127659574468, "grad_norm": 0.563200831413269, "learning_rate": 1.983050847457627e-05, "loss": 1.1442, "step": 40 }, { "epoch": 0.19148936170212766, "grad_norm": 0.4202119708061218, "learning_rate": 2.2372881355932205e-05, "loss": 1.1202, "step": 45 }, { "epoch": 0.2127659574468085, "grad_norm": 0.48950880765914917, "learning_rate": 2.4915254237288138e-05, "loss": 1.1672, "step": 50 }, { "epoch": 0.23404255319148937, "grad_norm": 0.42279088497161865, "learning_rate": 2.7457627118644068e-05, "loss": 1.0812, "step": 55 }, { "epoch": 0.2553191489361702, "grad_norm": 0.48940858244895935, "learning_rate": 3e-05, "loss": 1.1478, "step": 60 }, { "epoch": 0.2765957446808511, "grad_norm": 0.43859195709228516, "learning_rate": 2.9998514182537154e-05, "loss": 1.0574, "step": 65 }, { "epoch": 0.2978723404255319, "grad_norm": 0.506537139415741, "learning_rate": 2.9994057024502427e-05, "loss": 1.1242, "step": 70 }, { "epoch": 0.3191489361702128, "grad_norm": 0.5277345180511475, "learning_rate": 2.998662940889891e-05, "loss": 1.0275, "step": 75 }, { "epoch": 0.3404255319148936, "grad_norm": 0.5273380279541016, "learning_rate": 2.9976232807204073e-05, "loss": 1.0244, "step": 80 }, { "epoch": 0.3617021276595745, "grad_norm": 0.6024174690246582, "learning_rate": 2.9962869279078226e-05, "loss": 0.9984, "step": 85 }, { "epoch": 0.3829787234042553, "grad_norm": 0.6643021106719971, "learning_rate": 2.9946541471956496e-05, "loss": 0.9743, "step": 90 }, { "epoch": 0.40425531914893614, "grad_norm": 0.6192746758460999, "learning_rate": 2.9927252620524346e-05, "loss": 1.0283, "step": 95 }, { "epoch": 0.425531914893617, "grad_norm": 0.6014237999916077, "learning_rate": 2.9905006546076746e-05, "loss": 0.9382, "step": 100 }, { "epoch": 0.44680851063829785, "grad_norm": 0.6564805507659912, "learning_rate": 2.9879807655761145e-05, "loss": 1.0015, "step": 105 }, { "epoch": 0.46808510638297873, "grad_norm": 0.5979855060577393, "learning_rate": 2.985166094170439e-05, "loss": 0.9551, "step": 110 }, { "epoch": 0.48936170212765956, "grad_norm": 0.7205618619918823, "learning_rate": 2.9820571980023704e-05, "loss": 0.9, "step": 115 }, { "epoch": 0.5106382978723404, "grad_norm": 0.7843464016914368, "learning_rate": 2.9786546929722055e-05, "loss": 0.9238, "step": 120 }, { "epoch": 0.5319148936170213, "grad_norm": 0.7814379930496216, "learning_rate": 2.974959253146796e-05, "loss": 0.8699, "step": 125 }, { "epoch": 0.5531914893617021, "grad_norm": 0.7265343070030212, "learning_rate": 2.9709716106260115e-05, "loss": 0.8573, "step": 130 }, { "epoch": 0.574468085106383, "grad_norm": 0.7019990086555481, "learning_rate": 2.966692555397705e-05, "loss": 0.8643, "step": 135 }, { "epoch": 0.5957446808510638, "grad_norm": 0.6668793559074402, "learning_rate": 2.962122935181207e-05, "loss": 0.8744, "step": 140 }, { "epoch": 0.6170212765957447, "grad_norm": 0.8258264064788818, "learning_rate": 2.957263655259387e-05, "loss": 0.8215, "step": 145 }, { "epoch": 0.6382978723404256, "grad_norm": 0.909509003162384, "learning_rate": 2.9521156782993066e-05, "loss": 0.8543, "step": 150 }, { "epoch": 0.6595744680851063, "grad_norm": 0.8529114723205566, "learning_rate": 2.9466800241615075e-05, "loss": 0.8333, "step": 155 }, { "epoch": 0.6808510638297872, "grad_norm": 0.821320652961731, "learning_rate": 2.940957769697969e-05, "loss": 0.824, "step": 160 }, { "epoch": 0.7021276595744681, "grad_norm": 0.8300550580024719, "learning_rate": 2.9349500485387718e-05, "loss": 0.7846, "step": 165 }, { "epoch": 0.723404255319149, "grad_norm": 0.8043350577354431, "learning_rate": 2.9286580508675174e-05, "loss": 0.7491, "step": 170 }, { "epoch": 0.7446808510638298, "grad_norm": 0.9453855156898499, "learning_rate": 2.9220830231855417e-05, "loss": 0.7455, "step": 175 }, { "epoch": 0.7659574468085106, "grad_norm": 0.9748790264129639, "learning_rate": 2.9152262680649704e-05, "loss": 0.7643, "step": 180 }, { "epoch": 0.7872340425531915, "grad_norm": 0.9387068152427673, "learning_rate": 2.90808914389067e-05, "loss": 0.7152, "step": 185 }, { "epoch": 0.8085106382978723, "grad_norm": 0.9829296469688416, "learning_rate": 2.900673064591139e-05, "loss": 0.7687, "step": 190 }, { "epoch": 0.8297872340425532, "grad_norm": 0.8986831903457642, "learning_rate": 2.8929794993583937e-05, "loss": 0.7267, "step": 195 }, { "epoch": 0.851063829787234, "grad_norm": 0.9627141952514648, "learning_rate": 2.8850099723569104e-05, "loss": 0.7198, "step": 200 }, { "epoch": 0.8723404255319149, "grad_norm": 1.081162691116333, "learning_rate": 2.876766062421675e-05, "loss": 0.7497, "step": 205 }, { "epoch": 0.8936170212765957, "grad_norm": 1.0189602375030518, "learning_rate": 2.8682494027454e-05, "loss": 0.6205, "step": 210 }, { "epoch": 0.9148936170212766, "grad_norm": 1.164552092552185, "learning_rate": 2.8594616805549752e-05, "loss": 0.6354, "step": 215 }, { "epoch": 0.9361702127659575, "grad_norm": 0.9463388919830322, "learning_rate": 2.8504046367772117e-05, "loss": 0.6468, "step": 220 }, { "epoch": 0.9574468085106383, "grad_norm": 0.9885657429695129, "learning_rate": 2.8410800656939512e-05, "loss": 0.6339, "step": 225 }, { "epoch": 0.9787234042553191, "grad_norm": 1.0227285623550415, "learning_rate": 2.8314898145865996e-05, "loss": 0.5977, "step": 230 }, { "epoch": 1.0, "grad_norm": 1.0185871124267578, "learning_rate": 2.8216357833701667e-05, "loss": 0.6557, "step": 235 } ], "logging_steps": 5, "max_steps": 1175, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.3767294444660326e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }