{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 235,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02127659574468085,
      "grad_norm": 1.2565886974334717,
      "learning_rate": 2.033898305084746e-06,
      "loss": 1.2616,
      "step": 5
    },
    {
      "epoch": 0.0425531914893617,
      "grad_norm": 0.6704114675521851,
      "learning_rate": 4.576271186440678e-06,
      "loss": 1.3138,
      "step": 10
    },
    {
      "epoch": 0.06382978723404255,
      "grad_norm": 0.6389040946960449,
      "learning_rate": 7.1186440677966106e-06,
      "loss": 1.2464,
      "step": 15
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.5411674976348877,
      "learning_rate": 9.661016949152542e-06,
      "loss": 1.1941,
      "step": 20
    },
    {
      "epoch": 0.10638297872340426,
      "grad_norm": 0.5558146238327026,
      "learning_rate": 1.2203389830508475e-05,
      "loss": 1.2202,
      "step": 25
    },
    {
      "epoch": 0.1276595744680851,
      "grad_norm": 0.6425330638885498,
      "learning_rate": 1.4745762711864408e-05,
      "loss": 1.2116,
      "step": 30
    },
    {
      "epoch": 0.14893617021276595,
      "grad_norm": 0.5123224258422852,
      "learning_rate": 1.728813559322034e-05,
      "loss": 1.1589,
      "step": 35
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.563200831413269,
      "learning_rate": 1.983050847457627e-05,
      "loss": 1.1442,
      "step": 40
    },
    {
      "epoch": 0.19148936170212766,
      "grad_norm": 0.4202119708061218,
      "learning_rate": 2.2372881355932205e-05,
      "loss": 1.1202,
      "step": 45
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 0.48950880765914917,
      "learning_rate": 2.4915254237288138e-05,
      "loss": 1.1672,
      "step": 50
    },
    {
      "epoch": 0.23404255319148937,
      "grad_norm": 0.42279088497161865,
      "learning_rate": 2.7457627118644068e-05,
      "loss": 1.0812,
      "step": 55
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 0.48940858244895935,
      "learning_rate": 3e-05,
      "loss": 1.1478,
      "step": 60
    },
    {
      "epoch": 0.2765957446808511,
      "grad_norm": 0.43859195709228516,
      "learning_rate": 2.9998514182537154e-05,
      "loss": 1.0574,
      "step": 65
    },
    {
      "epoch": 0.2978723404255319,
      "grad_norm": 0.506537139415741,
      "learning_rate": 2.9994057024502427e-05,
      "loss": 1.1242,
      "step": 70
    },
    {
      "epoch": 0.3191489361702128,
      "grad_norm": 0.5277345180511475,
      "learning_rate": 2.998662940889891e-05,
      "loss": 1.0275,
      "step": 75
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 0.5273380279541016,
      "learning_rate": 2.9976232807204073e-05,
      "loss": 1.0244,
      "step": 80
    },
    {
      "epoch": 0.3617021276595745,
      "grad_norm": 0.6024174690246582,
      "learning_rate": 2.9962869279078226e-05,
      "loss": 0.9984,
      "step": 85
    },
    {
      "epoch": 0.3829787234042553,
      "grad_norm": 0.6643021106719971,
      "learning_rate": 2.9946541471956496e-05,
      "loss": 0.9743,
      "step": 90
    },
    {
      "epoch": 0.40425531914893614,
      "grad_norm": 0.6192746758460999,
      "learning_rate": 2.9927252620524346e-05,
      "loss": 1.0283,
      "step": 95
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.6014237999916077,
      "learning_rate": 2.9905006546076746e-05,
      "loss": 0.9382,
      "step": 100
    },
    {
      "epoch": 0.44680851063829785,
      "grad_norm": 0.6564805507659912,
      "learning_rate": 2.9879807655761145e-05,
      "loss": 1.0015,
      "step": 105
    },
    {
      "epoch": 0.46808510638297873,
      "grad_norm": 0.5979855060577393,
      "learning_rate": 2.985166094170439e-05,
      "loss": 0.9551,
      "step": 110
    },
    {
      "epoch": 0.48936170212765956,
      "grad_norm": 0.7205618619918823,
      "learning_rate": 2.9820571980023704e-05,
      "loss": 0.9,
      "step": 115
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 0.7843464016914368,
      "learning_rate": 2.9786546929722055e-05,
      "loss": 0.9238,
      "step": 120
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 0.7814379930496216,
      "learning_rate": 2.974959253146796e-05,
      "loss": 0.8699,
      "step": 125
    },
    {
      "epoch": 0.5531914893617021,
      "grad_norm": 0.7265343070030212,
      "learning_rate": 2.9709716106260115e-05,
      "loss": 0.8573,
      "step": 130
    },
    {
      "epoch": 0.574468085106383,
      "grad_norm": 0.7019990086555481,
      "learning_rate": 2.966692555397705e-05,
      "loss": 0.8643,
      "step": 135
    },
    {
      "epoch": 0.5957446808510638,
      "grad_norm": 0.6668793559074402,
      "learning_rate": 2.962122935181207e-05,
      "loss": 0.8744,
      "step": 140
    },
    {
      "epoch": 0.6170212765957447,
      "grad_norm": 0.8258264064788818,
      "learning_rate": 2.957263655259387e-05,
      "loss": 0.8215,
      "step": 145
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 0.909509003162384,
      "learning_rate": 2.9521156782993066e-05,
      "loss": 0.8543,
      "step": 150
    },
    {
      "epoch": 0.6595744680851063,
      "grad_norm": 0.8529114723205566,
      "learning_rate": 2.9466800241615075e-05,
      "loss": 0.8333,
      "step": 155
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 0.821320652961731,
      "learning_rate": 2.940957769697969e-05,
      "loss": 0.824,
      "step": 160
    },
    {
      "epoch": 0.7021276595744681,
      "grad_norm": 0.8300550580024719,
      "learning_rate": 2.9349500485387718e-05,
      "loss": 0.7846,
      "step": 165
    },
    {
      "epoch": 0.723404255319149,
      "grad_norm": 0.8043350577354431,
      "learning_rate": 2.9286580508675174e-05,
      "loss": 0.7491,
      "step": 170
    },
    {
      "epoch": 0.7446808510638298,
      "grad_norm": 0.9453855156898499,
      "learning_rate": 2.9220830231855417e-05,
      "loss": 0.7455,
      "step": 175
    },
    {
      "epoch": 0.7659574468085106,
      "grad_norm": 0.9748790264129639,
      "learning_rate": 2.9152262680649704e-05,
      "loss": 0.7643,
      "step": 180
    },
    {
      "epoch": 0.7872340425531915,
      "grad_norm": 0.9387068152427673,
      "learning_rate": 2.90808914389067e-05,
      "loss": 0.7152,
      "step": 185
    },
    {
      "epoch": 0.8085106382978723,
      "grad_norm": 0.9829296469688416,
      "learning_rate": 2.900673064591139e-05,
      "loss": 0.7687,
      "step": 190
    },
    {
      "epoch": 0.8297872340425532,
      "grad_norm": 0.8986831903457642,
      "learning_rate": 2.8929794993583937e-05,
      "loss": 0.7267,
      "step": 195
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.9627141952514648,
      "learning_rate": 2.8850099723569104e-05,
      "loss": 0.7198,
      "step": 200
    },
    {
      "epoch": 0.8723404255319149,
      "grad_norm": 1.081162691116333,
      "learning_rate": 2.876766062421675e-05,
      "loss": 0.7497,
      "step": 205
    },
    {
      "epoch": 0.8936170212765957,
      "grad_norm": 1.0189602375030518,
      "learning_rate": 2.8682494027454e-05,
      "loss": 0.6205,
      "step": 210
    },
    {
      "epoch": 0.9148936170212766,
      "grad_norm": 1.164552092552185,
      "learning_rate": 2.8594616805549752e-05,
      "loss": 0.6354,
      "step": 215
    },
    {
      "epoch": 0.9361702127659575,
      "grad_norm": 0.9463388919830322,
      "learning_rate": 2.8504046367772117e-05,
      "loss": 0.6468,
      "step": 220
    },
    {
      "epoch": 0.9574468085106383,
      "grad_norm": 0.9885657429695129,
      "learning_rate": 2.8410800656939512e-05,
      "loss": 0.6339,
      "step": 225
    },
    {
      "epoch": 0.9787234042553191,
      "grad_norm": 1.0227285623550415,
      "learning_rate": 2.8314898145865996e-05,
      "loss": 0.5977,
      "step": 230
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.0185871124267578,
      "learning_rate": 2.8216357833701667e-05,
      "loss": 0.6557,
      "step": 235
    }
  ],
  "logging_steps": 5,
  "max_steps": 1175,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.3767294444660326e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}