{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 10146,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14784151389710232,
      "grad_norm": 3.7569432258605957,
      "learning_rate": 1.9016361127537947e-05,
      "loss": 0.156098388671875,
      "step": 500
    },
    {
      "epoch": 0.29568302779420463,
      "grad_norm": 0.009413833729922771,
      "learning_rate": 1.8030751034890598e-05,
      "loss": 0.09492064666748047,
      "step": 1000
    },
    {
      "epoch": 0.4435245416913069,
      "grad_norm": 0.07881546020507812,
      "learning_rate": 1.704514094224325e-05,
      "loss": 0.07839873504638672,
      "step": 1500
    },
    {
      "epoch": 0.5913660555884093,
      "grad_norm": 0.015427447855472565,
      "learning_rate": 1.6059530849595903e-05,
      "loss": 0.07814859008789063,
      "step": 2000
    },
    {
      "epoch": 0.7392075694855116,
      "grad_norm": 14.57061767578125,
      "learning_rate": 1.5073920756948552e-05,
      "loss": 0.08136223602294922,
      "step": 2500
    },
    {
      "epoch": 0.8870490833826138,
      "grad_norm": 1.0001031160354614,
      "learning_rate": 1.4088310664301204e-05,
      "loss": 0.07243869018554687,
      "step": 3000
    },
    {
      "epoch": 1.0348905972797162,
      "grad_norm": 30.364643096923828,
      "learning_rate": 1.3102700571653855e-05,
      "loss": 0.04469930648803711,
      "step": 3500
    },
    {
      "epoch": 1.1827321111768185,
      "grad_norm": 0.0182269848883152,
      "learning_rate": 1.2117090479006506e-05,
      "loss": 0.03440779495239258,
      "step": 4000
    },
    {
      "epoch": 1.3305736250739209,
      "grad_norm": 0.0005539056146517396,
      "learning_rate": 1.1131480386359156e-05,
      "loss": 0.0230482120513916,
      "step": 4500
    },
    {
      "epoch": 1.4784151389710232,
      "grad_norm": 0.739719033241272,
      "learning_rate": 1.0145870293711809e-05,
      "loss": 0.028042703628540038,
      "step": 5000
    },
    {
      "epoch": 1.6262566528681255,
      "grad_norm": 0.0063209934160113335,
      "learning_rate": 9.16026020106446e-06,
      "loss": 0.02548642349243164,
      "step": 5500
    },
    {
      "epoch": 1.7740981667652278,
      "grad_norm": 0.0015545282512903214,
      "learning_rate": 8.17465010841711e-06,
      "loss": 0.023311178207397462,
      "step": 6000
    },
    {
      "epoch": 1.9219396806623301,
      "grad_norm": 0.0024629898834973574,
      "learning_rate": 7.189040015769762e-06,
      "loss": 0.019577335357666016,
      "step": 6500
    },
    {
      "epoch": 2.0697811945594324,
      "grad_norm": 0.0016239744145423174,
      "learning_rate": 6.203429923122414e-06,
      "loss": 0.016429786682128907,
      "step": 7000
    },
    {
      "epoch": 2.2176227084565348,
      "grad_norm": 33.945587158203125,
      "learning_rate": 5.217819830475065e-06,
      "loss": 0.00910054111480713,
      "step": 7500
    },
    {
      "epoch": 2.365464222353637,
      "grad_norm": 0.0004062611551489681,
      "learning_rate": 4.2322097378277155e-06,
      "loss": 0.008913342475891112,
      "step": 8000
    },
    {
      "epoch": 2.5133057362507394,
      "grad_norm": 0.00030476730898953974,
      "learning_rate": 3.246599645180367e-06,
      "loss": 0.006344354152679444,
      "step": 8500
    },
    {
      "epoch": 2.6611472501478417,
      "grad_norm": 0.0006314264028333127,
      "learning_rate": 2.260989552533018e-06,
      "loss": 0.0064224090576171875,
      "step": 9000
    },
    {
      "epoch": 2.808988764044944,
      "grad_norm": 0.0005876660579815507,
      "learning_rate": 1.2753794598856695e-06,
      "loss": 0.007262358665466309,
      "step": 9500
    },
    {
      "epoch": 2.9568302779420463,
      "grad_norm": 0.0005308115505613387,
      "learning_rate": 2.897693672383205e-07,
      "loss": 0.004996685981750488,
      "step": 10000
    },
    {
      "epoch": 3.0,
      "step": 10146,
      "total_flos": 9.454816453507891e+16,
      "train_loss": 0.040564874114779687,
      "train_runtime": 20432.9457,
      "train_samples_per_second": 4.965,
      "train_steps_per_second": 0.497
    }
  ],
  "logging_steps": 500,
  "max_steps": 10146,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.454816453507891e+16,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}