| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 9610, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.5202913631633714, |
| "grad_norm": 1.687648057937622, |
| "learning_rate": 4.7403746097814776e-05, |
| "loss": 1.6767198486328125, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0405827263267429, |
| "grad_norm": 1.4969497919082642, |
| "learning_rate": 4.480228928199792e-05, |
| "loss": 1.27803369140625, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.5608740894901145, |
| "grad_norm": 1.6620995998382568, |
| "learning_rate": 4.220083246618106e-05, |
| "loss": 1.0915474853515625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.0811654526534857, |
| "grad_norm": 1.6661252975463867, |
| "learning_rate": 3.959937565036421e-05, |
| "loss": 0.9975990600585938, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.6014568158168574, |
| "grad_norm": 1.3287609815597534, |
| "learning_rate": 3.6997918834547346e-05, |
| "loss": 0.9085905151367187, |
| "step": 2500 |
| }, |
| { |
| "epoch": 3.121748178980229, |
| "grad_norm": 1.5800344944000244, |
| "learning_rate": 3.439646201873049e-05, |
| "loss": 0.8760296020507813, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.6420395421436003, |
| "grad_norm": 1.364020586013794, |
| "learning_rate": 3.179500520291364e-05, |
| "loss": 0.8017211303710937, |
| "step": 3500 |
| }, |
| { |
| "epoch": 4.1623309053069715, |
| "grad_norm": 1.3175318241119385, |
| "learning_rate": 2.9193548387096776e-05, |
| "loss": 0.7779053344726562, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.682622268470343, |
| "grad_norm": 4.449261665344238, |
| "learning_rate": 2.659209157127992e-05, |
| "loss": 0.7223885498046875, |
| "step": 4500 |
| }, |
| { |
| "epoch": 5.202913631633715, |
| "grad_norm": 1.5468088388442993, |
| "learning_rate": 2.3990634755463058e-05, |
| "loss": 0.6987474365234375, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.723204994797086, |
| "grad_norm": 1.247883677482605, |
| "learning_rate": 2.13891779396462e-05, |
| "loss": 0.6626260986328125, |
| "step": 5500 |
| }, |
| { |
| "epoch": 6.243496357960458, |
| "grad_norm": 1.5333998203277588, |
| "learning_rate": 1.8787721123829346e-05, |
| "loss": 0.6409296875, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.76378772112383, |
| "grad_norm": 1.6951643228530884, |
| "learning_rate": 1.618626430801249e-05, |
| "loss": 0.6173243408203125, |
| "step": 6500 |
| }, |
| { |
| "epoch": 7.2840790842872005, |
| "grad_norm": 1.4317214488983154, |
| "learning_rate": 1.3584807492195631e-05, |
| "loss": 0.5941461791992187, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.804370447450572, |
| "grad_norm": 1.5687386989593506, |
| "learning_rate": 1.0983350676378774e-05, |
| "loss": 0.5830839233398437, |
| "step": 7500 |
| }, |
| { |
| "epoch": 8.324661810613943, |
| "grad_norm": 1.5302783250808716, |
| "learning_rate": 8.381893860561914e-06, |
| "loss": 0.5607491455078125, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.844953173777315, |
| "grad_norm": 1.4110559225082397, |
| "learning_rate": 5.780437044745058e-06, |
| "loss": 0.5564122924804688, |
| "step": 8500 |
| }, |
| { |
| "epoch": 9.365244536940686, |
| "grad_norm": 1.3692632913589478, |
| "learning_rate": 3.1789802289282e-06, |
| "loss": 0.54335546875, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.885535900104058, |
| "grad_norm": 1.0497645139694214, |
| "learning_rate": 5.775234131113424e-07, |
| "loss": 0.539821533203125, |
| "step": 9500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 9610, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.218510905176064e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|