| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3746397694524496, |
| "eval_steps": 10, |
| "global_step": 130, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01440922190201729, |
| "grad_norm": 0.888121485710144, |
| "learning_rate": 0.0002988472622478386, |
| "loss": 2.4115, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02881844380403458, |
| "grad_norm": 0.8970298767089844, |
| "learning_rate": 0.00029740634005763684, |
| "loss": 2.23, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02881844380403458, |
| "eval_loss": 2.165903091430664, |
| "eval_runtime": 1.8407, |
| "eval_samples_per_second": 84.207, |
| "eval_steps_per_second": 10.865, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.043227665706051875, |
| "grad_norm": 1.078068494796753, |
| "learning_rate": 0.00029596541786743513, |
| "loss": 2.0155, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05763688760806916, |
| "grad_norm": 1.2069385051727295, |
| "learning_rate": 0.0002945244956772334, |
| "loss": 1.9346, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05763688760806916, |
| "eval_loss": 1.7943660020828247, |
| "eval_runtime": 1.7809, |
| "eval_samples_per_second": 87.035, |
| "eval_steps_per_second": 11.23, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07204610951008646, |
| "grad_norm": 1.3408102989196777, |
| "learning_rate": 0.0002930835734870317, |
| "loss": 1.7434, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08645533141210375, |
| "grad_norm": 1.4849472045898438, |
| "learning_rate": 0.00029164265129682994, |
| "loss": 1.5409, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08645533141210375, |
| "eval_loss": 1.5225533246994019, |
| "eval_runtime": 1.7657, |
| "eval_samples_per_second": 87.784, |
| "eval_steps_per_second": 11.327, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10086455331412104, |
| "grad_norm": 2.2489142417907715, |
| "learning_rate": 0.0002902017291066282, |
| "loss": 1.4702, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11527377521613832, |
| "grad_norm": 2.169492244720459, |
| "learning_rate": 0.00028876080691642647, |
| "loss": 1.398, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11527377521613832, |
| "eval_loss": 1.3084412813186646, |
| "eval_runtime": 1.7715, |
| "eval_samples_per_second": 87.495, |
| "eval_steps_per_second": 11.29, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12968299711815562, |
| "grad_norm": 2.665741205215454, |
| "learning_rate": 0.00028731988472622475, |
| "loss": 1.2839, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1440922190201729, |
| "grad_norm": 2.9897077083587646, |
| "learning_rate": 0.00028587896253602304, |
| "loss": 1.0893, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1440922190201729, |
| "eval_loss": 1.0865856409072876, |
| "eval_runtime": 1.7767, |
| "eval_samples_per_second": 87.241, |
| "eval_steps_per_second": 11.257, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1585014409221902, |
| "grad_norm": 2.3380894660949707, |
| "learning_rate": 0.0002844380403458213, |
| "loss": 1.0454, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1729106628242075, |
| "grad_norm": 2.571589469909668, |
| "learning_rate": 0.00028299711815561957, |
| "loss": 0.8605, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1729106628242075, |
| "eval_loss": 0.8586989641189575, |
| "eval_runtime": 1.7853, |
| "eval_samples_per_second": 86.822, |
| "eval_steps_per_second": 11.203, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1873198847262248, |
| "grad_norm": 3.0408358573913574, |
| "learning_rate": 0.00028155619596541786, |
| "loss": 0.8101, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2017291066282421, |
| "grad_norm": 2.4773292541503906, |
| "learning_rate": 0.0002801152737752161, |
| "loss": 0.7769, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2017291066282421, |
| "eval_loss": 0.6477732062339783, |
| "eval_runtime": 1.781, |
| "eval_samples_per_second": 87.029, |
| "eval_steps_per_second": 11.23, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.21613832853025935, |
| "grad_norm": 2.5505621433258057, |
| "learning_rate": 0.0002786743515850144, |
| "loss": 0.6286, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.23054755043227665, |
| "grad_norm": 2.5216686725616455, |
| "learning_rate": 0.00027723342939481267, |
| "loss": 0.5306, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23054755043227665, |
| "eval_loss": 0.4904349446296692, |
| "eval_runtime": 1.7712, |
| "eval_samples_per_second": 87.51, |
| "eval_steps_per_second": 11.292, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.24495677233429394, |
| "grad_norm": 3.0844411849975586, |
| "learning_rate": 0.00027579250720461096, |
| "loss": 0.5331, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.25936599423631124, |
| "grad_norm": 1.8952299356460571, |
| "learning_rate": 0.0002743515850144092, |
| "loss": 0.4093, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.25936599423631124, |
| "eval_loss": 0.40096166729927063, |
| "eval_runtime": 1.773, |
| "eval_samples_per_second": 87.422, |
| "eval_steps_per_second": 11.28, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2737752161383285, |
| "grad_norm": 3.3445639610290527, |
| "learning_rate": 0.0002729106628242075, |
| "loss": 0.3654, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2881844380403458, |
| "grad_norm": 1.9506555795669556, |
| "learning_rate": 0.0002714697406340057, |
| "loss": 0.3458, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2881844380403458, |
| "eval_loss": 0.32525885105133057, |
| "eval_runtime": 1.7918, |
| "eval_samples_per_second": 86.503, |
| "eval_steps_per_second": 11.162, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3025936599423631, |
| "grad_norm": 1.9951375722885132, |
| "learning_rate": 0.000270028818443804, |
| "loss": 0.2672, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3170028818443804, |
| "grad_norm": 2.8618788719177246, |
| "learning_rate": 0.0002685878962536023, |
| "loss": 0.3316, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3170028818443804, |
| "eval_loss": 0.29092785716056824, |
| "eval_runtime": 1.7704, |
| "eval_samples_per_second": 87.549, |
| "eval_steps_per_second": 11.297, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3314121037463977, |
| "grad_norm": 2.436544179916382, |
| "learning_rate": 0.00026714697406340053, |
| "loss": 0.3176, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.345821325648415, |
| "grad_norm": 1.1800215244293213, |
| "learning_rate": 0.0002657060518731988, |
| "loss": 0.2378, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.345821325648415, |
| "eval_loss": 0.25983747839927673, |
| "eval_runtime": 1.7719, |
| "eval_samples_per_second": 87.476, |
| "eval_steps_per_second": 11.287, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.36023054755043227, |
| "grad_norm": 1.0937371253967285, |
| "learning_rate": 0.0002642651296829971, |
| "loss": 0.2617, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3746397694524496, |
| "grad_norm": 1.5132169723510742, |
| "learning_rate": 0.0002628242074927954, |
| "loss": 0.2669, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3746397694524496, |
| "eval_loss": 0.2434806078672409, |
| "eval_runtime": 1.7851, |
| "eval_samples_per_second": 86.831, |
| "eval_steps_per_second": 11.204, |
| "step": 130 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1041, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1654370819112960.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|