| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9090909090909091, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01818181818181818, |
| "grad_norm": 8.15070915222168, |
| "learning_rate": 1.8e-07, |
| "loss": 1.1203, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03636363636363636, |
| "grad_norm": 7.25395393371582, |
| "learning_rate": 3.8e-07, |
| "loss": 1.1271, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05454545454545454, |
| "grad_norm": 7.708883762359619, |
| "learning_rate": 5.8e-07, |
| "loss": 1.1521, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07272727272727272, |
| "grad_norm": 6.450888633728027, |
| "learning_rate": 7.8e-07, |
| "loss": 1.0706, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 5.316545486450195, |
| "learning_rate": 9.8e-07, |
| "loss": 0.9168, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10909090909090909, |
| "grad_norm": 4.022304058074951, |
| "learning_rate": 1.18e-06, |
| "loss": 0.8431, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12727272727272726, |
| "grad_norm": 3.017310857772827, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 0.5959, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 1.4964921474456787, |
| "learning_rate": 1.5800000000000003e-06, |
| "loss": 0.5449, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16363636363636364, |
| "grad_norm": 1.020382046699524, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 0.3732, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 1.1615612506866455, |
| "learning_rate": 1.98e-06, |
| "loss": 0.3201, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.6558809876441956, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 0.259, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21818181818181817, |
| "grad_norm": 0.4908086657524109, |
| "learning_rate": 2.38e-06, |
| "loss": 0.2066, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.23636363636363636, |
| "grad_norm": 0.5629522204399109, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 0.187, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2545454545454545, |
| "grad_norm": 0.4503326117992401, |
| "learning_rate": 2.78e-06, |
| "loss": 0.1777, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.4197770655155182, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 0.1573, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2909090909090909, |
| "grad_norm": 0.4916008412837982, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 0.1452, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3090909090909091, |
| "grad_norm": 0.5286891460418701, |
| "learning_rate": 3.38e-06, |
| "loss": 0.1385, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.32727272727272727, |
| "grad_norm": 0.3966865837574005, |
| "learning_rate": 3.58e-06, |
| "loss": 0.1298, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.34545454545454546, |
| "grad_norm": 0.5095552206039429, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.1165, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.4470977783203125, |
| "learning_rate": 3.98e-06, |
| "loss": 0.1149, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.38181818181818183, |
| "grad_norm": 0.3943207561969757, |
| "learning_rate": 4.18e-06, |
| "loss": 0.1142, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5589470863342285, |
| "learning_rate": 4.38e-06, |
| "loss": 0.1077, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.41818181818181815, |
| "grad_norm": 0.36186227202415466, |
| "learning_rate": 4.58e-06, |
| "loss": 0.0995, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.43636363636363634, |
| "grad_norm": 0.41225889325141907, |
| "learning_rate": 4.780000000000001e-06, |
| "loss": 0.0921, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.46484509110450745, |
| "learning_rate": 4.98e-06, |
| "loss": 0.0849, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4727272727272727, |
| "grad_norm": 0.3528333902359009, |
| "learning_rate": 5.18e-06, |
| "loss": 0.08, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4909090909090909, |
| "grad_norm": 0.2926469147205353, |
| "learning_rate": 5.38e-06, |
| "loss": 0.0888, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.509090909090909, |
| "grad_norm": 0.3681964576244354, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 0.0745, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5272727272727272, |
| "grad_norm": 0.38524869084358215, |
| "learning_rate": 5.78e-06, |
| "loss": 0.0823, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.45936059951782227, |
| "learning_rate": 5.98e-06, |
| "loss": 0.0733, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5636363636363636, |
| "grad_norm": 0.5193211436271667, |
| "learning_rate": 6.18e-06, |
| "loss": 0.0752, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5818181818181818, |
| "grad_norm": 0.3415321707725525, |
| "learning_rate": 6.38e-06, |
| "loss": 0.0638, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.3874039053916931, |
| "learning_rate": 6.58e-06, |
| "loss": 0.0625, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6181818181818182, |
| "grad_norm": 0.3335237205028534, |
| "learning_rate": 6.78e-06, |
| "loss": 0.0574, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 0.3018106520175934, |
| "learning_rate": 6.98e-06, |
| "loss": 0.0526, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6545454545454545, |
| "grad_norm": 0.33542412519454956, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 0.0541, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6727272727272727, |
| "grad_norm": 0.33352839946746826, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 0.0547, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6909090909090909, |
| "grad_norm": 0.3171086311340332, |
| "learning_rate": 7.580000000000001e-06, |
| "loss": 0.0488, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7090909090909091, |
| "grad_norm": 0.2902581989765167, |
| "learning_rate": 7.78e-06, |
| "loss": 0.0492, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.3150688409805298, |
| "learning_rate": 7.98e-06, |
| "loss": 0.0439, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7454545454545455, |
| "grad_norm": 0.28724926710128784, |
| "learning_rate": 8.18e-06, |
| "loss": 0.0496, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7636363636363637, |
| "grad_norm": 0.24236780405044556, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.0448, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7818181818181819, |
| "grad_norm": 0.3135775327682495, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.0453, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.3585805296897888, |
| "learning_rate": 8.78e-06, |
| "loss": 0.042, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.3116839528083801, |
| "learning_rate": 8.98e-06, |
| "loss": 0.042, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8363636363636363, |
| "grad_norm": 0.2887876629829407, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.0437, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8545454545454545, |
| "grad_norm": 0.31067466735839844, |
| "learning_rate": 9.38e-06, |
| "loss": 0.044, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8727272727272727, |
| "grad_norm": 0.28833264112472534, |
| "learning_rate": 9.58e-06, |
| "loss": 0.0408, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8909090909090909, |
| "grad_norm": 0.3931352496147156, |
| "learning_rate": 9.78e-06, |
| "loss": 0.0378, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.29344913363456726, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.0358, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 182, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|