| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 3500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "grad_norm": 21.81972312927246, |
| "learning_rate": 4.8825e-05, |
| "loss": 46.8162, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 19.624361038208008, |
| "learning_rate": 4.7575000000000004e-05, |
| "loss": 35.0983, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 17.971105575561523, |
| "learning_rate": 4.63375e-05, |
| "loss": 33.9533, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 16.36736297607422, |
| "learning_rate": 4.5087500000000005e-05, |
| "loss": 33.1973, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 20.30103874206543, |
| "learning_rate": 4.38375e-05, |
| "loss": 32.5962, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 17.021833419799805, |
| "learning_rate": 4.2587500000000005e-05, |
| "loss": 31.0238, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 18.680706024169922, |
| "learning_rate": 4.13375e-05, |
| "loss": 30.8848, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 17.90163230895996, |
| "learning_rate": 4.00875e-05, |
| "loss": 30.7304, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 16.27179527282715, |
| "learning_rate": 3.88375e-05, |
| "loss": 30.6612, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 17.097187042236328, |
| "learning_rate": 3.7587500000000006e-05, |
| "loss": 30.476, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 17.85845184326172, |
| "learning_rate": 3.63375e-05, |
| "loss": 29.0566, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 20.759204864501953, |
| "learning_rate": 3.50875e-05, |
| "loss": 29.1165, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 16.447481155395508, |
| "learning_rate": 3.38375e-05, |
| "loss": 29.0477, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 16.511810302734375, |
| "learning_rate": 3.2587500000000006e-05, |
| "loss": 29.0484, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 15.86971378326416, |
| "learning_rate": 3.13375e-05, |
| "loss": 29.0645, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 17.456417083740234, |
| "learning_rate": 3.0087500000000003e-05, |
| "loss": 27.8042, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 18.249238967895508, |
| "learning_rate": 2.88375e-05, |
| "loss": 27.8263, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 16.873659133911133, |
| "learning_rate": 2.75875e-05, |
| "loss": 27.8207, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 18.681793212890625, |
| "learning_rate": 2.6337500000000003e-05, |
| "loss": 27.8755, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 16.73720359802246, |
| "learning_rate": 2.5087500000000003e-05, |
| "loss": 27.7821, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 17.58981704711914, |
| "learning_rate": 2.38375e-05, |
| "loss": 26.7495, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 17.446008682250977, |
| "learning_rate": 2.25875e-05, |
| "loss": 26.9017, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 17.84132194519043, |
| "learning_rate": 2.13375e-05, |
| "loss": 26.9374, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 16.804340362548828, |
| "learning_rate": 2.00875e-05, |
| "loss": 26.8123, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 17.43560218811035, |
| "learning_rate": 1.88375e-05, |
| "loss": 26.8506, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 17.689725875854492, |
| "learning_rate": 1.75875e-05, |
| "loss": 26.0296, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 17.706035614013672, |
| "learning_rate": 1.63375e-05, |
| "loss": 26.1274, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 17.015243530273438, |
| "learning_rate": 1.50875e-05, |
| "loss": 26.0055, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 17.540559768676758, |
| "learning_rate": 1.3837500000000001e-05, |
| "loss": 26.0804, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 17.685171127319336, |
| "learning_rate": 1.25875e-05, |
| "loss": 26.1928, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.2, |
| "grad_norm": 18.444171905517578, |
| "learning_rate": 1.1337500000000001e-05, |
| "loss": 25.493, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 18.2116756439209, |
| "learning_rate": 1.0087500000000001e-05, |
| "loss": 25.5397, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.6, |
| "grad_norm": 17.715866088867188, |
| "learning_rate": 8.8375e-06, |
| "loss": 25.4594, |
| "step": 3300 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 18.493494033813477, |
| "learning_rate": 7.5875e-06, |
| "loss": 25.5701, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 18.52659797668457, |
| "learning_rate": 6.337500000000001e-06, |
| "loss": 25.5077, |
| "step": 3500 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 4000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 30, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|