{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 308,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.032467532467532464,
      "grad_norm": 0.42770177125930786,
      "learning_rate": 0.00019415584415584416,
      "loss": 1.5432,
      "step": 10
    },
    {
      "epoch": 0.06493506493506493,
      "grad_norm": 0.23341798782348633,
      "learning_rate": 0.00018766233766233769,
      "loss": 1.3915,
      "step": 20
    },
    {
      "epoch": 0.09740259740259741,
      "grad_norm": 0.18550963699817657,
      "learning_rate": 0.0001811688311688312,
      "loss": 1.284,
      "step": 30
    },
    {
      "epoch": 0.12987012987012986,
      "grad_norm": 0.16410741209983826,
      "learning_rate": 0.0001746753246753247,
      "loss": 1.2991,
      "step": 40
    },
    {
      "epoch": 0.16233766233766234,
      "grad_norm": 0.18876023590564728,
      "learning_rate": 0.0001681818181818182,
      "loss": 1.2557,
      "step": 50
    },
    {
      "epoch": 0.19480519480519481,
      "grad_norm": 0.16118866205215454,
      "learning_rate": 0.0001616883116883117,
      "loss": 1.2564,
      "step": 60
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.14530400931835175,
      "learning_rate": 0.0001551948051948052,
      "loss": 1.2824,
      "step": 70
    },
    {
      "epoch": 0.2597402597402597,
      "grad_norm": 0.14763061702251434,
      "learning_rate": 0.00014870129870129872,
      "loss": 1.2596,
      "step": 80
    },
    {
      "epoch": 0.2922077922077922,
      "grad_norm": 0.1304662674665451,
      "learning_rate": 0.00014220779220779222,
      "loss": 1.2703,
      "step": 90
    },
    {
      "epoch": 0.3246753246753247,
      "grad_norm": 0.16224819421768188,
      "learning_rate": 0.00013571428571428572,
      "loss": 1.2321,
      "step": 100
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.14050635695457458,
      "learning_rate": 0.00012922077922077922,
      "loss": 1.2435,
      "step": 110
    },
    {
      "epoch": 0.38961038961038963,
      "grad_norm": 0.13748130202293396,
      "learning_rate": 0.00012272727272727272,
      "loss": 1.2309,
      "step": 120
    },
    {
      "epoch": 0.42207792207792205,
      "grad_norm": 0.153740793466568,
      "learning_rate": 0.00011623376623376625,
      "loss": 1.2194,
      "step": 130
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.1640562266111374,
      "learning_rate": 0.00010974025974025974,
      "loss": 1.2205,
      "step": 140
    },
    {
      "epoch": 0.487012987012987,
      "grad_norm": 0.1855195015668869,
      "learning_rate": 0.00010324675324675325,
      "loss": 1.245,
      "step": 150
    },
    {
      "epoch": 0.5194805194805194,
      "grad_norm": 0.18740110099315643,
      "learning_rate": 9.675324675324677e-05,
      "loss": 1.2115,
      "step": 160
    },
    {
      "epoch": 0.551948051948052,
      "grad_norm": 0.15922732651233673,
      "learning_rate": 9.025974025974027e-05,
      "loss": 1.2553,
      "step": 170
    },
    {
      "epoch": 0.5844155844155844,
      "grad_norm": 0.14753209054470062,
      "learning_rate": 8.376623376623377e-05,
      "loss": 1.2125,
      "step": 180
    },
    {
      "epoch": 0.6168831168831169,
      "grad_norm": 0.14102435111999512,
      "learning_rate": 7.727272727272727e-05,
      "loss": 1.2312,
      "step": 190
    },
    {
      "epoch": 0.6493506493506493,
      "grad_norm": 0.15314294397830963,
      "learning_rate": 7.077922077922077e-05,
      "loss": 1.2646,
      "step": 200
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 0.15397056937217712,
      "learning_rate": 6.428571428571429e-05,
      "loss": 1.2236,
      "step": 210
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.19398224353790283,
      "learning_rate": 5.7792207792207796e-05,
      "loss": 1.2295,
      "step": 220
    },
    {
      "epoch": 0.7467532467532467,
      "grad_norm": 0.1594349890947342,
      "learning_rate": 5.1298701298701304e-05,
      "loss": 1.2558,
      "step": 230
    },
    {
      "epoch": 0.7792207792207793,
      "grad_norm": 0.1522524505853653,
      "learning_rate": 4.4805194805194805e-05,
      "loss": 1.2682,
      "step": 240
    },
    {
      "epoch": 0.8116883116883117,
      "grad_norm": 0.17501364648342133,
      "learning_rate": 3.831168831168831e-05,
      "loss": 1.2069,
      "step": 250
    },
    {
      "epoch": 0.8441558441558441,
      "grad_norm": 0.16909925639629364,
      "learning_rate": 3.181818181818182e-05,
      "loss": 1.1897,
      "step": 260
    },
    {
      "epoch": 0.8766233766233766,
      "grad_norm": 0.14579732716083527,
      "learning_rate": 2.5324675324675325e-05,
      "loss": 1.2319,
      "step": 270
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.17351751029491425,
      "learning_rate": 1.8831168831168833e-05,
      "loss": 1.2118,
      "step": 280
    },
    {
      "epoch": 0.9415584415584416,
      "grad_norm": 0.14285674691200256,
      "learning_rate": 1.2337662337662339e-05,
      "loss": 1.1631,
      "step": 290
    },
    {
      "epoch": 0.974025974025974,
      "grad_norm": 0.19110357761383057,
      "learning_rate": 5.844155844155844e-06,
      "loss": 1.2549,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 308,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.002109407897846e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|