| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 3215, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03110419906687403, |
| "grad_norm": 0.11611274629831314, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 2.3215, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06220839813374806, |
| "grad_norm": 0.1991909295320511, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 2.2888, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09331259720062209, |
| "grad_norm": 0.3986590504646301, |
| "learning_rate": 2e-05, |
| "loss": 2.2411, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12441679626749612, |
| "grad_norm": 0.6170083284378052, |
| "learning_rate": 1.9941980734479214e-05, |
| "loss": 2.1325, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15552099533437014, |
| "grad_norm": 0.5896138548851013, |
| "learning_rate": 1.9768596184951174e-05, |
| "loss": 2.0947, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18662519440124417, |
| "grad_norm": 0.69114750623703, |
| "learning_rate": 1.948185828025913e-05, |
| "loss": 2.0607, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2177293934681182, |
| "grad_norm": 0.8546278476715088, |
| "learning_rate": 1.908509428492852e-05, |
| "loss": 1.9856, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.24883359253499224, |
| "grad_norm": 0.6112543344497681, |
| "learning_rate": 1.8582908190078184e-05, |
| "loss": 1.9717, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.27993779160186627, |
| "grad_norm": 0.8754922747612, |
| "learning_rate": 1.7981127289383718e-05, |
| "loss": 1.9524, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3110419906687403, |
| "grad_norm": 0.8028755187988281, |
| "learning_rate": 1.728673456001766e-05, |
| "loss": 1.9325, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3421461897356143, |
| "grad_norm": 0.7798598408699036, |
| "learning_rate": 1.6507787633208173e-05, |
| "loss": 1.9153, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.37325038880248834, |
| "grad_norm": 1.0763300657272339, |
| "learning_rate": 1.5653325294669884e-05, |
| "loss": 1.8883, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.40435458786936235, |
| "grad_norm": 1.1706815958023071, |
| "learning_rate": 1.4733262599862234e-05, |
| "loss": 1.9038, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4354587869362364, |
| "grad_norm": 1.2860356569290161, |
| "learning_rate": 1.3758275821142382e-05, |
| "loss": 1.8769, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4665629860031104, |
| "grad_norm": 1.268933892250061, |
| "learning_rate": 1.273967856186909e-05, |
| "loss": 1.8601, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4976671850699845, |
| "grad_norm": 0.9673184156417847, |
| "learning_rate": 1.1689290475011258e-05, |
| "loss": 1.9104, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5287713841368584, |
| "grad_norm": 1.3028920888900757, |
| "learning_rate": 1.0619300109631146e-05, |
| "loss": 1.9084, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5598755832037325, |
| "grad_norm": 1.10184645652771, |
| "learning_rate": 9.542123476751484e-06, |
| "loss": 1.8605, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5909797822706065, |
| "grad_norm": 1.1696065664291382, |
| "learning_rate": 8.470259975787438e-06, |
| "loss": 1.8693, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6220839813374806, |
| "grad_norm": 1.0528351068496704, |
| "learning_rate": 7.416147353351909e-06, |
| "loss": 1.8479, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6531881804043546, |
| "grad_norm": 0.8877471089363098, |
| "learning_rate": 6.392017377470867e-06, |
| "loss": 1.8941, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6842923794712286, |
| "grad_norm": 1.711288571357727, |
| "learning_rate": 5.409753901944006e-06, |
| "loss": 1.8153, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7153965785381027, |
| "grad_norm": 1.6181174516677856, |
| "learning_rate": 4.48075496785092e-06, |
| "loss": 1.8127, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7465007776049767, |
| "grad_norm": 0.8964147567749023, |
| "learning_rate": 3.615800542356738e-06, |
| "loss": 1.9051, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7776049766718507, |
| "grad_norm": 1.0576339960098267, |
| "learning_rate": 2.8249274295566863e-06, |
| "loss": 1.9044, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8087091757387247, |
| "grad_norm": 1.3190217018127441, |
| "learning_rate": 2.1173128048757307e-06, |
| "loss": 1.7841, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8398133748055988, |
| "grad_norm": 1.4849908351898193, |
| "learning_rate": 1.501167724473016e-06, |
| "loss": 1.843, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8709175738724728, |
| "grad_norm": 1.069494366645813, |
| "learning_rate": 9.836418453523833e-07, |
| "loss": 1.8534, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9020217729393468, |
| "grad_norm": 1.185276746749878, |
| "learning_rate": 5.707404617927526e-07, |
| "loss": 1.8983, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9331259720062208, |
| "grad_norm": 1.2488497495651245, |
| "learning_rate": 2.672548207954495e-07, |
| "loss": 1.8157, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9642301710730948, |
| "grad_norm": 1.7197282314300537, |
| "learning_rate": 7.670652515782917e-08, |
| "loss": 1.8261, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.995334370139969, |
| "grad_norm": 0.9283524751663208, |
| "learning_rate": 1.3066693117191886e-09, |
| "loss": 1.8391, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 3215, |
| "total_flos": 5.85604153344e+16, |
| "train_loss": 1.938271311616082, |
| "train_runtime": 1682.5697, |
| "train_samples_per_second": 3.822, |
| "train_steps_per_second": 1.911 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3215, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.85604153344e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|