| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2888, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03462603878116344, |
| "grad_norm": 0.14943788945674896, |
| "learning_rate": 6.944444444444445e-06, |
| "loss": 2.3885, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06925207756232687, |
| "grad_norm": 0.2681087851524353, |
| "learning_rate": 1.388888888888889e-05, |
| "loss": 2.3454, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1038781163434903, |
| "grad_norm": 0.4204924404621124, |
| "learning_rate": 1.9998948817948157e-05, |
| "loss": 2.258, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13850415512465375, |
| "grad_norm": 0.5873008966445923, |
| "learning_rate": 1.9908568428746408e-05, |
| "loss": 2.165, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1731301939058172, |
| "grad_norm": 0.7970355153083801, |
| "learning_rate": 1.9673698799700582e-05, |
| "loss": 2.0815, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2077562326869806, |
| "grad_norm": 0.7298622131347656, |
| "learning_rate": 1.9297764858882516e-05, |
| "loss": 2.0287, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24238227146814403, |
| "grad_norm": 0.7727171778678894, |
| "learning_rate": 1.8786248569678847e-05, |
| "loss": 2.037, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2770083102493075, |
| "grad_norm": 0.791151225566864, |
| "learning_rate": 1.8146608991420533e-05, |
| "loss": 1.9875, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31163434903047094, |
| "grad_norm": 0.9811259508132935, |
| "learning_rate": 1.7388173509501475e-05, |
| "loss": 1.9107, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3462603878116344, |
| "grad_norm": 0.9305247068405151, |
| "learning_rate": 1.652200182109602e-05, |
| "loss": 1.8919, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3808864265927978, |
| "grad_norm": 1.0283896923065186, |
| "learning_rate": 1.5560724659869905e-05, |
| "loss": 1.878, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4155124653739612, |
| "grad_norm": 1.1012296676635742, |
| "learning_rate": 1.4518359611441452e-05, |
| "loss": 1.8174, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.45013850415512463, |
| "grad_norm": 1.0275688171386719, |
| "learning_rate": 1.3410106705418424e-05, |
| "loss": 1.8528, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.48476454293628807, |
| "grad_norm": 0.9701151251792908, |
| "learning_rate": 1.2252126764738845e-05, |
| "loss": 1.9103, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5193905817174516, |
| "grad_norm": 0.9857981204986572, |
| "learning_rate": 1.106130574448156e-05, |
| "loss": 1.796, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.554016620498615, |
| "grad_norm": 1.2235304117202759, |
| "learning_rate": 9.855008496617326e-06, |
| "loss": 1.8084, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5886426592797784, |
| "grad_norm": 1.137815237045288, |
| "learning_rate": 8.650825551364844e-06, |
| "loss": 1.878, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6232686980609419, |
| "grad_norm": 1.0799235105514526, |
| "learning_rate": 7.4663166076497376e-06, |
| "loss": 1.8077, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6578947368421053, |
| "grad_norm": 0.8830498456954956, |
| "learning_rate": 6.318754473153221e-06, |
| "loss": 1.8718, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6925207756232687, |
| "grad_norm": 0.9494577050209045, |
| "learning_rate": 5.224873187881136e-06, |
| "loss": 1.8099, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7271468144044322, |
| "grad_norm": 0.9683797955513, |
| "learning_rate": 4.200624004178883e-06, |
| "loss": 1.8241, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7617728531855956, |
| "grad_norm": 1.5775721073150635, |
| "learning_rate": 3.2609427815531426e-06, |
| "loss": 1.7979, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.796398891966759, |
| "grad_norm": 1.1023374795913696, |
| "learning_rate": 2.4195321882076295e-06, |
| "loss": 1.8127, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8310249307479224, |
| "grad_norm": 1.4638538360595703, |
| "learning_rate": 1.6886618852849723e-06, |
| "loss": 1.7909, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8656509695290858, |
| "grad_norm": 0.9252508282661438, |
| "learning_rate": 1.0789896075783734e-06, |
| "loss": 1.818, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9002770083102493, |
| "grad_norm": 0.8653609752655029, |
| "learning_rate": 5.994057497592032e-07, |
| "loss": 1.7445, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9349030470914127, |
| "grad_norm": 1.0171947479248047, |
| "learning_rate": 2.569037244032657e-07, |
| "loss": 1.7914, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9695290858725761, |
| "grad_norm": 1.0968868732452393, |
| "learning_rate": 5.647798228764156e-08, |
| "loss": 1.8258, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.476217269897461, |
| "eval_runtime": 13.7874, |
| "eval_samples_per_second": 13.708, |
| "eval_steps_per_second": 1.741, |
| "step": 2888 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 2888, |
| "total_flos": 5.2595085312e+16, |
| "train_loss": 1.9235466986151613, |
| "train_runtime": 918.9364, |
| "train_samples_per_second": 6.284, |
| "train_steps_per_second": 3.143 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2888, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.2595085312e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|