| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9474969474969475, |
| "eval_steps": 500, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04884004884004884, |
| "grad_norm": 0.8704025745391846, |
| "learning_rate": 0.00019986827399037812, |
| "loss": 1.278, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "grad_norm": 0.8027306199073792, |
| "learning_rate": 0.00019947344299634464, |
| "loss": 1.3067, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 0.9125049114227295, |
| "learning_rate": 0.00019881654720812594, |
| "loss": 1.1604, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "grad_norm": 0.8446016311645508, |
| "learning_rate": 0.00019789931723094046, |
| "loss": 1.1119, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "grad_norm": 0.8679695129394531, |
| "learning_rate": 0.00019672416952568416, |
| "loss": 1.08, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.29304029304029305, |
| "grad_norm": 0.9639546275138855, |
| "learning_rate": 0.00019529420004271567, |
| "loss": 1.1442, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 0.8240556716918945, |
| "learning_rate": 0.00019361317606551238, |
| "loss": 1.0986, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3907203907203907, |
| "grad_norm": 0.6859893202781677, |
| "learning_rate": 0.00019168552628568631, |
| "loss": 1.091, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.43956043956043955, |
| "grad_norm": 0.6568402647972107, |
| "learning_rate": 0.00018951632913550626, |
| "loss": 1.0377, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.4884004884004884, |
| "grad_norm": 0.8978002667427063, |
| "learning_rate": 0.00018711129940866575, |
| "loss": 1.0334, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5372405372405372, |
| "grad_norm": 1.0903880596160889, |
| "learning_rate": 0.00018447677320454367, |
| "loss": 1.1055, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5860805860805861, |
| "grad_norm": 0.7571815848350525, |
| "learning_rate": 0.0001816196912356222, |
| "loss": 1.0424, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.9039235711097717, |
| "learning_rate": 0.00017854758054203988, |
| "loss": 1.0732, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 0.8517094254493713, |
| "learning_rate": 0.00017526853466145244, |
| "loss": 1.0926, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7326007326007326, |
| "grad_norm": 0.9040933847427368, |
| "learning_rate": 0.0001717911923064442, |
| "loss": 1.0293, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.7814407814407814, |
| "grad_norm": 1.0867003202438354, |
| "learning_rate": 0.0001681247146056654, |
| "loss": 1.0589, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8302808302808303, |
| "grad_norm": 0.7861086130142212, |
| "learning_rate": 0.00016427876096865394, |
| "loss": 0.9705, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "grad_norm": 0.8542861938476562, |
| "learning_rate": 0.00016026346363792567, |
| "loss": 0.9613, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.927960927960928, |
| "grad_norm": 0.87472003698349, |
| "learning_rate": 0.000156089400995377, |
| "loss": 0.9526, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.9768009768009768, |
| "grad_norm": 0.9949895143508911, |
| "learning_rate": 0.00015176756969332425, |
| "loss": 1.0577, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.0195360195360195, |
| "grad_norm": 1.0473015308380127, |
| "learning_rate": 0.00014730935568360102, |
| "loss": 0.9477, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.0683760683760684, |
| "grad_norm": 1.318867802619934, |
| "learning_rate": 0.0001427265042210381, |
| "loss": 0.7841, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1172161172161172, |
| "grad_norm": 1.3813416957855225, |
| "learning_rate": 0.0001380310889203526, |
| "loss": 0.7016, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.1660561660561661, |
| "grad_norm": 1.2204864025115967, |
| "learning_rate": 0.00013323547994796597, |
| "loss": 0.7277, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.214896214896215, |
| "grad_norm": 0.9304312467575073, |
| "learning_rate": 0.0001283523114325511, |
| "loss": 0.6668, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.2637362637362637, |
| "grad_norm": 1.382764458656311, |
| "learning_rate": 0.0001233944481801649, |
| "loss": 0.6627, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.3125763125763126, |
| "grad_norm": 1.4755507707595825, |
| "learning_rate": 0.00011837495178165706, |
| "loss": 0.7239, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.3614163614163615, |
| "grad_norm": 1.235129475593567, |
| "learning_rate": 0.00011330704620164538, |
| "loss": 0.7067, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4102564102564101, |
| "grad_norm": 1.1385552883148193, |
| "learning_rate": 0.00010820408293971378, |
| "loss": 0.6466, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.459096459096459, |
| "grad_norm": 1.374949336051941, |
| "learning_rate": 0.00010307950585561706, |
| "loss": 0.7069, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.507936507936508, |
| "grad_norm": 1.3680429458618164, |
| "learning_rate": 9.794681575116097e-05, |
| "loss": 0.6908, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.5567765567765568, |
| "grad_norm": 1.5387816429138184, |
| "learning_rate": 9.281953480206725e-05, |
| "loss": 0.7049, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.6056166056166057, |
| "grad_norm": 1.3777350187301636, |
| "learning_rate": 8.77111709335286e-05, |
| "loss": 0.6259, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.6544566544566544, |
| "grad_norm": 1.4641001224517822, |
| "learning_rate": 8.263518223330697e-05, |
| "loss": 0.7536, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.7032967032967035, |
| "grad_norm": 1.334525465965271, |
| "learning_rate": 7.760494149612971e-05, |
| "loss": 0.6977, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.7521367521367521, |
| "grad_norm": 1.5057116746902466, |
| "learning_rate": 7.263370099279172e-05, |
| "loss": 0.6645, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.800976800976801, |
| "grad_norm": 1.517478585243225, |
| "learning_rate": 6.773455755678054e-05, |
| "loss": 0.6264, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.84981684981685, |
| "grad_norm": 1.4641480445861816, |
| "learning_rate": 6.292041808040393e-05, |
| "loss": 0.7086, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.8986568986568986, |
| "grad_norm": 0.9789106845855713, |
| "learning_rate": 5.82039655113217e-05, |
| "loss": 0.6297, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.9474969474969475, |
| "grad_norm": 1.607702374458313, |
| "learning_rate": 5.359762543906368e-05, |
| "loss": 0.6865, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 306, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.49507002236928e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|