{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9768009768009768,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04884004884004884,
      "grad_norm": 0.8704025745391846,
      "learning_rate": 0.00019986827399037812,
      "loss": 1.278,
      "step": 5
    },
    {
      "epoch": 0.09768009768009768,
      "grad_norm": 0.8027306199073792,
      "learning_rate": 0.00019947344299634464,
      "loss": 1.3067,
      "step": 10
    },
    {
      "epoch": 0.14652014652014653,
      "grad_norm": 0.9125049114227295,
      "learning_rate": 0.00019881654720812594,
      "loss": 1.1604,
      "step": 15
    },
    {
      "epoch": 0.19536019536019536,
      "grad_norm": 0.8446016311645508,
      "learning_rate": 0.00019789931723094046,
      "loss": 1.1119,
      "step": 20
    },
    {
      "epoch": 0.2442002442002442,
      "grad_norm": 0.8679695129394531,
      "learning_rate": 0.00019672416952568416,
      "loss": 1.08,
      "step": 25
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 0.9639546275138855,
      "learning_rate": 0.00019529420004271567,
      "loss": 1.1442,
      "step": 30
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 0.8240556716918945,
      "learning_rate": 0.00019361317606551238,
      "loss": 1.0986,
      "step": 35
    },
    {
      "epoch": 0.3907203907203907,
      "grad_norm": 0.6859893202781677,
      "learning_rate": 0.00019168552628568631,
      "loss": 1.091,
      "step": 40
    },
    {
      "epoch": 0.43956043956043955,
      "grad_norm": 0.6568402647972107,
      "learning_rate": 0.00018951632913550626,
      "loss": 1.0377,
      "step": 45
    },
    {
      "epoch": 0.4884004884004884,
      "grad_norm": 0.8978002667427063,
      "learning_rate": 0.00018711129940866575,
      "loss": 1.0334,
      "step": 50
    },
    {
      "epoch": 0.5372405372405372,
      "grad_norm": 1.0903880596160889,
      "learning_rate": 0.00018447677320454367,
      "loss": 1.1055,
      "step": 55
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 0.7571815848350525,
      "learning_rate": 0.0001816196912356222,
      "loss": 1.0424,
      "step": 60
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 0.9039235711097717,
      "learning_rate": 0.00017854758054203988,
      "loss": 1.0732,
      "step": 65
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 0.8517094254493713,
      "learning_rate": 0.00017526853466145244,
      "loss": 1.0926,
      "step": 70
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 0.9040933847427368,
      "learning_rate": 0.0001717911923064442,
      "loss": 1.0293,
      "step": 75
    },
    {
      "epoch": 0.7814407814407814,
      "grad_norm": 1.0867003202438354,
      "learning_rate": 0.0001681247146056654,
      "loss": 1.0589,
      "step": 80
    },
    {
      "epoch": 0.8302808302808303,
      "grad_norm": 0.7861086130142212,
      "learning_rate": 0.00016427876096865394,
      "loss": 0.9705,
      "step": 85
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 0.8542861938476562,
      "learning_rate": 0.00016026346363792567,
      "loss": 0.9613,
      "step": 90
    },
    {
      "epoch": 0.927960927960928,
      "grad_norm": 0.87472003698349,
      "learning_rate": 0.000156089400995377,
      "loss": 0.9526,
      "step": 95
    },
    {
      "epoch": 0.9768009768009768,
      "grad_norm": 0.9949895143508911,
      "learning_rate": 0.00015176756969332425,
      "loss": 1.0577,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 306,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7424687000846336e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}