| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.8018018018018018, |
| "eval_steps": 500, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.7490966856479644, |
| "epoch": 0.09009009009009009, |
| "grad_norm": 4.318613052368164, |
| "learning_rate": 9e-05, |
| "loss": 6.8516, |
| "mean_token_accuracy": 0.31817560344934465, |
| "num_tokens": 5273.0, |
| "step": 10 |
| }, |
| { |
| "entropy": 2.358074116706848, |
| "epoch": 0.18018018018018017, |
| "grad_norm": 2.591895341873169, |
| "learning_rate": 0.00019, |
| "loss": 4.7791, |
| "mean_token_accuracy": 0.4064402043819427, |
| "num_tokens": 10323.0, |
| "step": 20 |
| }, |
| { |
| "entropy": 2.7442060232162477, |
| "epoch": 0.2702702702702703, |
| "grad_norm": 2.085822105407715, |
| "learning_rate": 0.00019424920127795528, |
| "loss": 2.528, |
| "mean_token_accuracy": 0.6114133253693581, |
| "num_tokens": 15657.0, |
| "step": 30 |
| }, |
| { |
| "entropy": 1.6624038934707641, |
| "epoch": 0.36036036036036034, |
| "grad_norm": 1.898437261581421, |
| "learning_rate": 0.0001878594249201278, |
| "loss": 1.5967, |
| "mean_token_accuracy": 0.75713110268116, |
| "num_tokens": 20566.0, |
| "step": 40 |
| }, |
| { |
| "entropy": 1.3454128205776215, |
| "epoch": 0.45045045045045046, |
| "grad_norm": 2.2532074451446533, |
| "learning_rate": 0.00018146964856230032, |
| "loss": 1.3224, |
| "mean_token_accuracy": 0.7765294492244721, |
| "num_tokens": 25625.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 1.1165625154972076, |
| "epoch": 0.5405405405405406, |
| "grad_norm": 2.300675868988037, |
| "learning_rate": 0.00017507987220447287, |
| "loss": 1.0734, |
| "mean_token_accuracy": 0.803844365477562, |
| "num_tokens": 30624.0, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.975060197710991, |
| "epoch": 0.6306306306306306, |
| "grad_norm": 1.954689383506775, |
| "learning_rate": 0.00016869009584664536, |
| "loss": 0.9733, |
| "mean_token_accuracy": 0.818100169301033, |
| "num_tokens": 35746.0, |
| "step": 70 |
| }, |
| { |
| "entropy": 0.8530668556690216, |
| "epoch": 0.7207207207207207, |
| "grad_norm": 2.083409309387207, |
| "learning_rate": 0.0001623003194888179, |
| "loss": 0.8131, |
| "mean_token_accuracy": 0.8403199791908265, |
| "num_tokens": 41031.0, |
| "step": 80 |
| }, |
| { |
| "entropy": 0.8242652952671051, |
| "epoch": 0.8108108108108109, |
| "grad_norm": 2.1543054580688477, |
| "learning_rate": 0.00015591054313099042, |
| "loss": 0.7285, |
| "mean_token_accuracy": 0.8522787302732467, |
| "num_tokens": 45910.0, |
| "step": 90 |
| }, |
| { |
| "entropy": 0.7216496258974076, |
| "epoch": 0.9009009009009009, |
| "grad_norm": 1.8068711757659912, |
| "learning_rate": 0.00014952076677316297, |
| "loss": 0.632, |
| "mean_token_accuracy": 0.8595692455768585, |
| "num_tokens": 51027.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.6904670834541321, |
| "epoch": 0.990990990990991, |
| "grad_norm": 2.6211395263671875, |
| "learning_rate": 0.00014313099041533546, |
| "loss": 0.6219, |
| "mean_token_accuracy": 0.8704036623239517, |
| "num_tokens": 55880.0, |
| "step": 110 |
| }, |
| { |
| "entropy": 0.5557049199938774, |
| "epoch": 1.0810810810810811, |
| "grad_norm": 2.952716112136841, |
| "learning_rate": 0.000136741214057508, |
| "loss": 0.5041, |
| "mean_token_accuracy": 0.8821422606706619, |
| "num_tokens": 60665.0, |
| "step": 120 |
| }, |
| { |
| "entropy": 0.4335968837141991, |
| "epoch": 1.1711711711711712, |
| "grad_norm": 1.972198724746704, |
| "learning_rate": 0.00013035143769968052, |
| "loss": 0.4319, |
| "mean_token_accuracy": 0.8849613904953003, |
| "num_tokens": 65647.0, |
| "step": 130 |
| }, |
| { |
| "entropy": 0.44969954043626786, |
| "epoch": 1.2612612612612613, |
| "grad_norm": 1.9271284341812134, |
| "learning_rate": 0.00012396166134185304, |
| "loss": 0.4252, |
| "mean_token_accuracy": 0.8931547611951828, |
| "num_tokens": 70510.0, |
| "step": 140 |
| }, |
| { |
| "entropy": 0.40625376254320145, |
| "epoch": 1.3513513513513513, |
| "grad_norm": 2.1082940101623535, |
| "learning_rate": 0.00011757188498402556, |
| "loss": 0.4017, |
| "mean_token_accuracy": 0.8913865387439728, |
| "num_tokens": 75632.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.45655421912670135, |
| "epoch": 1.4414414414414414, |
| "grad_norm": 1.5840785503387451, |
| "learning_rate": 0.00011118210862619809, |
| "loss": 0.4614, |
| "mean_token_accuracy": 0.8811476260423661, |
| "num_tokens": 80976.0, |
| "step": 160 |
| }, |
| { |
| "entropy": 0.42031795233488084, |
| "epoch": 1.5315315315315314, |
| "grad_norm": 1.667891502380371, |
| "learning_rate": 0.00010479233226837062, |
| "loss": 0.395, |
| "mean_token_accuracy": 0.894879949092865, |
| "num_tokens": 86111.0, |
| "step": 170 |
| }, |
| { |
| "entropy": 0.42292200922966006, |
| "epoch": 1.6216216216216215, |
| "grad_norm": 1.7975671291351318, |
| "learning_rate": 9.840255591054314e-05, |
| "loss": 0.4021, |
| "mean_token_accuracy": 0.8952047973871231, |
| "num_tokens": 91226.0, |
| "step": 180 |
| }, |
| { |
| "entropy": 0.3855786919593811, |
| "epoch": 1.7117117117117115, |
| "grad_norm": 2.244504690170288, |
| "learning_rate": 9.201277955271566e-05, |
| "loss": 0.387, |
| "mean_token_accuracy": 0.8959153234958649, |
| "num_tokens": 96405.0, |
| "step": 190 |
| }, |
| { |
| "entropy": 0.4205052852630615, |
| "epoch": 1.8018018018018018, |
| "grad_norm": 1.7068063020706177, |
| "learning_rate": 8.562300319488819e-05, |
| "loss": 0.419, |
| "mean_token_accuracy": 0.888062196969986, |
| "num_tokens": 101805.0, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 333, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1123762855636992.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|