| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 4, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 0.40931112319231033, |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.0650932714343071, |
| "learning_rate": 0.0015, |
| "loss": 0.5204, |
| "mean_token_accuracy": 0.8606274500489235, |
| "num_tokens": 254565.0, |
| "step": 1 |
| }, |
| { |
| "entropy": 0.46549949049949646, |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.8155490756034851, |
| "learning_rate": 0.0011250000000000001, |
| "loss": 0.4944, |
| "mean_token_accuracy": 0.8640510067343712, |
| "num_tokens": 508518.0, |
| "step": 2 |
| }, |
| { |
| "entropy": 0.5163677036762238, |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.18912406265735626, |
| "learning_rate": 0.00075, |
| "loss": 0.48, |
| "mean_token_accuracy": 0.8658391088247299, |
| "num_tokens": 750472.0, |
| "step": 3 |
| }, |
| { |
| "entropy": 0.39955340325832367, |
| "epoch": 1.0, |
| "grad_norm": 0.12699061632156372, |
| "learning_rate": 0.000375, |
| "loss": 0.4196, |
| "mean_token_accuracy": 0.8839532434940338, |
| "num_tokens": 814048.0, |
| "step": 4 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 4, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5687565574150554e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|