| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 8, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 0.40931112319231033, |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.022652290761470795, |
| "learning_rate": 0.0015, |
| "loss": 0.5204, |
| "mean_token_accuracy": 0.8606274500489235, |
| "num_tokens": 254565.0, |
| "step": 1 |
| }, |
| { |
| "entropy": 0.4583684541285038, |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.22363658249378204, |
| "learning_rate": 0.0013125, |
| "loss": 0.4936, |
| "mean_token_accuracy": 0.8646418675780296, |
| "num_tokens": 508518.0, |
| "step": 2 |
| }, |
| { |
| "entropy": 0.5834728553891182, |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.08746782690286636, |
| "learning_rate": 0.0011250000000000001, |
| "loss": 0.4866, |
| "mean_token_accuracy": 0.8650773614645004, |
| "num_tokens": 750472.0, |
| "step": 3 |
| }, |
| { |
| "entropy": 0.36713676154613495, |
| "epoch": 1.0, |
| "grad_norm": 0.04465900734066963, |
| "learning_rate": 0.0009375, |
| "loss": 0.4277, |
| "mean_token_accuracy": 0.8827557265758514, |
| "num_tokens": 814048.0, |
| "step": 4 |
| }, |
| { |
| "entropy": 0.39326300099492073, |
| "epoch": 1.3076923076923077, |
| "grad_norm": 0.08096347749233246, |
| "learning_rate": 0.00075, |
| "loss": 0.4244, |
| "mean_token_accuracy": 0.8808529078960419, |
| "num_tokens": 1056805.0, |
| "step": 5 |
| }, |
| { |
| "entropy": 0.4314521290361881, |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.03650630638003349, |
| "learning_rate": 0.0005625000000000001, |
| "loss": 0.4273, |
| "mean_token_accuracy": 0.8794465661048889, |
| "num_tokens": 1311499.0, |
| "step": 6 |
| }, |
| { |
| "entropy": 0.42971640825271606, |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.020897606387734413, |
| "learning_rate": 0.000375, |
| "loss": 0.4173, |
| "mean_token_accuracy": 0.8811118453741074, |
| "num_tokens": 1565632.0, |
| "step": 7 |
| }, |
| { |
| "entropy": 0.4403044879436493, |
| "epoch": 2.0, |
| "grad_norm": 0.02177661471068859, |
| "learning_rate": 0.0001875, |
| "loss": 0.4219, |
| "mean_token_accuracy": 0.8807980418205261, |
| "num_tokens": 1628096.0, |
| "step": 8 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 8, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.229291240186184e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|