| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.2038393020629883, |
| "learning_rate": 0.00018693333333333334, |
| "loss": 6.7134, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.3221516609191895, |
| "learning_rate": 0.00017360000000000002, |
| "loss": 1.9106, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.9672403931617737, |
| "learning_rate": 0.00016026666666666667, |
| "loss": 1.4008, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.117769241333008, |
| "learning_rate": 0.00014693333333333335, |
| "loss": 1.3099, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8759577870368958, |
| "learning_rate": 0.00013360000000000002, |
| "loss": 1.2693, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.0408642292022705, |
| "eval_runtime": 5.7569, |
| "eval_samples_per_second": 34.741, |
| "eval_steps_per_second": 4.343, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 2.5630991458892822, |
| "learning_rate": 0.00012026666666666669, |
| "loss": 1.2582, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 1.0610485076904297, |
| "learning_rate": 0.00010693333333333333, |
| "loss": 1.2305, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.7173859477043152, |
| "learning_rate": 9.360000000000001e-05, |
| "loss": 1.2308, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.6486402153968811, |
| "learning_rate": 8.026666666666666e-05, |
| "loss": 1.2055, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7389152646064758, |
| "learning_rate": 6.693333333333334e-05, |
| "loss": 1.2242, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.0144000053405762, |
| "eval_runtime": 5.8934, |
| "eval_samples_per_second": 33.936, |
| "eval_steps_per_second": 4.242, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.6505414843559265, |
| "learning_rate": 5.360000000000001e-05, |
| "loss": 1.2164, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.7379202246665955, |
| "learning_rate": 4.026666666666667e-05, |
| "loss": 1.1993, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.6985743045806885, |
| "learning_rate": 2.6933333333333332e-05, |
| "loss": 1.1988, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 1.2441872358322144, |
| "learning_rate": 1.3600000000000002e-05, |
| "loss": 1.1952, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.7287899255752563, |
| "learning_rate": 2.6666666666666667e-07, |
| "loss": 1.177, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.0066767930984497, |
| "eval_runtime": 5.8568, |
| "eval_samples_per_second": 34.148, |
| "eval_steps_per_second": 4.269, |
| "step": 750 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 750, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9784585617408000.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|