| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.22898141434186925, |
| "eval_steps": 999999, |
| "global_step": 750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00030530855245582567, |
| "eval_loss": 1.4630959033966064, |
| "eval_runtime": 17.8511, |
| "eval_samples_per_second": 14.005, |
| "eval_steps_per_second": 7.002, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.015265427622791284, |
| "grad_norm": 0.17761844396591187, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 0.7964, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.030530855245582567, |
| "grad_norm": 0.059007592499256134, |
| "learning_rate": 9.992407582166581e-05, |
| "loss": 0.1063, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04579628286837385, |
| "grad_norm": 0.03449365496635437, |
| "learning_rate": 9.931806517013612e-05, |
| "loss": 0.0977, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.061061710491165135, |
| "grad_norm": 0.020240124315023422, |
| "learning_rate": 9.811340001546251e-05, |
| "loss": 0.0966, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07632713811395642, |
| "grad_norm": 0.0397501066327095, |
| "learning_rate": 9.632470336074009e-05, |
| "loss": 0.0967, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0915925657367477, |
| "grad_norm": 0.0175618976354599, |
| "learning_rate": 9.397368756032445e-05, |
| "loss": 0.0963, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10685799335953898, |
| "grad_norm": 0.01892520859837532, |
| "learning_rate": 9.108889076126226e-05, |
| "loss": 0.0957, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12212342098233027, |
| "grad_norm": 0.05521632730960846, |
| "learning_rate": 8.770533048884482e-05, |
| "loss": 0.0962, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13738884860512154, |
| "grad_norm": 0.009517563506960869, |
| "learning_rate": 8.386407858128706e-05, |
| "loss": 0.0963, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15265427622791283, |
| "grad_norm": 0.011137745343148708, |
| "learning_rate": 7.961176263324901e-05, |
| "loss": 0.0959, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16791970385070412, |
| "grad_norm": 0.009700442664325237, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.0959, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1831851314734954, |
| "grad_norm": 0.010401156730949879, |
| "learning_rate": 7.008477123264848e-05, |
| "loss": 0.096, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19845055909628667, |
| "grad_norm": 0.008901902474462986, |
| "learning_rate": 6.492574055008473e-05, |
| "loss": 0.096, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.21371598671907796, |
| "grad_norm": 0.01319084782153368, |
| "learning_rate": 5.958553159618693e-05, |
| "loss": 0.0957, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.22898141434186925, |
| "grad_norm": 0.009528940543532372, |
| "learning_rate": 5.4128967273616625e-05, |
| "loss": 0.0952, |
| "step": 750 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 750, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.378004047069184e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|