| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.920502092050209, |
| "eval_steps": 500, |
| "global_step": 87, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 5.262314174920856, |
| "learning_rate": 5e-06, |
| "loss": 1.1175, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 5.981365790026769, |
| "learning_rate": 5e-06, |
| "loss": 1.0229, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.9707112970711297, |
| "eval_loss": 0.9536872506141663, |
| "eval_runtime": 21.0481, |
| "eval_samples_per_second": 38.103, |
| "eval_steps_per_second": 0.618, |
| "step": 29 |
| }, |
| { |
| "epoch": 1.00836820083682, |
| "grad_norm": 1.4208273735733241, |
| "learning_rate": 5e-06, |
| "loss": 0.9952, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.3430962343096233, |
| "grad_norm": 4.9064888353875835, |
| "learning_rate": 5e-06, |
| "loss": 0.9209, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.6778242677824267, |
| "grad_norm": 1.1284317077683494, |
| "learning_rate": 5e-06, |
| "loss": 0.8991, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.9790794979079498, |
| "eval_loss": 0.9046074151992798, |
| "eval_runtime": 20.9928, |
| "eval_samples_per_second": 38.204, |
| "eval_steps_per_second": 0.619, |
| "step": 59 |
| }, |
| { |
| "epoch": 2.01673640167364, |
| "grad_norm": 1.4649955809934467, |
| "learning_rate": 5e-06, |
| "loss": 0.9071, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.3514644351464433, |
| "grad_norm": 0.9844173290886701, |
| "learning_rate": 5e-06, |
| "loss": 0.8369, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.6861924686192467, |
| "grad_norm": 0.8203667838780383, |
| "learning_rate": 5e-06, |
| "loss": 0.8286, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.920502092050209, |
| "eval_loss": 0.8841589689254761, |
| "eval_runtime": 19.9067, |
| "eval_samples_per_second": 40.288, |
| "eval_steps_per_second": 0.653, |
| "step": 87 |
| }, |
| { |
| "epoch": 2.920502092050209, |
| "step": 87, |
| "total_flos": 145518860697600.0, |
| "train_loss": 0.931431375700852, |
| "train_runtime": 3202.1178, |
| "train_samples_per_second": 14.276, |
| "train_steps_per_second": 0.027 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 87, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 145518860697600.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|