| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9977603583426653, |
| "eval_steps": 2000, |
| "global_step": 16056, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0001868460388639761, |
| "grad_norm": 8.915643692016602, |
| "learning_rate": 0.0, |
| "loss": 3.2115, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.37369207772795215, |
| "grad_norm": 3.4466991424560547, |
| "learning_rate": 2.994527998755732e-05, |
| "loss": 1.907, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.37369207772795215, |
| "eval_cosine_accuracy": 0.9618272185325623, |
| "eval_loss": 0.6304187178611755, |
| "eval_runtime": 27.4998, |
| "eval_samples_per_second": 343.893, |
| "eval_steps_per_second": 1.345, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7473841554559043, |
| "grad_norm": 2.7122421264648438, |
| "learning_rate": 2.8015311122967933e-05, |
| "loss": 1.1209, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7473841554559043, |
| "eval_cosine_accuracy": 0.9697578549385071, |
| "eval_loss": 0.553921639919281, |
| "eval_runtime": 25.7506, |
| "eval_samples_per_second": 367.254, |
| "eval_steps_per_second": 1.437, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1209406494960805, |
| "grad_norm": 2.4806041717529297, |
| "learning_rate": 2.3665720196385667e-05, |
| "loss": 0.9041, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.1209406494960805, |
| "eval_cosine_accuracy": 0.9741989970207214, |
| "eval_loss": 0.5072000622749329, |
| "eval_runtime": 26.0436, |
| "eval_samples_per_second": 363.122, |
| "eval_steps_per_second": 1.421, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.494214259051885, |
| "grad_norm": 4.2601141929626465, |
| "learning_rate": 1.770493593475678e-05, |
| "loss": 0.7989, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.494214259051885, |
| "eval_cosine_accuracy": 0.9733530879020691, |
| "eval_loss": 0.4787100553512573, |
| "eval_runtime": 26.7179, |
| "eval_samples_per_second": 353.958, |
| "eval_steps_per_second": 1.385, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.8674878686076894, |
| "grad_norm": 4.208492279052734, |
| "learning_rate": 1.1241230432764519e-05, |
| "loss": 0.7458, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.8674878686076894, |
| "eval_cosine_accuracy": 0.9733530879020691, |
| "eval_loss": 0.4714418947696686, |
| "eval_runtime": 26.4276, |
| "eval_samples_per_second": 357.846, |
| "eval_steps_per_second": 1.4, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.2407614781634937, |
| "grad_norm": 3.4276180267333984, |
| "learning_rate": 5.476382537233154e-06, |
| "loss": 0.6976, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.2407614781634937, |
| "eval_cosine_accuracy": 0.974939227104187, |
| "eval_loss": 0.4594666063785553, |
| "eval_runtime": 26.464, |
| "eval_samples_per_second": 357.354, |
| "eval_steps_per_second": 1.398, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.6140350877192984, |
| "grad_norm": 2.610654592514038, |
| "learning_rate": 1.4808212490710914e-06, |
| "loss": 0.6796, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.6140350877192984, |
| "eval_cosine_accuracy": 0.9745162129402161, |
| "eval_loss": 0.45741090178489685, |
| "eval_runtime": 27.6186, |
| "eval_samples_per_second": 342.415, |
| "eval_steps_per_second": 1.34, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.9873086972751026, |
| "grad_norm": 3.243903875350952, |
| "learning_rate": 1.3191027301212577e-09, |
| "loss": 0.6764, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.9873086972751026, |
| "eval_cosine_accuracy": 0.9748334288597107, |
| "eval_loss": 0.4565849006175995, |
| "eval_runtime": 27.8151, |
| "eval_samples_per_second": 339.996, |
| "eval_steps_per_second": 1.33, |
| "step": 16000 |
| } |
| ], |
| "logging_steps": 2000, |
| "max_steps": 16056, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|