| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.998719590268886, | |
| "eval_steps": 1000, | |
| "global_step": 9366, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00032030749519538755, | |
| "grad_norm": 6.107327938079834, | |
| "learning_rate": 0.0, | |
| "loss": 2.2696, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.3203074951953876, | |
| "grad_norm": 3.857839345932007, | |
| "learning_rate": 2.977933325424131e-05, | |
| "loss": 1.6542, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3203074951953876, | |
| "eval_cosine_accuracy": 0.9641829133033752, | |
| "eval_loss": 0.5915806889533997, | |
| "eval_runtime": 28.6636, | |
| "eval_samples_per_second": 327.279, | |
| "eval_steps_per_second": 1.291, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6406149903907752, | |
| "grad_norm": 3.1807026863098145, | |
| "learning_rate": 2.6220192193617276e-05, | |
| "loss": 1.0828, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6406149903907752, | |
| "eval_cosine_accuracy": 0.9688732624053955, | |
| "eval_loss": 0.5482337474822998, | |
| "eval_runtime": 27.6361, | |
| "eval_samples_per_second": 339.447, | |
| "eval_steps_per_second": 1.339, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9609224855861627, | |
| "grad_norm": 4.243162155151367, | |
| "learning_rate": 2.2661051132993238e-05, | |
| "loss": 0.8294, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9609224855861627, | |
| "eval_cosine_accuracy": 0.971858024597168, | |
| "eval_loss": 0.5288547277450562, | |
| "eval_runtime": 27.5987, | |
| "eval_samples_per_second": 339.908, | |
| "eval_steps_per_second": 1.341, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2810499359795133, | |
| "grad_norm": 5.0122575759887695, | |
| "learning_rate": 1.910902835449045e-05, | |
| "loss": 0.762, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.2810499359795133, | |
| "eval_cosine_accuracy": 0.9751625657081604, | |
| "eval_loss": 0.5255293250083923, | |
| "eval_runtime": 27.7311, | |
| "eval_samples_per_second": 338.284, | |
| "eval_steps_per_second": 1.334, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6011523687580027, | |
| "grad_norm": 3.2703332901000977, | |
| "learning_rate": 1.5549887293866415e-05, | |
| "loss": 0.7273, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6011523687580027, | |
| "eval_cosine_accuracy": 0.9755889773368835, | |
| "eval_loss": 0.5073443055152893, | |
| "eval_runtime": 27.5769, | |
| "eval_samples_per_second": 340.176, | |
| "eval_steps_per_second": 1.342, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.9212548015364916, | |
| "grad_norm": 3.8906168937683105, | |
| "learning_rate": 1.1990746233242378e-05, | |
| "loss": 0.6962, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.9212548015364916, | |
| "eval_cosine_accuracy": 0.9772945046424866, | |
| "eval_loss": 0.49614542722702026, | |
| "eval_runtime": 27.8537, | |
| "eval_samples_per_second": 336.795, | |
| "eval_steps_per_second": 1.328, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.2413572343149806, | |
| "grad_norm": 3.059576988220215, | |
| "learning_rate": 8.431605172618341e-06, | |
| "loss": 0.6648, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.2413572343149806, | |
| "eval_cosine_accuracy": 0.9775077104568481, | |
| "eval_loss": 0.5007394552230835, | |
| "eval_runtime": 28.1686, | |
| "eval_samples_per_second": 333.03, | |
| "eval_steps_per_second": 1.314, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.56145966709347, | |
| "grad_norm": 3.0882303714752197, | |
| "learning_rate": 4.872464111994306e-06, | |
| "loss": 0.6362, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.56145966709347, | |
| "eval_cosine_accuracy": 0.9779341220855713, | |
| "eval_loss": 0.5001775622367859, | |
| "eval_runtime": 27.7547, | |
| "eval_samples_per_second": 337.996, | |
| "eval_steps_per_second": 1.333, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.881562099871959, | |
| "grad_norm": 2.918884038925171, | |
| "learning_rate": 1.3133230513702693e-06, | |
| "loss": 0.6228, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.881562099871959, | |
| "eval_cosine_accuracy": 0.9784671068191528, | |
| "eval_loss": 0.4983352720737457, | |
| "eval_runtime": 28.0171, | |
| "eval_samples_per_second": 334.831, | |
| "eval_steps_per_second": 1.321, | |
| "step": 9000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 9366, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |