| { | |
| "best_metric": 0.6673863530158997, | |
| "best_model_checkpoint": "output/queen/checkpoint-136", | |
| "epoch": 2.0, | |
| "global_step": 136, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013537785052914348, | |
| "loss": 0.8946, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001300082017869573, | |
| "loss": 1.1571, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00012137631040942562, | |
| "loss": 1.0891, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00010994073605561692, | |
| "loss": 1.0026, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 9.63089809342121e-05, | |
| "loss": 0.8956, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 8.120521692221655e-05, | |
| "loss": 1.0037, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 6.543181473690228e-05, | |
| "loss": 0.9259, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.982671888105533e-05, | |
| "loss": 0.9391, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.52189327703136e-05, | |
| "loss": 0.897, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.2384478845846266e-05, | |
| "loss": 0.969, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.2005173242556645e-05, | |
| "loss": 0.9795, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.632405062861206e-06, | |
| "loss": 1.0966, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.578444419609618e-07, | |
| "loss": 0.9097, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9767317771911621, | |
| "eval_runtime": 2.0747, | |
| "eval_samples_per_second": 46.272, | |
| "eval_steps_per_second": 5.784, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.7435600757598877, | |
| "eval_runtime": 1.811, | |
| "eval_samples_per_second": 48.039, | |
| "eval_steps_per_second": 6.074, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.926355061606355e-07, | |
| "loss": 1.3103, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.5561795752596966e-06, | |
| "loss": 0.9826, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.027510448894829e-05, | |
| "loss": 0.9993, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.0092474810602806e-05, | |
| "loss": 0.8551, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.2486753626613595e-05, | |
| "loss": 1.0815, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.6799508616681124e-05, | |
| "loss": 0.8781, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 6.227039054081734e-05, | |
| "loss": 0.9205, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 7.807752594969924e-05, | |
| "loss": 0.7833, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 9.338117830043863e-05, | |
| "loss": 1.083, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00010736835802938959, | |
| "loss": 1.0006, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011929601172133724, | |
| "loss": 1.0609, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00012853049599834086, | |
| "loss": 0.9702, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00013458123912165544, | |
| "loss": 1.0468, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00013712680207144277, | |
| "loss": 1.0231, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.6673863530158997, | |
| "eval_runtime": 2.2427, | |
| "eval_samples_per_second": 42.36, | |
| "eval_steps_per_second": 5.351, | |
| "step": 136 | |
| } | |
| ], | |
| "max_steps": 136, | |
| "num_train_epochs": 2, | |
| "total_flos": 139660591104000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |