| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 23820, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.895046179680941e-05, |
| "loss": 2.7969, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.790092359361881e-05, |
| "loss": 2.7837, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.685138539042822e-05, |
| "loss": 2.7809, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 4.5801847187237617e-05, |
| "loss": 2.7691, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 4.475230898404702e-05, |
| "loss": 2.7465, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 4.370277078085643e-05, |
| "loss": 2.7008, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 4.265323257766583e-05, |
| "loss": 2.6822, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 4.160369437447523e-05, |
| "loss": 2.6594, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.0554156171284636e-05, |
| "loss": 2.6714, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 3.950461796809404e-05, |
| "loss": 2.6431, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 3.8455079764903447e-05, |
| "loss": 2.5932, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.7405541561712845e-05, |
| "loss": 2.6018, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 3.635600335852225e-05, |
| "loss": 2.5792, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 3.5306465155331656e-05, |
| "loss": 2.607, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 3.425692695214106e-05, |
| "loss": 2.5306, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 3.3207388748950466e-05, |
| "loss": 2.5202, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 3.2157850545759864e-05, |
| "loss": 2.5127, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 3.1108312342569276e-05, |
| "loss": 2.5027, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 3.0058774139378675e-05, |
| "loss": 2.477, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 2.9009235936188077e-05, |
| "loss": 2.4661, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 2.7959697732997482e-05, |
| "loss": 2.4734, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 2.6910159529806887e-05, |
| "loss": 2.4431, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 2.5860621326616293e-05, |
| "loss": 2.4466, |
| "step": 11500 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 2.4811083123425694e-05, |
| "loss": 2.4676, |
| "step": 12000 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 2.37615449202351e-05, |
| "loss": 2.3812, |
| "step": 12500 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 2.27120067170445e-05, |
| "loss": 2.3898, |
| "step": 13000 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 2.1662468513853907e-05, |
| "loss": 2.3788, |
| "step": 13500 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 2.061293031066331e-05, |
| "loss": 2.3985, |
| "step": 14000 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 1.9563392107472714e-05, |
| "loss": 2.3676, |
| "step": 14500 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 1.8513853904282116e-05, |
| "loss": 2.3382, |
| "step": 15000 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 1.746431570109152e-05, |
| "loss": 2.343, |
| "step": 15500 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 1.6414777497900923e-05, |
| "loss": 2.3123, |
| "step": 16000 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 1.5365239294710328e-05, |
| "loss": 2.348, |
| "step": 16500 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 1.4315701091519732e-05, |
| "loss": 2.2985, |
| "step": 17000 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 1.3266162888329137e-05, |
| "loss": 2.2836, |
| "step": 17500 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 1.2216624685138539e-05, |
| "loss": 2.287, |
| "step": 18000 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 1.1167086481947942e-05, |
| "loss": 2.2979, |
| "step": 18500 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 1.0117548278757346e-05, |
| "loss": 2.2855, |
| "step": 19000 |
| }, |
| { |
| "epoch": 8.19, |
| "learning_rate": 9.068010075566751e-06, |
| "loss": 2.2451, |
| "step": 19500 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 8.018471872376155e-06, |
| "loss": 2.2547, |
| "step": 20000 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 6.968933669185558e-06, |
| "loss": 2.2408, |
| "step": 20500 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 5.919395465994963e-06, |
| "loss": 2.2528, |
| "step": 21000 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 4.869857262804366e-06, |
| "loss": 2.2225, |
| "step": 21500 |
| }, |
| { |
| "epoch": 9.24, |
| "learning_rate": 3.8203190596137705e-06, |
| "loss": 2.226, |
| "step": 22000 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 2.770780856423174e-06, |
| "loss": 2.2351, |
| "step": 22500 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 1.7212426532325778e-06, |
| "loss": 2.2179, |
| "step": 23000 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 6.717044500419815e-07, |
| "loss": 2.1876, |
| "step": 23500 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 23820, |
| "total_flos": 1.04055328689984e+17, |
| "train_loss": 1.5531483301888185, |
| "train_runtime": 27676.096, |
| "train_samples_per_second": 6.885, |
| "train_steps_per_second": 0.861 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 23820, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "total_flos": 1.04055328689984e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|