{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 28137,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 4.91114902086221e-05,
      "loss": 4.1307,
      "step": 500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.82229804172442e-05,
      "loss": 3.2328,
      "step": 1000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.73344706258663e-05,
      "loss": 2.9206,
      "step": 1500
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.64459608344884e-05,
      "loss": 2.8462,
      "step": 2000
    },
    {
      "epoch": 0.21,
      "eval_e": 0.23890887290167867,
      "eval_f1": 0.23467707071366814,
      "eval_loss": 5.362636089324951,
      "eval_runtime": 121.7016,
      "eval_samples_per_second": 27.411,
      "eval_steps_per_second": 27.411,
      "step": 2000
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.55574510431105e-05,
      "loss": 2.9308,
      "step": 2500
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.46689412517326e-05,
      "loss": 2.8088,
      "step": 3000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.378043146035469e-05,
      "loss": 2.5237,
      "step": 3500
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.2891921668976795e-05,
      "loss": 2.5704,
      "step": 4000
    },
    {
      "epoch": 0.43,
      "eval_e": 0.3579136690647482,
      "eval_f1": 0.3288601709033254,
      "eval_loss": 3.1302592754364014,
      "eval_runtime": 121.4271,
      "eval_samples_per_second": 27.473,
      "eval_steps_per_second": 27.473,
      "step": 4000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.200341187759889e-05,
      "loss": 2.2721,
      "step": 4500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.111490208622099e-05,
      "loss": 2.5344,
      "step": 5000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.022639229484309e-05,
      "loss": 2.3429,
      "step": 5500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.933788250346519e-05,
      "loss": 2.3649,
      "step": 6000
    },
    {
      "epoch": 0.64,
      "eval_e": 0.3761990407673861,
      "eval_f1": 0.3487221168810336,
      "eval_loss": 4.601644039154053,
      "eval_runtime": 121.4022,
      "eval_samples_per_second": 27.479,
      "eval_steps_per_second": 27.479,
      "step": 6000
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.844937271208729e-05,
      "loss": 2.125,
      "step": 6500
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.756086292070939e-05,
      "loss": 2.241,
      "step": 7000
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.6672353129331485e-05,
      "loss": 2.0385,
      "step": 7500
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.578384333795359e-05,
      "loss": 2.3278,
      "step": 8000
    },
    {
      "epoch": 0.85,
      "eval_e": 0.4094724220623501,
      "eval_f1": 0.3706672296662482,
      "eval_loss": 4.3857340812683105,
      "eval_runtime": 121.5065,
      "eval_samples_per_second": 27.455,
      "eval_steps_per_second": 27.455,
      "step": 8000
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.4895333546575685e-05,
      "loss": 2.2073,
      "step": 8500
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.400682375519779e-05,
      "loss": 2.089,
      "step": 9000
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.311831396381988e-05,
      "loss": 1.8698,
      "step": 9500
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.222980417244198e-05,
      "loss": 1.4806,
      "step": 10000
    },
    {
      "epoch": 1.07,
      "eval_e": 0.39718225419664266,
      "eval_f1": 0.3538201477224622,
      "eval_loss": 4.4642109870910645,
      "eval_runtime": 121.3475,
      "eval_samples_per_second": 27.491,
      "eval_steps_per_second": 27.491,
      "step": 10000
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.134129438106408e-05,
      "loss": 1.381,
      "step": 10500
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.0452784589686178e-05,
      "loss": 1.4917,
      "step": 11000
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.9564274798308278e-05,
      "loss": 1.5309,
      "step": 11500
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.8675765006930378e-05,
      "loss": 1.534,
      "step": 12000
    },
    {
      "epoch": 1.28,
      "eval_e": 0.3983812949640288,
      "eval_f1": 0.361609584220443,
      "eval_loss": 4.218064785003662,
      "eval_runtime": 121.4358,
      "eval_samples_per_second": 27.471,
      "eval_steps_per_second": 27.471,
      "step": 12000
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.7787255215552478e-05,
      "loss": 1.4109,
      "step": 12500
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.6898745424174578e-05,
      "loss": 1.562,
      "step": 13000
    },
    {
      "epoch": 1.44,
      "learning_rate": 2.6010235632796674e-05,
      "loss": 1.6473,
      "step": 13500
    },
    {
      "epoch": 1.49,
      "learning_rate": 2.5121725841418774e-05,
      "loss": 1.2592,
      "step": 14000
    },
    {
      "epoch": 1.49,
      "eval_e": 0.4520383693045564,
      "eval_f1": 0.40414986746385745,
      "eval_loss": 4.98923397064209,
      "eval_runtime": 121.4673,
      "eval_samples_per_second": 27.464,
      "eval_steps_per_second": 27.464,
      "step": 14000
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.423321605004087e-05,
      "loss": 1.3429,
      "step": 14500
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.334470625866297e-05,
      "loss": 1.4802,
      "step": 15000
    },
    {
      "epoch": 1.65,
      "learning_rate": 2.245619646728507e-05,
      "loss": 1.4203,
      "step": 15500
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.156768667590717e-05,
      "loss": 1.2868,
      "step": 16000
    },
    {
      "epoch": 1.71,
      "eval_e": 0.4475419664268585,
      "eval_f1": 0.40389977547135164,
      "eval_loss": 4.433777809143066,
      "eval_runtime": 121.4975,
      "eval_samples_per_second": 27.457,
      "eval_steps_per_second": 27.457,
      "step": 16000
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.067917688452927e-05,
      "loss": 1.3837,
      "step": 16500
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.9790667093151367e-05,
      "loss": 1.3352,
      "step": 17000
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.8902157301773464e-05,
      "loss": 1.3861,
      "step": 17500
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.8013647510395564e-05,
      "loss": 1.4105,
      "step": 18000
    },
    {
      "epoch": 1.92,
      "eval_e": 0.4577338129496403,
      "eval_f1": 0.408643956398686,
      "eval_loss": 4.376325607299805,
      "eval_runtime": 121.4273,
      "eval_samples_per_second": 27.473,
      "eval_steps_per_second": 27.473,
      "step": 18000
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.7125137719017664e-05,
      "loss": 1.2234,
      "step": 18500
    },
    {
      "epoch": 2.03,
      "learning_rate": 1.6236627927639764e-05,
      "loss": 1.0492,
      "step": 19000
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.5348118136261864e-05,
      "loss": 0.8572,
      "step": 19500
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.4459608344883962e-05,
      "loss": 0.7944,
      "step": 20000
    },
    {
      "epoch": 2.13,
      "eval_e": 0.473621103117506,
      "eval_f1": 0.42407242585653226,
      "eval_loss": 4.196831703186035,
      "eval_runtime": 121.4007,
      "eval_samples_per_second": 27.479,
      "eval_steps_per_second": 27.479,
      "step": 20000
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3571098553506059e-05,
      "loss": 0.9145,
      "step": 20500
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.2682588762128159e-05,
      "loss": 0.8874,
      "step": 21000
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.1794078970750259e-05,
      "loss": 0.9073,
      "step": 21500
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.0905569179372357e-05,
      "loss": 0.791,
      "step": 22000
    },
    {
      "epoch": 2.35,
      "eval_e": 0.4697242206235012,
      "eval_f1": 0.4240145974837222,
      "eval_loss": 4.208236217498779,
      "eval_runtime": 121.3326,
      "eval_samples_per_second": 27.495,
      "eval_steps_per_second": 27.495,
      "step": 22000
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.0017059387994457e-05,
      "loss": 0.753,
      "step": 22500
    },
    {
      "epoch": 2.45,
      "learning_rate": 9.128549596616555e-06,
      "loss": 0.9797,
      "step": 23000
    },
    {
      "epoch": 2.51,
      "learning_rate": 8.240039805238655e-06,
      "loss": 0.8473,
      "step": 23500
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.351530013860753e-06,
      "loss": 0.8997,
      "step": 24000
    },
    {
      "epoch": 2.56,
      "eval_e": 0.4766187050359712,
      "eval_f1": 0.42464439616671873,
      "eval_loss": 4.4838666915893555,
      "eval_runtime": 121.474,
      "eval_samples_per_second": 27.463,
      "eval_steps_per_second": 27.463,
      "step": 24000
    },
    {
      "epoch": 2.61,
      "learning_rate": 6.463020222482852e-06,
      "loss": 0.8912,
      "step": 24500
    },
    {
      "epoch": 2.67,
      "learning_rate": 5.574510431104952e-06,
      "loss": 0.7544,
      "step": 25000
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.68600063972705e-06,
      "loss": 0.823,
      "step": 25500
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.7974908483491486e-06,
      "loss": 0.835,
      "step": 26000
    },
    {
      "epoch": 2.77,
      "eval_e": 0.48081534772182255,
      "eval_f1": 0.4289052534622072,
      "eval_loss": 3.9213685989379883,
      "eval_runtime": 121.3974,
      "eval_samples_per_second": 27.48,
      "eval_steps_per_second": 27.48,
      "step": 26000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.908981056971248e-06,
      "loss": 0.8194,
      "step": 26500
    },
    {
      "epoch": 2.88,
      "learning_rate": 2.020471265593347e-06,
      "loss": 0.7346,
      "step": 27000
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.131961474215446e-06,
      "loss": 0.642,
      "step": 27500
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.434516828375449e-07,
      "loss": 0.8905,
      "step": 28000
    },
    {
      "epoch": 2.99,
      "eval_e": 0.48231414868105515,
      "eval_f1": 0.4308855987601117,
      "eval_loss": 4.153212070465088,
      "eval_runtime": 121.3729,
      "eval_samples_per_second": 27.486,
      "eval_steps_per_second": 27.486,
      "step": 28000
    },
    {
      "epoch": 3.0,
      "step": 28137,
      "total_flos": 1.867929495867468e+16,
      "train_loss": 1.6027517270257226,
      "train_runtime": 3981.5918,
      "train_samples_per_second": 7.067,
      "train_steps_per_second": 7.067
    }
  ],
  "max_steps": 28137,
  "num_train_epochs": 3,
  "total_flos": 1.867929495867468e+16,
  "trial_name": null,
  "trial_params": null
}