{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 28137, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.91114902086221e-05, "loss": 4.1307, "step": 500 }, { "epoch": 0.11, "learning_rate": 4.82229804172442e-05, "loss": 3.2328, "step": 1000 }, { "epoch": 0.16, "learning_rate": 4.73344706258663e-05, "loss": 2.9206, "step": 1500 }, { "epoch": 0.21, "learning_rate": 4.64459608344884e-05, "loss": 2.8462, "step": 2000 }, { "epoch": 0.21, "eval_e": 0.23890887290167867, "eval_f1": 0.23467707071366814, "eval_loss": 5.362636089324951, "eval_runtime": 121.7016, "eval_samples_per_second": 27.411, "eval_steps_per_second": 27.411, "step": 2000 }, { "epoch": 0.27, "learning_rate": 4.55574510431105e-05, "loss": 2.9308, "step": 2500 }, { "epoch": 0.32, "learning_rate": 4.46689412517326e-05, "loss": 2.8088, "step": 3000 }, { "epoch": 0.37, "learning_rate": 4.378043146035469e-05, "loss": 2.5237, "step": 3500 }, { "epoch": 0.43, "learning_rate": 4.2891921668976795e-05, "loss": 2.5704, "step": 4000 }, { "epoch": 0.43, "eval_e": 0.3579136690647482, "eval_f1": 0.3288601709033254, "eval_loss": 3.1302592754364014, "eval_runtime": 121.4271, "eval_samples_per_second": 27.473, "eval_steps_per_second": 27.473, "step": 4000 }, { "epoch": 0.48, "learning_rate": 4.200341187759889e-05, "loss": 2.2721, "step": 4500 }, { "epoch": 0.53, "learning_rate": 4.111490208622099e-05, "loss": 2.5344, "step": 5000 }, { "epoch": 0.59, "learning_rate": 4.022639229484309e-05, "loss": 2.3429, "step": 5500 }, { "epoch": 0.64, "learning_rate": 3.933788250346519e-05, "loss": 2.3649, "step": 6000 }, { "epoch": 0.64, "eval_e": 0.3761990407673861, "eval_f1": 0.3487221168810336, "eval_loss": 4.601644039154053, "eval_runtime": 121.4022, "eval_samples_per_second": 27.479, "eval_steps_per_second": 27.479, "step": 6000 }, { "epoch": 0.69, "learning_rate": 3.844937271208729e-05, "loss": 2.125, "step": 6500 }, { "epoch": 0.75, "learning_rate": 3.756086292070939e-05, "loss": 2.241, "step": 7000 }, { "epoch": 0.8, "learning_rate": 3.6672353129331485e-05, "loss": 2.0385, "step": 7500 }, { "epoch": 0.85, "learning_rate": 3.578384333795359e-05, "loss": 2.3278, "step": 8000 }, { "epoch": 0.85, "eval_e": 0.4094724220623501, "eval_f1": 0.3706672296662482, "eval_loss": 4.3857340812683105, "eval_runtime": 121.5065, "eval_samples_per_second": 27.455, "eval_steps_per_second": 27.455, "step": 8000 }, { "epoch": 0.91, "learning_rate": 3.4895333546575685e-05, "loss": 2.2073, "step": 8500 }, { "epoch": 0.96, "learning_rate": 3.400682375519779e-05, "loss": 2.089, "step": 9000 }, { "epoch": 1.01, "learning_rate": 3.311831396381988e-05, "loss": 1.8698, "step": 9500 }, { "epoch": 1.07, "learning_rate": 3.222980417244198e-05, "loss": 1.4806, "step": 10000 }, { "epoch": 1.07, "eval_e": 0.39718225419664266, "eval_f1": 0.3538201477224622, "eval_loss": 4.4642109870910645, "eval_runtime": 121.3475, "eval_samples_per_second": 27.491, "eval_steps_per_second": 27.491, "step": 10000 }, { "epoch": 1.12, "learning_rate": 3.134129438106408e-05, "loss": 1.381, "step": 10500 }, { "epoch": 1.17, "learning_rate": 3.0452784589686178e-05, "loss": 1.4917, "step": 11000 }, { "epoch": 1.23, "learning_rate": 2.9564274798308278e-05, "loss": 1.5309, "step": 11500 }, { "epoch": 1.28, "learning_rate": 2.8675765006930378e-05, "loss": 1.534, "step": 12000 }, { "epoch": 1.28, "eval_e": 0.3983812949640288, "eval_f1": 0.361609584220443, "eval_loss": 4.218064785003662, "eval_runtime": 121.4358, "eval_samples_per_second": 27.471, "eval_steps_per_second": 27.471, "step": 12000 }, { "epoch": 1.33, "learning_rate": 2.7787255215552478e-05, "loss": 1.4109, "step": 12500 }, { "epoch": 1.39, "learning_rate": 2.6898745424174578e-05, "loss": 1.562, "step": 13000 }, { "epoch": 1.44, "learning_rate": 2.6010235632796674e-05, "loss": 1.6473, "step": 13500 }, { "epoch": 1.49, "learning_rate": 2.5121725841418774e-05, "loss": 1.2592, "step": 14000 }, { "epoch": 1.49, "eval_e": 0.4520383693045564, "eval_f1": 0.40414986746385745, "eval_loss": 4.98923397064209, "eval_runtime": 121.4673, "eval_samples_per_second": 27.464, "eval_steps_per_second": 27.464, "step": 14000 }, { "epoch": 1.55, "learning_rate": 2.423321605004087e-05, "loss": 1.3429, "step": 14500 }, { "epoch": 1.6, "learning_rate": 2.334470625866297e-05, "loss": 1.4802, "step": 15000 }, { "epoch": 1.65, "learning_rate": 2.245619646728507e-05, "loss": 1.4203, "step": 15500 }, { "epoch": 1.71, "learning_rate": 2.156768667590717e-05, "loss": 1.2868, "step": 16000 }, { "epoch": 1.71, "eval_e": 0.4475419664268585, "eval_f1": 0.40389977547135164, "eval_loss": 4.433777809143066, "eval_runtime": 121.4975, "eval_samples_per_second": 27.457, "eval_steps_per_second": 27.457, "step": 16000 }, { "epoch": 1.76, "learning_rate": 2.067917688452927e-05, "loss": 1.3837, "step": 16500 }, { "epoch": 1.81, "learning_rate": 1.9790667093151367e-05, "loss": 1.3352, "step": 17000 }, { "epoch": 1.87, "learning_rate": 1.8902157301773464e-05, "loss": 1.3861, "step": 17500 }, { "epoch": 1.92, "learning_rate": 1.8013647510395564e-05, "loss": 1.4105, "step": 18000 }, { "epoch": 1.92, "eval_e": 0.4577338129496403, "eval_f1": 0.408643956398686, "eval_loss": 4.376325607299805, "eval_runtime": 121.4273, "eval_samples_per_second": 27.473, "eval_steps_per_second": 27.473, "step": 18000 }, { "epoch": 1.97, "learning_rate": 1.7125137719017664e-05, "loss": 1.2234, "step": 18500 }, { "epoch": 2.03, "learning_rate": 1.6236627927639764e-05, "loss": 1.0492, "step": 19000 }, { "epoch": 2.08, "learning_rate": 1.5348118136261864e-05, "loss": 0.8572, "step": 19500 }, { "epoch": 2.13, "learning_rate": 1.4459608344883962e-05, "loss": 0.7944, "step": 20000 }, { "epoch": 2.13, "eval_e": 0.473621103117506, "eval_f1": 0.42407242585653226, "eval_loss": 4.196831703186035, "eval_runtime": 121.4007, "eval_samples_per_second": 27.479, "eval_steps_per_second": 27.479, "step": 20000 }, { "epoch": 2.19, "learning_rate": 1.3571098553506059e-05, "loss": 0.9145, "step": 20500 }, { "epoch": 2.24, "learning_rate": 1.2682588762128159e-05, "loss": 0.8874, "step": 21000 }, { "epoch": 2.29, "learning_rate": 1.1794078970750259e-05, "loss": 0.9073, "step": 21500 }, { "epoch": 2.35, "learning_rate": 1.0905569179372357e-05, "loss": 0.791, "step": 22000 }, { "epoch": 2.35, "eval_e": 0.4697242206235012, "eval_f1": 0.4240145974837222, "eval_loss": 4.208236217498779, "eval_runtime": 121.3326, "eval_samples_per_second": 27.495, "eval_steps_per_second": 27.495, "step": 22000 }, { "epoch": 2.4, "learning_rate": 1.0017059387994457e-05, "loss": 0.753, "step": 22500 }, { "epoch": 2.45, "learning_rate": 9.128549596616555e-06, "loss": 0.9797, "step": 23000 }, { "epoch": 2.51, "learning_rate": 8.240039805238655e-06, "loss": 0.8473, "step": 23500 }, { "epoch": 2.56, "learning_rate": 7.351530013860753e-06, "loss": 0.8997, "step": 24000 }, { "epoch": 2.56, "eval_e": 0.4766187050359712, "eval_f1": 0.42464439616671873, "eval_loss": 4.4838666915893555, "eval_runtime": 121.474, "eval_samples_per_second": 27.463, "eval_steps_per_second": 27.463, "step": 24000 }, { "epoch": 2.61, "learning_rate": 6.463020222482852e-06, "loss": 0.8912, "step": 24500 }, { "epoch": 2.67, "learning_rate": 5.574510431104952e-06, "loss": 0.7544, "step": 25000 }, { "epoch": 2.72, "learning_rate": 4.68600063972705e-06, "loss": 0.823, "step": 25500 }, { "epoch": 2.77, "learning_rate": 3.7974908483491486e-06, "loss": 0.835, "step": 26000 }, { "epoch": 2.77, "eval_e": 0.48081534772182255, "eval_f1": 0.4289052534622072, "eval_loss": 3.9213685989379883, "eval_runtime": 121.3974, "eval_samples_per_second": 27.48, "eval_steps_per_second": 27.48, "step": 26000 }, { "epoch": 2.83, "learning_rate": 2.908981056971248e-06, "loss": 0.8194, "step": 26500 }, { "epoch": 2.88, "learning_rate": 2.020471265593347e-06, "loss": 0.7346, "step": 27000 }, { "epoch": 2.93, "learning_rate": 1.131961474215446e-06, "loss": 0.642, "step": 27500 }, { "epoch": 2.99, "learning_rate": 2.434516828375449e-07, "loss": 0.8905, "step": 28000 }, { "epoch": 2.99, "eval_e": 0.48231414868105515, "eval_f1": 0.4308855987601117, "eval_loss": 4.153212070465088, "eval_runtime": 121.3729, "eval_samples_per_second": 27.486, "eval_steps_per_second": 27.486, "step": 28000 }, { "epoch": 3.0, "step": 28137, "total_flos": 1.867929495867468e+16, "train_loss": 1.6027517270257226, "train_runtime": 3981.5918, "train_samples_per_second": 7.067, "train_steps_per_second": 7.067 } ], "max_steps": 28137, "num_train_epochs": 3, "total_flos": 1.867929495867468e+16, "trial_name": null, "trial_params": null }