{ "best_metric": 0.016893472522497177, "best_model_checkpoint": "RO-EN_1/checkpoint-6000", "epoch": 27.397260273972602, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.28, "learning_rate": 4.943150684931507e-06, "loss": 0.0535, "step": 500 }, { "epoch": 2.28, "eval_loss": 0.031102217733860016, "eval_mse": 0.031102217733860016, "eval_pearson_correlation": 0.8579101465486042, "eval_rmse": 0.1763582080602646, "eval_runtime": 2.5761, "eval_samples_per_second": 388.185, "eval_spearman_corr": 0.7634071672215601, "eval_steps_per_second": 12.422, "learning_rate": 4.943150684931507e-06, "step": 500 }, { "epoch": 4.57, "learning_rate": 4.886187214611872e-06, "loss": 0.0397, "step": 1000 }, { "epoch": 4.57, "eval_loss": 0.05550598353147507, "eval_mse": 0.05550597980618477, "eval_pearson_correlation": 0.8498936520403519, "eval_rmse": 0.23559707403182983, "eval_runtime": 2.579, "eval_samples_per_second": 387.749, "eval_spearman_corr": 0.758833553959186, "eval_steps_per_second": 12.408, "learning_rate": 4.886187214611872e-06, "step": 1000 }, { "epoch": 6.85, "learning_rate": 4.829109589041096e-06, "loss": 0.0337, "step": 1500 }, { "epoch": 6.85, "eval_loss": 0.021814454346895218, "eval_mse": 0.02181445248425007, "eval_pearson_correlation": 0.8574044008888592, "eval_rmse": 0.14769716560840607, "eval_runtime": 2.6081, "eval_samples_per_second": 383.423, "eval_spearman_corr": 0.7602827223453543, "eval_steps_per_second": 12.27, "learning_rate": 4.829109589041096e-06, "step": 1500 }, { "epoch": 9.13, "learning_rate": 4.77203196347032e-06, "loss": 0.0287, "step": 2000 }, { "epoch": 9.13, "eval_loss": 0.019617466256022453, "eval_mse": 0.019617464393377304, "eval_pearson_correlation": 0.8628991091021981, "eval_rmse": 0.1400623619556427, "eval_runtime": 2.6039, "eval_samples_per_second": 384.039, "eval_spearman_corr": 0.7663673153661713, "eval_steps_per_second": 12.289, "learning_rate": 4.77203196347032e-06, "step": 2000 }, { "epoch": 11.42, "learning_rate": 4.715068493150685e-06, "loss": 0.0245, "step": 2500 }, { "epoch": 11.42, "eval_loss": 0.02356332167983055, "eval_mse": 0.02356332167983055, "eval_pearson_correlation": 0.8441016733928287, "eval_rmse": 0.15350349247455597, "eval_runtime": 2.598, "eval_samples_per_second": 384.906, "eval_spearman_corr": 0.7481853356383714, "eval_steps_per_second": 12.317, "learning_rate": 4.715068493150685e-06, "step": 2500 }, { "epoch": 13.7, "learning_rate": 4.657990867579909e-06, "loss": 0.0222, "step": 3000 }, { "epoch": 13.7, "eval_loss": 0.027557892724871635, "eval_mse": 0.027557892724871635, "eval_pearson_correlation": 0.8438974893300436, "eval_rmse": 0.1660057008266449, "eval_runtime": 2.5972, "eval_samples_per_second": 385.026, "eval_spearman_corr": 0.7389656607088352, "eval_steps_per_second": 12.321, "learning_rate": 4.657990867579909e-06, "step": 3000 }, { "epoch": 15.98, "learning_rate": 4.6009132420091325e-06, "loss": 0.0192, "step": 3500 }, { "epoch": 15.98, "eval_loss": 0.01782875880599022, "eval_mse": 0.01782875880599022, "eval_pearson_correlation": 0.849721481092871, "eval_rmse": 0.13352437317371368, "eval_runtime": 2.5686, "eval_samples_per_second": 389.319, "eval_spearman_corr": 0.7444835858187753, "eval_steps_per_second": 12.458, "learning_rate": 4.6009132420091325e-06, "step": 3500 }, { "epoch": 18.26, "learning_rate": 4.5438356164383565e-06, "loss": 0.017, "step": 4000 }, { "epoch": 18.26, "eval_loss": 0.036885496228933334, "eval_mse": 0.036885492503643036, "eval_pearson_correlation": 0.8272909677260112, "eval_rmse": 0.19205595552921295, "eval_runtime": 2.5678, "eval_samples_per_second": 389.436, "eval_spearman_corr": 0.7383094416231242, "eval_steps_per_second": 12.462, "learning_rate": 4.5438356164383565e-06, "step": 4000 }, { "epoch": 20.55, "learning_rate": 4.4867579908675805e-06, "loss": 0.0156, "step": 4500 }, { "epoch": 20.55, "eval_loss": 0.02785499580204487, "eval_mse": 0.02785499580204487, "eval_pearson_correlation": 0.842883000565405, "eval_rmse": 0.16689816117286682, "eval_runtime": 2.5716, "eval_samples_per_second": 388.862, "eval_spearman_corr": 0.7491195713472709, "eval_steps_per_second": 12.444, "learning_rate": 4.4867579908675805e-06, "step": 4500 }, { "epoch": 22.83, "learning_rate": 4.429680365296804e-06, "loss": 0.0137, "step": 5000 }, { "epoch": 22.83, "eval_loss": 0.030997304245829582, "eval_mse": 0.03099730797111988, "eval_pearson_correlation": 0.8406359358077359, "eval_rmse": 0.1760605275630951, "eval_runtime": 2.6837, "eval_samples_per_second": 372.623, "eval_spearman_corr": 0.7367537850258313, "eval_steps_per_second": 11.924, "learning_rate": 4.429680365296804e-06, "step": 5000 }, { "epoch": 25.11, "learning_rate": 4.372602739726028e-06, "loss": 0.0121, "step": 5500 }, { "epoch": 25.11, "eval_loss": 0.01819545030593872, "eval_mse": 0.01819545030593872, "eval_pearson_correlation": 0.8390532054666895, "eval_rmse": 0.13489051163196564, "eval_runtime": 2.5637, "eval_samples_per_second": 390.062, "eval_spearman_corr": 0.7348519956483917, "eval_steps_per_second": 12.482, "learning_rate": 4.372602739726028e-06, "step": 5500 }, { "epoch": 27.4, "learning_rate": 4.315525114155252e-06, "loss": 0.0111, "step": 6000 }, { "epoch": 27.4, "eval_loss": 0.016893472522497177, "eval_mse": 0.016893472522497177, "eval_pearson_correlation": 0.8536975289078368, "eval_rmse": 0.12997488677501678, "eval_runtime": 2.5817, "eval_samples_per_second": 387.337, "eval_spearman_corr": 0.7512867640197094, "eval_steps_per_second": 12.395, "learning_rate": 4.315525114155252e-06, "step": 6000 } ], "max_steps": 43800, "num_train_epochs": 200, "total_flos": 5.23619884848744e+16, "trial_name": null, "trial_params": null }