{ "best_metric": 1.1135584115982056, "best_model_checkpoint": "./model/emotion_classification/checkpoint-301", "epoch": 30.0, "eval_steps": 500, "global_step": 1290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 9.972609476841367e-05, "loss": 1.9172, "step": 43 }, { "epoch": 1.0, "eval_accuracy": 0.43333333333333335, "eval_f1": 0.32631946371225834, "eval_loss": 1.5750515460968018, "eval_runtime": 4.0633, "eval_samples_per_second": 29.533, "eval_steps_per_second": 1.969, "step": 43 }, { "epoch": 2.0, "learning_rate": 9.890738003669029e-05, "loss": 1.4505, "step": 86 }, { "epoch": 2.0, "eval_accuracy": 0.5333333333333333, "eval_f1": 0.4650626007978949, "eval_loss": 1.3041330575942993, "eval_runtime": 4.1206, "eval_samples_per_second": 29.122, "eval_steps_per_second": 1.941, "step": 86 }, { "epoch": 3.0, "learning_rate": 9.755282581475769e-05, "loss": 1.1121, "step": 129 }, { "epoch": 3.0, "eval_accuracy": 0.48333333333333334, "eval_f1": 0.46841408327694595, "eval_loss": 1.2902252674102783, "eval_runtime": 4.2082, "eval_samples_per_second": 28.516, "eval_steps_per_second": 1.901, "step": 129 }, { "epoch": 4.0, "learning_rate": 9.567727288213005e-05, "loss": 0.8491, "step": 172 }, { "epoch": 4.0, "eval_accuracy": 0.5166666666666667, "eval_f1": 0.49163216814145605, "eval_loss": 1.2308896780014038, "eval_runtime": 4.2459, "eval_samples_per_second": 28.263, "eval_steps_per_second": 1.884, "step": 172 }, { "epoch": 5.0, "learning_rate": 9.330127018922194e-05, "loss": 0.6168, "step": 215 }, { "epoch": 5.0, "eval_accuracy": 0.5583333333333333, "eval_f1": 0.5309547133900583, "eval_loss": 1.2573224306106567, "eval_runtime": 4.1217, "eval_samples_per_second": 29.114, "eval_steps_per_second": 1.941, "step": 215 }, { "epoch": 6.0, "learning_rate": 9.045084971874738e-05, "loss": 0.3953, "step": 258 }, { "epoch": 6.0, "eval_accuracy": 0.575, "eval_f1": 0.5400530867590878, "eval_loss": 1.1502172946929932, "eval_runtime": 4.3017, "eval_samples_per_second": 27.896, "eval_steps_per_second": 1.86, "step": 258 }, { "epoch": 7.0, "learning_rate": 8.715724127386972e-05, "loss": 0.3048, "step": 301 }, { "epoch": 7.0, "eval_accuracy": 0.65, "eval_f1": 0.6231481481481482, "eval_loss": 1.1135584115982056, "eval_runtime": 4.1683, "eval_samples_per_second": 28.789, "eval_steps_per_second": 1.919, "step": 301 }, { "epoch": 8.0, "learning_rate": 8.345653031794292e-05, "loss": 0.1875, "step": 344 }, { "epoch": 8.0, "eval_accuracy": 0.5666666666666667, "eval_f1": 0.5597741659473293, "eval_loss": 1.4224319458007812, "eval_runtime": 4.2212, "eval_samples_per_second": 28.428, "eval_steps_per_second": 1.895, "step": 344 }, { "epoch": 9.0, "learning_rate": 7.938926261462366e-05, "loss": 0.1277, "step": 387 }, { "epoch": 9.0, "eval_accuracy": 0.6166666666666667, "eval_f1": 0.6011135939243728, "eval_loss": 1.346667766571045, "eval_runtime": 3.955, "eval_samples_per_second": 30.341, "eval_steps_per_second": 2.023, "step": 387 }, { "epoch": 10.0, "learning_rate": 7.500000000000001e-05, "loss": 0.1123, "step": 430 }, { "epoch": 10.0, "eval_accuracy": 0.5833333333333334, "eval_f1": 0.5656809749645115, "eval_loss": 1.583767294883728, "eval_runtime": 4.0704, "eval_samples_per_second": 29.481, "eval_steps_per_second": 1.965, "step": 430 }, { "epoch": 11.0, "learning_rate": 7.033683215379002e-05, "loss": 0.1123, "step": 473 }, { "epoch": 11.0, "eval_accuracy": 0.5833333333333334, "eval_f1": 0.5549914858886633, "eval_loss": 1.50627601146698, "eval_runtime": 4.0673, "eval_samples_per_second": 29.504, "eval_steps_per_second": 1.967, "step": 473 }, { "epoch": 12.0, "learning_rate": 6.545084971874738e-05, "loss": 0.0694, "step": 516 }, { "epoch": 12.0, "eval_accuracy": 0.55, "eval_f1": 0.5320146001860588, "eval_loss": 1.7733001708984375, "eval_runtime": 4.0123, "eval_samples_per_second": 29.908, "eval_steps_per_second": 1.994, "step": 516 }, { "epoch": 13.0, "learning_rate": 6.0395584540887963e-05, "loss": 0.0499, "step": 559 }, { "epoch": 13.0, "eval_accuracy": 0.5833333333333334, "eval_f1": 0.5536173850790786, "eval_loss": 1.6328585147857666, "eval_runtime": 4.0403, "eval_samples_per_second": 29.701, "eval_steps_per_second": 1.98, "step": 559 }, { "epoch": 14.0, "learning_rate": 5.522642316338268e-05, "loss": 0.0367, "step": 602 }, { "epoch": 14.0, "eval_accuracy": 0.5833333333333334, "eval_f1": 0.5684853336495889, "eval_loss": 1.6878242492675781, "eval_runtime": 3.9845, "eval_samples_per_second": 30.116, "eval_steps_per_second": 2.008, "step": 602 }, { "epoch": 15.0, "learning_rate": 5e-05, "loss": 0.0291, "step": 645 }, { "epoch": 15.0, "eval_accuracy": 0.575, "eval_f1": 0.5392005606664051, "eval_loss": 1.685531735420227, "eval_runtime": 3.9802, "eval_samples_per_second": 30.149, "eval_steps_per_second": 2.01, "step": 645 }, { "epoch": 16.0, "learning_rate": 4.477357683661734e-05, "loss": 0.0284, "step": 688 }, { "epoch": 16.0, "eval_accuracy": 0.6083333333333333, "eval_f1": 0.5880117663277057, "eval_loss": 1.7869312763214111, "eval_runtime": 4.0749, "eval_samples_per_second": 29.448, "eval_steps_per_second": 1.963, "step": 688 }, { "epoch": 17.0, "learning_rate": 3.960441545911204e-05, "loss": 0.0316, "step": 731 }, { "epoch": 17.0, "eval_accuracy": 0.5916666666666667, "eval_f1": 0.566969594710963, "eval_loss": 1.5830930471420288, "eval_runtime": 4.0457, "eval_samples_per_second": 29.661, "eval_steps_per_second": 1.977, "step": 731 }, { "epoch": 18.0, "learning_rate": 3.4549150281252636e-05, "loss": 0.0273, "step": 774 }, { "epoch": 18.0, "eval_accuracy": 0.625, "eval_f1": 0.598442406793843, "eval_loss": 1.5932706594467163, "eval_runtime": 4.1291, "eval_samples_per_second": 29.062, "eval_steps_per_second": 1.937, "step": 774 }, { "epoch": 19.0, "learning_rate": 2.9663167846209998e-05, "loss": 0.0234, "step": 817 }, { "epoch": 19.0, "eval_accuracy": 0.5833333333333334, "eval_f1": 0.5652118457947398, "eval_loss": 1.7830352783203125, "eval_runtime": 4.2401, "eval_samples_per_second": 28.301, "eval_steps_per_second": 1.887, "step": 817 }, { "epoch": 20.0, "learning_rate": 2.500000000000001e-05, "loss": 0.0194, "step": 860 }, { "epoch": 20.0, "eval_accuracy": 0.6083333333333333, "eval_f1": 0.5878385871868214, "eval_loss": 1.6804471015930176, "eval_runtime": 3.9914, "eval_samples_per_second": 30.065, "eval_steps_per_second": 2.004, "step": 860 }, { "epoch": 21.0, "learning_rate": 2.061073738537635e-05, "loss": 0.0214, "step": 903 }, { "epoch": 21.0, "eval_accuracy": 0.6, "eval_f1": 0.5700534489379734, "eval_loss": 1.596238374710083, "eval_runtime": 4.0432, "eval_samples_per_second": 29.68, "eval_steps_per_second": 1.979, "step": 903 }, { "epoch": 22.0, "learning_rate": 1.6543469682057106e-05, "loss": 0.0204, "step": 946 }, { "epoch": 22.0, "eval_accuracy": 0.625, "eval_f1": 0.5992033196773662, "eval_loss": 1.5684361457824707, "eval_runtime": 4.0843, "eval_samples_per_second": 29.381, "eval_steps_per_second": 1.959, "step": 946 }, { "epoch": 23.0, "learning_rate": 1.2842758726130283e-05, "loss": 0.0178, "step": 989 }, { "epoch": 23.0, "eval_accuracy": 0.625, "eval_f1": 0.5992033196773662, "eval_loss": 1.592431664466858, "eval_runtime": 4.0813, "eval_samples_per_second": 29.402, "eval_steps_per_second": 1.96, "step": 989 }, { "epoch": 24.0, "learning_rate": 9.549150281252633e-06, "loss": 0.0173, "step": 1032 }, { "epoch": 24.0, "eval_accuracy": 0.6166666666666667, "eval_f1": 0.5932522595359776, "eval_loss": 1.6227874755859375, "eval_runtime": 4.2004, "eval_samples_per_second": 28.569, "eval_steps_per_second": 1.905, "step": 1032 }, { "epoch": 25.0, "learning_rate": 6.698729810778065e-06, "loss": 0.016, "step": 1075 }, { "epoch": 25.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.6072678358063984, "eval_loss": 1.6176973581314087, "eval_runtime": 4.0151, "eval_samples_per_second": 29.887, "eval_steps_per_second": 1.992, "step": 1075 }, { "epoch": 26.0, "learning_rate": 4.322727117869951e-06, "loss": 0.016, "step": 1118 }, { "epoch": 26.0, "eval_accuracy": 0.625, "eval_f1": 0.6009141292059813, "eval_loss": 1.6267857551574707, "eval_runtime": 4.055, "eval_samples_per_second": 29.593, "eval_steps_per_second": 1.973, "step": 1118 }, { "epoch": 27.0, "learning_rate": 2.4471741852423237e-06, "loss": 0.016, "step": 1161 }, { "epoch": 27.0, "eval_accuracy": 0.625, "eval_f1": 0.6009141292059813, "eval_loss": 1.6387226581573486, "eval_runtime": 4.2902, "eval_samples_per_second": 27.971, "eval_steps_per_second": 1.865, "step": 1161 }, { "epoch": 28.0, "learning_rate": 1.0926199633097157e-06, "loss": 0.0159, "step": 1204 }, { "epoch": 28.0, "eval_accuracy": 0.625, "eval_f1": 0.6009141292059813, "eval_loss": 1.6403223276138306, "eval_runtime": 4.1003, "eval_samples_per_second": 29.266, "eval_steps_per_second": 1.951, "step": 1204 }, { "epoch": 29.0, "learning_rate": 2.7390523158633554e-07, "loss": 0.0162, "step": 1247 }, { "epoch": 29.0, "eval_accuracy": 0.625, "eval_f1": 0.6009141292059813, "eval_loss": 1.6409173011779785, "eval_runtime": 4.0023, "eval_samples_per_second": 29.983, "eval_steps_per_second": 1.999, "step": 1247 }, { "epoch": 30.0, "learning_rate": 0.0, "loss": 0.018, "step": 1290 }, { "epoch": 30.0, "eval_accuracy": 0.625, "eval_f1": 0.6009141292059813, "eval_loss": 1.6411793231964111, "eval_runtime": 4.269, "eval_samples_per_second": 28.11, "eval_steps_per_second": 1.874, "step": 1290 }, { "epoch": 30.0, "step": 1290, "total_flos": 1.5809215993675776e+18, "train_loss": 0.25586533430934877, "train_runtime": 2391.6751, "train_samples_per_second": 8.53, "train_steps_per_second": 0.539 } ], "logging_steps": 500, "max_steps": 1290, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.5809215993675776e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }