{ "best_global_step": 80, "best_metric": 0.04149133339524269, "best_model_checkpoint": "models/me5-large-retraining\\checkpoint-80", "epoch": 4.0, "eval_steps": 5, "global_step": 148, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 0.10950354486703873, "eval_pearson_cosine": 0.7806081242807599, "eval_runtime": 154.7907, "eval_samples_per_second": 6.499, "eval_spearman_cosine": 0.7843279594448466, "eval_steps_per_second": 0.814, "step": 0 }, { "epoch": 0.13513513513513514, "grad_norm": 9.38223934173584, "learning_rate": 1e-05, "loss": 0.6784, "step": 5 }, { "epoch": 0.13513513513513514, "eval_loss": 0.07647562772035599, "eval_pearson_cosine": 0.8193313583571735, "eval_runtime": 154.4069, "eval_samples_per_second": 6.515, "eval_spearman_cosine": 0.8122999445241028, "eval_steps_per_second": 0.816, "step": 5 }, { "epoch": 0.2702702702702703, "grad_norm": 8.665059089660645, "learning_rate": 1e-05, "loss": 0.5088, "step": 10 }, { "epoch": 0.2702702702702703, "eval_loss": 0.05329965054988861, "eval_pearson_cosine": 0.846437709431274, "eval_runtime": 157.2408, "eval_samples_per_second": 6.398, "eval_spearman_cosine": 0.8303112726354404, "eval_steps_per_second": 0.801, "step": 10 }, { "epoch": 0.40540540540540543, "grad_norm": 21.96602439880371, "learning_rate": 1e-05, "loss": 0.4364, "step": 15 }, { "epoch": 0.40540540540540543, "eval_loss": 0.047497160732746124, "eval_pearson_cosine": 0.8497791216636505, "eval_runtime": 153.8649, "eval_samples_per_second": 6.538, "eval_spearman_cosine": 0.8338916292060913, "eval_steps_per_second": 0.819, "step": 15 }, { "epoch": 0.5405405405405406, "grad_norm": 10.698002815246582, "learning_rate": 1e-05, "loss": 0.3456, "step": 20 }, { "epoch": 0.5405405405405406, "eval_loss": 0.043546345084905624, "eval_pearson_cosine": 0.8481768671521202, "eval_runtime": 154.4013, "eval_samples_per_second": 6.515, "eval_spearman_cosine": 0.8344803713886917, "eval_steps_per_second": 0.816, "step": 20 }, { "epoch": 0.6756756756756757, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.1423, "step": 25 }, { "epoch": 0.6756756756756757, "eval_loss": 0.042350731790065765, "eval_pearson_cosine": 0.8419294236114762, "eval_runtime": 153.4308, "eval_samples_per_second": 6.557, "eval_spearman_cosine": 0.8324197823497284, "eval_steps_per_second": 0.821, "step": 25 }, { "epoch": 0.8108108108108109, "grad_norm": 11.502903938293457, "learning_rate": 1e-05, "loss": 0.2852, "step": 30 }, { "epoch": 0.8108108108108109, "eval_loss": 0.04431174322962761, "eval_pearson_cosine": 0.8311833878571917, "eval_runtime": 153.1117, "eval_samples_per_second": 6.57, "eval_spearman_cosine": 0.8270800450821792, "eval_steps_per_second": 0.823, "step": 30 }, { "epoch": 0.9459459459459459, "grad_norm": 13.956666946411133, "learning_rate": 1e-05, "loss": 0.2616, "step": 35 }, { "epoch": 0.9459459459459459, "eval_loss": 0.05144113302230835, "eval_pearson_cosine": 0.8323883964862309, "eval_runtime": 153.6257, "eval_samples_per_second": 6.548, "eval_spearman_cosine": 0.8261627064456549, "eval_steps_per_second": 0.82, "step": 35 }, { "epoch": 1.0810810810810811, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.1451, "step": 40 }, { "epoch": 1.0810810810810811, "eval_loss": 0.052108317613601685, "eval_pearson_cosine": 0.8322596480425719, "eval_runtime": 152.7732, "eval_samples_per_second": 6.585, "eval_spearman_cosine": 0.8232463910787938, "eval_steps_per_second": 0.825, "step": 40 }, { "epoch": 1.2162162162162162, "grad_norm": 11.031551361083984, "learning_rate": 1e-05, "loss": 0.2046, "step": 45 }, { "epoch": 1.2162162162162162, "eval_loss": 0.049648430198431015, "eval_pearson_cosine": 0.8345968763719022, "eval_runtime": 151.5419, "eval_samples_per_second": 6.638, "eval_spearman_cosine": 0.8220928743948337, "eval_steps_per_second": 0.831, "step": 45 }, { "epoch": 1.3513513513513513, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.055, "step": 50 }, { "epoch": 1.3513513513513513, "eval_loss": 0.05155247077345848, "eval_pearson_cosine": 0.8295312877735412, "eval_runtime": 152.4645, "eval_samples_per_second": 6.598, "eval_spearman_cosine": 0.8197002635410278, "eval_steps_per_second": 0.826, "step": 50 }, { "epoch": 1.4864864864864864, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0956, "step": 55 }, { "epoch": 1.4864864864864864, "eval_loss": 0.05453842505812645, "eval_pearson_cosine": 0.8258612961974536, "eval_runtime": 151.2464, "eval_samples_per_second": 6.651, "eval_spearman_cosine": 0.8190362223126076, "eval_steps_per_second": 0.833, "step": 55 }, { "epoch": 1.6216216216216215, "grad_norm": 10.073129653930664, "learning_rate": 1e-05, "loss": 0.1213, "step": 60 }, { "epoch": 1.6216216216216215, "eval_loss": 0.05331311747431755, "eval_pearson_cosine": 0.8280490163439709, "eval_runtime": 151.9002, "eval_samples_per_second": 6.623, "eval_spearman_cosine": 0.8212679517806181, "eval_steps_per_second": 0.829, "step": 60 }, { "epoch": 1.7567567567567568, "grad_norm": 8.610294342041016, "learning_rate": 1e-05, "loss": 0.2378, "step": 65 }, { "epoch": 1.7567567567567568, "eval_loss": 0.04638493061065674, "eval_pearson_cosine": 0.8348764332747338, "eval_runtime": 151.3483, "eval_samples_per_second": 6.647, "eval_spearman_cosine": 0.8253343633484949, "eval_steps_per_second": 0.833, "step": 65 }, { "epoch": 1.8918918918918919, "grad_norm": 7.2265729904174805, "learning_rate": 1e-05, "loss": 0.2723, "step": 70 }, { "epoch": 1.8918918918918919, "eval_loss": 0.04580773040652275, "eval_pearson_cosine": 0.8327080612027193, "eval_runtime": 151.0951, "eval_samples_per_second": 6.658, "eval_spearman_cosine": 0.8249373111883099, "eval_steps_per_second": 0.834, "step": 70 }, { "epoch": 2.027027027027027, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0603, "step": 75 }, { "epoch": 2.027027027027027, "eval_loss": 0.04667947068810463, "eval_pearson_cosine": 0.8292172905350813, "eval_runtime": 150.817, "eval_samples_per_second": 6.67, "eval_spearman_cosine": 0.8226234223032437, "eval_steps_per_second": 0.835, "step": 75 }, { "epoch": 2.1621621621621623, "grad_norm": 7.278922080993652, "learning_rate": 1e-05, "loss": 0.1089, "step": 80 }, { "epoch": 2.1621621621621623, "eval_loss": 0.04149133339524269, "eval_pearson_cosine": 0.8348787263877363, "eval_runtime": 151.9077, "eval_samples_per_second": 6.622, "eval_spearman_cosine": 0.8262517044196894, "eval_steps_per_second": 0.829, "step": 80 }, { "epoch": 2.2972972972972974, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0813, "step": 85 }, { "epoch": 2.2972972972972974, "eval_loss": 0.041691090911626816, "eval_pearson_cosine": 0.834739922043313, "eval_runtime": 151.4069, "eval_samples_per_second": 6.644, "eval_spearman_cosine": 0.8269978977904043, "eval_steps_per_second": 0.832, "step": 85 }, { "epoch": 2.4324324324324325, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0, "step": 90 }, { "epoch": 2.4324324324324325, "eval_loss": 0.043681543320417404, "eval_pearson_cosine": 0.8300953895591171, "eval_runtime": 151.2024, "eval_samples_per_second": 6.653, "eval_spearman_cosine": 0.8249920776743955, "eval_steps_per_second": 0.833, "step": 90 }, { "epoch": 2.5675675675675675, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0436, "step": 95 }, { "epoch": 2.5675675675675675, "eval_loss": 0.04666070267558098, "eval_pearson_cosine": 0.8280036278693699, "eval_runtime": 150.6994, "eval_samples_per_second": 6.676, "eval_spearman_cosine": 0.8241911129581995, "eval_steps_per_second": 0.836, "step": 95 }, { "epoch": 2.7027027027027026, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0, "step": 100 }, { "epoch": 2.7027027027027026, "eval_loss": 0.04513184353709221, "eval_pearson_cosine": 0.827662426497365, "eval_runtime": 151.7254, "eval_samples_per_second": 6.63, "eval_spearman_cosine": 0.8241911153867979, "eval_steps_per_second": 0.83, "step": 100 }, { "epoch": 2.8378378378378377, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0, "step": 105 }, { "epoch": 2.8378378378378377, "eval_loss": 0.045100126415491104, "eval_pearson_cosine": 0.8271686930217653, "eval_runtime": 150.6791, "eval_samples_per_second": 6.676, "eval_spearman_cosine": 0.8242595734942031, "eval_steps_per_second": 0.836, "step": 105 }, { "epoch": 2.972972972972973, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0271, "step": 110 }, { "epoch": 2.972972972972973, "eval_loss": 0.04326998442411423, "eval_pearson_cosine": 0.8242998475213165, "eval_runtime": 151.5644, "eval_samples_per_second": 6.637, "eval_spearman_cosine": 0.8243348749833265, "eval_steps_per_second": 0.831, "step": 110 }, { "epoch": 3.108108108108108, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.007, "step": 115 }, { "epoch": 3.108108108108108, "eval_loss": 0.050163887441158295, "eval_pearson_cosine": 0.8100599157021782, "eval_runtime": 149.8939, "eval_samples_per_second": 6.711, "eval_spearman_cosine": 0.8195085832550942, "eval_steps_per_second": 0.841, "step": 115 }, { "epoch": 3.2432432432432434, "grad_norm": 5.909173011779785, "learning_rate": 1e-05, "loss": 0.1025, "step": 120 }, { "epoch": 3.2432432432432434, "eval_loss": 0.052336592227220535, "eval_pearson_cosine": 0.8092739374985023, "eval_runtime": 151.2544, "eval_samples_per_second": 6.651, "eval_spearman_cosine": 0.8194743493718913, "eval_steps_per_second": 0.833, "step": 120 }, { "epoch": 3.3783783783783785, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.1244, "step": 125 }, { "epoch": 3.3783783783783785, "eval_loss": 0.05269436165690422, "eval_pearson_cosine": 0.8212737827789367, "eval_runtime": 150.843, "eval_samples_per_second": 6.669, "eval_spearman_cosine": 0.8250605382131625, "eval_steps_per_second": 0.835, "step": 125 }, { "epoch": 3.5135135135135136, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0, "step": 130 }, { "epoch": 3.5135135135135136, "eval_loss": 0.05343884229660034, "eval_pearson_cosine": 0.8257663666576973, "eval_runtime": 150.9582, "eval_samples_per_second": 6.664, "eval_spearman_cosine": 0.8261900896885362, "eval_steps_per_second": 0.835, "step": 130 }, { "epoch": 3.6486486486486487, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0259, "step": 135 }, { "epoch": 3.6486486486486487, "eval_loss": 0.05709109827876091, "eval_pearson_cosine": 0.8294106767298322, "eval_runtime": 151.0358, "eval_samples_per_second": 6.661, "eval_spearman_cosine": 0.826217475365987, "eval_steps_per_second": 0.834, "step": 135 }, { "epoch": 3.7837837837837838, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.0939, "step": 140 }, { "epoch": 3.7837837837837838, "eval_loss": 0.05264894291758537, "eval_pearson_cosine": 0.8333850683405307, "eval_runtime": 151.069, "eval_samples_per_second": 6.659, "eval_spearman_cosine": 0.8273128026466706, "eval_steps_per_second": 0.834, "step": 140 }, { "epoch": 3.918918918918919, "grad_norm": 7.900262832641602, "learning_rate": 1e-05, "loss": 0.1038, "step": 145 }, { "epoch": 3.918918918918919, "eval_loss": 0.0527377724647522, "eval_pearson_cosine": 0.8334934833047165, "eval_runtime": 150.8011, "eval_samples_per_second": 6.671, "eval_spearman_cosine": 0.8261353280714282, "eval_steps_per_second": 0.836, "step": 145 } ], "logging_steps": 5, "max_steps": 148, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }