| { | |
| "best_global_step": 80, | |
| "best_metric": 0.04149133339524269, | |
| "best_model_checkpoint": "models/me5-large-retraining\\checkpoint-80", | |
| "epoch": 4.0, | |
| "eval_steps": 5, | |
| "global_step": 148, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 0.10950354486703873, | |
| "eval_pearson_cosine": 0.7806081242807599, | |
| "eval_runtime": 154.7907, | |
| "eval_samples_per_second": 6.499, | |
| "eval_spearman_cosine": 0.7843279594448466, | |
| "eval_steps_per_second": 0.814, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 9.38223934173584, | |
| "learning_rate": 1e-05, | |
| "loss": 0.6784, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "eval_loss": 0.07647562772035599, | |
| "eval_pearson_cosine": 0.8193313583571735, | |
| "eval_runtime": 154.4069, | |
| "eval_samples_per_second": 6.515, | |
| "eval_spearman_cosine": 0.8122999445241028, | |
| "eval_steps_per_second": 0.816, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 8.665059089660645, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5088, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "eval_loss": 0.05329965054988861, | |
| "eval_pearson_cosine": 0.846437709431274, | |
| "eval_runtime": 157.2408, | |
| "eval_samples_per_second": 6.398, | |
| "eval_spearman_cosine": 0.8303112726354404, | |
| "eval_steps_per_second": 0.801, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 21.96602439880371, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4364, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "eval_loss": 0.047497160732746124, | |
| "eval_pearson_cosine": 0.8497791216636505, | |
| "eval_runtime": 153.8649, | |
| "eval_samples_per_second": 6.538, | |
| "eval_spearman_cosine": 0.8338916292060913, | |
| "eval_steps_per_second": 0.819, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 10.698002815246582, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3456, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "eval_loss": 0.043546345084905624, | |
| "eval_pearson_cosine": 0.8481768671521202, | |
| "eval_runtime": 154.4013, | |
| "eval_samples_per_second": 6.515, | |
| "eval_spearman_cosine": 0.8344803713886917, | |
| "eval_steps_per_second": 0.816, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1423, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "eval_loss": 0.042350731790065765, | |
| "eval_pearson_cosine": 0.8419294236114762, | |
| "eval_runtime": 153.4308, | |
| "eval_samples_per_second": 6.557, | |
| "eval_spearman_cosine": 0.8324197823497284, | |
| "eval_steps_per_second": 0.821, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 11.502903938293457, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2852, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "eval_loss": 0.04431174322962761, | |
| "eval_pearson_cosine": 0.8311833878571917, | |
| "eval_runtime": 153.1117, | |
| "eval_samples_per_second": 6.57, | |
| "eval_spearman_cosine": 0.8270800450821792, | |
| "eval_steps_per_second": 0.823, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 13.956666946411133, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2616, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "eval_loss": 0.05144113302230835, | |
| "eval_pearson_cosine": 0.8323883964862309, | |
| "eval_runtime": 153.6257, | |
| "eval_samples_per_second": 6.548, | |
| "eval_spearman_cosine": 0.8261627064456549, | |
| "eval_steps_per_second": 0.82, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1451, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "eval_loss": 0.052108317613601685, | |
| "eval_pearson_cosine": 0.8322596480425719, | |
| "eval_runtime": 152.7732, | |
| "eval_samples_per_second": 6.585, | |
| "eval_spearman_cosine": 0.8232463910787938, | |
| "eval_steps_per_second": 0.825, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 11.031551361083984, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2046, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "eval_loss": 0.049648430198431015, | |
| "eval_pearson_cosine": 0.8345968763719022, | |
| "eval_runtime": 151.5419, | |
| "eval_samples_per_second": 6.638, | |
| "eval_spearman_cosine": 0.8220928743948337, | |
| "eval_steps_per_second": 0.831, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.055, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "eval_loss": 0.05155247077345848, | |
| "eval_pearson_cosine": 0.8295312877735412, | |
| "eval_runtime": 152.4645, | |
| "eval_samples_per_second": 6.598, | |
| "eval_spearman_cosine": 0.8197002635410278, | |
| "eval_steps_per_second": 0.826, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0956, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "eval_loss": 0.05453842505812645, | |
| "eval_pearson_cosine": 0.8258612961974536, | |
| "eval_runtime": 151.2464, | |
| "eval_samples_per_second": 6.651, | |
| "eval_spearman_cosine": 0.8190362223126076, | |
| "eval_steps_per_second": 0.833, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "grad_norm": 10.073129653930664, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1213, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "eval_loss": 0.05331311747431755, | |
| "eval_pearson_cosine": 0.8280490163439709, | |
| "eval_runtime": 151.9002, | |
| "eval_samples_per_second": 6.623, | |
| "eval_spearman_cosine": 0.8212679517806181, | |
| "eval_steps_per_second": 0.829, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 8.610294342041016, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2378, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "eval_loss": 0.04638493061065674, | |
| "eval_pearson_cosine": 0.8348764332747338, | |
| "eval_runtime": 151.3483, | |
| "eval_samples_per_second": 6.647, | |
| "eval_spearman_cosine": 0.8253343633484949, | |
| "eval_steps_per_second": 0.833, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "grad_norm": 7.2265729904174805, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2723, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "eval_loss": 0.04580773040652275, | |
| "eval_pearson_cosine": 0.8327080612027193, | |
| "eval_runtime": 151.0951, | |
| "eval_samples_per_second": 6.658, | |
| "eval_spearman_cosine": 0.8249373111883099, | |
| "eval_steps_per_second": 0.834, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.027027027027027, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0603, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.027027027027027, | |
| "eval_loss": 0.04667947068810463, | |
| "eval_pearson_cosine": 0.8292172905350813, | |
| "eval_runtime": 150.817, | |
| "eval_samples_per_second": 6.67, | |
| "eval_spearman_cosine": 0.8226234223032437, | |
| "eval_steps_per_second": 0.835, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "grad_norm": 7.278922080993652, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1089, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "eval_loss": 0.04149133339524269, | |
| "eval_pearson_cosine": 0.8348787263877363, | |
| "eval_runtime": 151.9077, | |
| "eval_samples_per_second": 6.622, | |
| "eval_spearman_cosine": 0.8262517044196894, | |
| "eval_steps_per_second": 0.829, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.2972972972972974, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0813, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 2.2972972972972974, | |
| "eval_loss": 0.041691090911626816, | |
| "eval_pearson_cosine": 0.834739922043313, | |
| "eval_runtime": 151.4069, | |
| "eval_samples_per_second": 6.644, | |
| "eval_spearman_cosine": 0.8269978977904043, | |
| "eval_steps_per_second": 0.832, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "eval_loss": 0.043681543320417404, | |
| "eval_pearson_cosine": 0.8300953895591171, | |
| "eval_runtime": 151.2024, | |
| "eval_samples_per_second": 6.653, | |
| "eval_spearman_cosine": 0.8249920776743955, | |
| "eval_steps_per_second": 0.833, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.5675675675675675, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0436, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 2.5675675675675675, | |
| "eval_loss": 0.04666070267558098, | |
| "eval_pearson_cosine": 0.8280036278693699, | |
| "eval_runtime": 150.6994, | |
| "eval_samples_per_second": 6.676, | |
| "eval_spearman_cosine": 0.8241911129581995, | |
| "eval_steps_per_second": 0.836, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "eval_loss": 0.04513184353709221, | |
| "eval_pearson_cosine": 0.827662426497365, | |
| "eval_runtime": 151.7254, | |
| "eval_samples_per_second": 6.63, | |
| "eval_spearman_cosine": 0.8241911153867979, | |
| "eval_steps_per_second": 0.83, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.8378378378378377, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.8378378378378377, | |
| "eval_loss": 0.045100126415491104, | |
| "eval_pearson_cosine": 0.8271686930217653, | |
| "eval_runtime": 150.6791, | |
| "eval_samples_per_second": 6.676, | |
| "eval_spearman_cosine": 0.8242595734942031, | |
| "eval_steps_per_second": 0.836, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0271, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "eval_loss": 0.04326998442411423, | |
| "eval_pearson_cosine": 0.8242998475213165, | |
| "eval_runtime": 151.5644, | |
| "eval_samples_per_second": 6.637, | |
| "eval_spearman_cosine": 0.8243348749833265, | |
| "eval_steps_per_second": 0.831, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.108108108108108, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.007, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 3.108108108108108, | |
| "eval_loss": 0.050163887441158295, | |
| "eval_pearson_cosine": 0.8100599157021782, | |
| "eval_runtime": 149.8939, | |
| "eval_samples_per_second": 6.711, | |
| "eval_spearman_cosine": 0.8195085832550942, | |
| "eval_steps_per_second": 0.841, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 3.2432432432432434, | |
| "grad_norm": 5.909173011779785, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1025, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.2432432432432434, | |
| "eval_loss": 0.052336592227220535, | |
| "eval_pearson_cosine": 0.8092739374985023, | |
| "eval_runtime": 151.2544, | |
| "eval_samples_per_second": 6.651, | |
| "eval_spearman_cosine": 0.8194743493718913, | |
| "eval_steps_per_second": 0.833, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.3783783783783785, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1244, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 3.3783783783783785, | |
| "eval_loss": 0.05269436165690422, | |
| "eval_pearson_cosine": 0.8212737827789367, | |
| "eval_runtime": 150.843, | |
| "eval_samples_per_second": 6.669, | |
| "eval_spearman_cosine": 0.8250605382131625, | |
| "eval_steps_per_second": 0.835, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 3.5135135135135136, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.5135135135135136, | |
| "eval_loss": 0.05343884229660034, | |
| "eval_pearson_cosine": 0.8257663666576973, | |
| "eval_runtime": 150.9582, | |
| "eval_samples_per_second": 6.664, | |
| "eval_spearman_cosine": 0.8261900896885362, | |
| "eval_steps_per_second": 0.835, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.6486486486486487, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0259, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 3.6486486486486487, | |
| "eval_loss": 0.05709109827876091, | |
| "eval_pearson_cosine": 0.8294106767298322, | |
| "eval_runtime": 151.0358, | |
| "eval_samples_per_second": 6.661, | |
| "eval_spearman_cosine": 0.826217475365987, | |
| "eval_steps_per_second": 0.834, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 3.7837837837837838, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0939, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.7837837837837838, | |
| "eval_loss": 0.05264894291758537, | |
| "eval_pearson_cosine": 0.8333850683405307, | |
| "eval_runtime": 151.069, | |
| "eval_samples_per_second": 6.659, | |
| "eval_spearman_cosine": 0.8273128026466706, | |
| "eval_steps_per_second": 0.834, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.918918918918919, | |
| "grad_norm": 7.900262832641602, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1038, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 3.918918918918919, | |
| "eval_loss": 0.0527377724647522, | |
| "eval_pearson_cosine": 0.8334934833047165, | |
| "eval_runtime": 150.8011, | |
| "eval_samples_per_second": 6.671, | |
| "eval_spearman_cosine": 0.8261353280714282, | |
| "eval_steps_per_second": 0.836, | |
| "step": 145 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |