{ "best_metric": 1.084230661392212, "best_model_checkpoint": "narrativesAnalogues-MPNet/checkpoint-396", "epoch": 2.0, "eval_steps": 500, "global_step": 396, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12626262626262627, "grad_norm": 0.0, "learning_rate": 1.25e-05, "loss": 1.3871, "step": 25 }, { "epoch": 0.25252525252525254, "grad_norm": 53.9383430480957, "learning_rate": 2.5e-05, "loss": 0.9039, "step": 50 }, { "epoch": 0.3787878787878788, "grad_norm": 6.326577663421631, "learning_rate": 2.915730337078652e-05, "loss": 0.766, "step": 75 }, { "epoch": 0.5050505050505051, "grad_norm": 132.8404083251953, "learning_rate": 2.7752808988764048e-05, "loss": 0.9601, "step": 100 }, { "epoch": 0.6313131313131313, "grad_norm": 17.578922271728516, "learning_rate": 2.6348314606741574e-05, "loss": 0.2709, "step": 125 }, { "epoch": 0.7575757575757576, "grad_norm": 0.053114086389541626, "learning_rate": 2.4943820224719103e-05, "loss": 0.1721, "step": 150 }, { "epoch": 0.8838383838383839, "grad_norm": 0.048109784722328186, "learning_rate": 2.353932584269663e-05, "loss": 0.3169, "step": 175 }, { "epoch": 1.0, "eval_loss": 1.2392297983169556, "eval_runtime": 150.6481, "eval_samples_per_second": 1.274, "eval_steps_per_second": 0.08, "eval_validation_pearson_cosine": 0.8564219550563432, "eval_validation_spearman_cosine": 0.7226767730845685, "step": 198 }, { "epoch": 1.0101010101010102, "grad_norm": 0.04682952165603638, "learning_rate": 2.2134831460674157e-05, "loss": 0.1533, "step": 200 }, { "epoch": 1.1363636363636362, "grad_norm": 0.007105494383722544, "learning_rate": 2.0730337078651686e-05, "loss": 0.0193, "step": 225 }, { "epoch": 1.2626262626262625, "grad_norm": 5.023591041564941, "learning_rate": 1.9325842696629215e-05, "loss": 0.0494, "step": 250 }, { "epoch": 1.3888888888888888, "grad_norm": 0.0029278132133185863, "learning_rate": 1.792134831460674e-05, "loss": 0.0029, "step": 275 }, { "epoch": 1.5151515151515151, "grad_norm": 58.362091064453125, "learning_rate": 1.651685393258427e-05, "loss": 0.0173, "step": 300 }, { "epoch": 1.6414141414141414, "grad_norm": 0.03666149452328682, "learning_rate": 1.51123595505618e-05, "loss": 0.0201, "step": 325 }, { "epoch": 1.7676767676767677, "grad_norm": 5.760815143585205, "learning_rate": 1.3707865168539327e-05, "loss": 0.0031, "step": 350 }, { "epoch": 1.893939393939394, "grad_norm": 54.69737243652344, "learning_rate": 1.2303370786516854e-05, "loss": 0.2825, "step": 375 }, { "epoch": 2.0, "eval_loss": 1.084230661392212, "eval_runtime": 181.003, "eval_samples_per_second": 1.061, "eval_steps_per_second": 0.066, "eval_validation_pearson_cosine": 0.8520244193617519, "eval_validation_spearman_cosine": 0.7196384923568855, "step": 396 } ], "logging_steps": 25, "max_steps": 594, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }