| { |
| "best_metric": 1.084230661392212, |
| "best_model_checkpoint": "narrativesAnalogues-MPNet/checkpoint-396", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 396, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12626262626262627, |
| "grad_norm": 0.0, |
| "learning_rate": 1.25e-05, |
| "loss": 1.3871, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.25252525252525254, |
| "grad_norm": 53.9383430480957, |
| "learning_rate": 2.5e-05, |
| "loss": 0.9039, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3787878787878788, |
| "grad_norm": 6.326577663421631, |
| "learning_rate": 2.915730337078652e-05, |
| "loss": 0.766, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 132.8404083251953, |
| "learning_rate": 2.7752808988764048e-05, |
| "loss": 0.9601, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6313131313131313, |
| "grad_norm": 17.578922271728516, |
| "learning_rate": 2.6348314606741574e-05, |
| "loss": 0.2709, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.053114086389541626, |
| "learning_rate": 2.4943820224719103e-05, |
| "loss": 0.1721, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8838383838383839, |
| "grad_norm": 0.048109784722328186, |
| "learning_rate": 2.353932584269663e-05, |
| "loss": 0.3169, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.2392297983169556, |
| "eval_runtime": 150.6481, |
| "eval_samples_per_second": 1.274, |
| "eval_steps_per_second": 0.08, |
| "eval_validation_pearson_cosine": 0.8564219550563432, |
| "eval_validation_spearman_cosine": 0.7226767730845685, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0101010101010102, |
| "grad_norm": 0.04682952165603638, |
| "learning_rate": 2.2134831460674157e-05, |
| "loss": 0.1533, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "grad_norm": 0.007105494383722544, |
| "learning_rate": 2.0730337078651686e-05, |
| "loss": 0.0193, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.2626262626262625, |
| "grad_norm": 5.023591041564941, |
| "learning_rate": 1.9325842696629215e-05, |
| "loss": 0.0494, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.0029278132133185863, |
| "learning_rate": 1.792134831460674e-05, |
| "loss": 0.0029, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 58.362091064453125, |
| "learning_rate": 1.651685393258427e-05, |
| "loss": 0.0173, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6414141414141414, |
| "grad_norm": 0.03666149452328682, |
| "learning_rate": 1.51123595505618e-05, |
| "loss": 0.0201, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7676767676767677, |
| "grad_norm": 5.760815143585205, |
| "learning_rate": 1.3707865168539327e-05, |
| "loss": 0.0031, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.893939393939394, |
| "grad_norm": 54.69737243652344, |
| "learning_rate": 1.2303370786516854e-05, |
| "loss": 0.2825, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.084230661392212, |
| "eval_runtime": 181.003, |
| "eval_samples_per_second": 1.061, |
| "eval_steps_per_second": 0.066, |
| "eval_validation_pearson_cosine": 0.8520244193617519, |
| "eval_validation_spearman_cosine": 0.7196384923568855, |
| "step": 396 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 594, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|