{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 79, "global_step": 782, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_cosine_accuracy@1": 0.232, "eval_cosine_accuracy@10": 0.456, "eval_cosine_accuracy@3": 0.3365, "eval_cosine_accuracy@5": 0.389, "eval_cosine_map@100": 0.14334533833340427, "eval_cosine_mrr@10": 0.2988200396825396, "eval_cosine_ndcg@10": 0.19590710853524823, "eval_cosine_precision@1": 0.232, "eval_cosine_precision@10": 0.10355000000000002, "eval_cosine_precision@3": 0.177, "eval_cosine_precision@5": 0.1452, "eval_cosine_recall@1": 0.0548117388039026, "eval_cosine_recall@10": 0.18984275866485092, "eval_cosine_recall@3": 0.11065666057433865, "eval_cosine_recall@5": 0.1464970398055657, "eval_loss": 2.950530767440796, "eval_runtime": 28.7759, "eval_samples_per_second": 69.502, "eval_steps_per_second": 1.112, "step": 0 }, { "epoch": 0.10112, "grad_norm": 4.060101509094238, "learning_rate": 4.936708860759494e-05, "loss": 1.7256, "step": 79 }, { "epoch": 0.10112, "eval_cosine_accuracy@1": 0.4915, "eval_cosine_accuracy@10": 0.709, "eval_cosine_accuracy@3": 0.6055, "eval_cosine_accuracy@5": 0.65, "eval_cosine_map@100": 0.37501954096960194, "eval_cosine_mrr@10": 0.5594632936507937, "eval_cosine_ndcg@10": 0.4471963096331387, "eval_cosine_precision@1": 0.4915, "eval_cosine_precision@10": 0.20455, "eval_cosine_precision@3": 0.3835, "eval_cosine_precision@5": 0.30490000000000006, "eval_cosine_recall@1": 0.15563947633799366, "eval_cosine_recall@10": 0.4411529898745371, "eval_cosine_recall@3": 0.30525816129147315, "eval_cosine_recall@5": 0.3661646057217302, "eval_loss": 0.869914174079895, "eval_runtime": 32.9054, "eval_samples_per_second": 60.78, "eval_steps_per_second": 0.972, "step": 79 }, { "epoch": 0.20224, "grad_norm": 3.455092668533325, "learning_rate": 4.4452347083926033e-05, "loss": 0.8044, "step": 158 }, { "epoch": 0.20224, "eval_cosine_accuracy@1": 0.538, "eval_cosine_accuracy@10": 0.7835, "eval_cosine_accuracy@3": 0.671, "eval_cosine_accuracy@5": 0.72, "eval_cosine_map@100": 0.4288531840469143, "eval_cosine_mrr@10": 0.6170097222222221, "eval_cosine_ndcg@10": 0.5071737492167014, "eval_cosine_precision@1": 0.538, "eval_cosine_precision@10": 0.22805, "eval_cosine_precision@3": 0.419, "eval_cosine_precision@5": 0.336, "eval_cosine_recall@1": 0.18509011905489597, "eval_cosine_recall@10": 0.5088450856238615, "eval_cosine_recall@3": 0.35023499358379173, "eval_cosine_recall@5": 0.42149330607177377, "eval_loss": 0.6144608855247498, "eval_runtime": 33.0639, "eval_samples_per_second": 60.489, "eval_steps_per_second": 0.968, "step": 158 }, { "epoch": 0.30336, "grad_norm": 3.299586534500122, "learning_rate": 3.883357041251778e-05, "loss": 0.6254, "step": 237 }, { "epoch": 0.30336, "eval_cosine_accuracy@1": 0.571, "eval_cosine_accuracy@10": 0.807, "eval_cosine_accuracy@3": 0.697, "eval_cosine_accuracy@5": 0.747, "eval_cosine_map@100": 0.45768471305297626, "eval_cosine_mrr@10": 0.6465369047619048, "eval_cosine_ndcg@10": 0.5352649581411899, "eval_cosine_precision@1": 0.571, "eval_cosine_precision@10": 0.23940000000000003, "eval_cosine_precision@3": 0.446, "eval_cosine_precision@5": 0.3534000000000001, "eval_cosine_recall@1": 0.19728028074863738, "eval_cosine_recall@10": 0.5336122046159524, "eval_cosine_recall@3": 0.3745295533176793, "eval_cosine_recall@5": 0.44429268806860206, "eval_loss": 0.5068339705467224, "eval_runtime": 32.9503, "eval_samples_per_second": 60.697, "eval_steps_per_second": 0.971, "step": 237 }, { "epoch": 0.40448, "grad_norm": 2.563180685043335, "learning_rate": 3.321479374110953e-05, "loss": 0.5215, "step": 316 }, { "epoch": 0.40448, "eval_cosine_accuracy@1": 0.6075, "eval_cosine_accuracy@10": 0.855, "eval_cosine_accuracy@3": 0.7495, "eval_cosine_accuracy@5": 0.8005, "eval_cosine_map@100": 0.4886299583708059, "eval_cosine_mrr@10": 0.6879220238095239, "eval_cosine_ndcg@10": 0.5725427973832125, "eval_cosine_precision@1": 0.6075, "eval_cosine_precision@10": 0.25585, "eval_cosine_precision@3": 0.4696666666666666, "eval_cosine_precision@5": 0.3763000000000001, "eval_cosine_recall@1": 0.21129895871002893, "eval_cosine_recall@10": 0.5768064108469603, "eval_cosine_recall@3": 0.3986053442055735, "eval_cosine_recall@5": 0.4793458893809688, "eval_loss": 0.4230406582355499, "eval_runtime": 32.9072, "eval_samples_per_second": 60.777, "eval_steps_per_second": 0.972, "step": 316 }, { "epoch": 0.5056, "grad_norm": 2.914031744003296, "learning_rate": 2.759601706970128e-05, "loss": 0.4507, "step": 395 }, { "epoch": 0.5056, "eval_cosine_accuracy@1": 0.6285, "eval_cosine_accuracy@10": 0.8675, "eval_cosine_accuracy@3": 0.753, "eval_cosine_accuracy@5": 0.812, "eval_cosine_map@100": 0.5064669285677783, "eval_cosine_mrr@10": 0.7049886904761903, "eval_cosine_ndcg@10": 0.5906093986384608, "eval_cosine_precision@1": 0.6285, "eval_cosine_precision@10": 0.26185, "eval_cosine_precision@3": 0.4774999999999999, "eval_cosine_precision@5": 0.38320000000000004, "eval_cosine_recall@1": 0.22386914380224288, "eval_cosine_recall@10": 0.5924252077258059, "eval_cosine_recall@3": 0.4076045673568921, "eval_cosine_recall@5": 0.4945358123439019, "eval_loss": 0.3786991834640503, "eval_runtime": 32.9486, "eval_samples_per_second": 60.701, "eval_steps_per_second": 0.971, "step": 395 }, { "epoch": 0.60672, "grad_norm": 2.8829078674316406, "learning_rate": 2.197724039829303e-05, "loss": 0.4014, "step": 474 }, { "epoch": 0.60672, "eval_cosine_accuracy@1": 0.6365, "eval_cosine_accuracy@10": 0.876, "eval_cosine_accuracy@3": 0.771, "eval_cosine_accuracy@5": 0.8255, "eval_cosine_map@100": 0.5197156334384408, "eval_cosine_mrr@10": 0.7141259920634916, "eval_cosine_ndcg@10": 0.601418022062486, "eval_cosine_precision@1": 0.6365, "eval_cosine_precision@10": 0.26645, "eval_cosine_precision@3": 0.48833333333333334, "eval_cosine_precision@5": 0.3937, "eval_cosine_recall@1": 0.22605882194620797, "eval_cosine_recall@10": 0.6025888884821827, "eval_cosine_recall@3": 0.42357208114816375, "eval_cosine_recall@5": 0.509917211803928, "eval_loss": 0.3428954482078552, "eval_runtime": 33.015, "eval_samples_per_second": 60.579, "eval_steps_per_second": 0.969, "step": 474 }, { "epoch": 0.70784, "grad_norm": 2.7049639225006104, "learning_rate": 1.6358463726884778e-05, "loss": 0.3779, "step": 553 }, { "epoch": 0.70784, "eval_cosine_accuracy@1": 0.6455, "eval_cosine_accuracy@10": 0.8815, "eval_cosine_accuracy@3": 0.784, "eval_cosine_accuracy@5": 0.8345, "eval_cosine_map@100": 0.5322933983250332, "eval_cosine_mrr@10": 0.7239722222222216, "eval_cosine_ndcg@10": 0.6129085102767301, "eval_cosine_precision@1": 0.6455, "eval_cosine_precision@10": 0.27020000000000005, "eval_cosine_precision@3": 0.49716666666666665, "eval_cosine_precision@5": 0.39830000000000004, "eval_cosine_recall@1": 0.2337385415837256, "eval_cosine_recall@10": 0.6114790331062327, "eval_cosine_recall@3": 0.43581210236649, "eval_cosine_recall@5": 0.5201249831864138, "eval_loss": 0.32787367701530457, "eval_runtime": 33.0268, "eval_samples_per_second": 60.557, "eval_steps_per_second": 0.969, "step": 553 }, { "epoch": 0.80896, "grad_norm": 2.0675265789031982, "learning_rate": 1.073968705547653e-05, "loss": 0.3463, "step": 632 }, { "epoch": 0.80896, "eval_cosine_accuracy@1": 0.656, "eval_cosine_accuracy@10": 0.889, "eval_cosine_accuracy@3": 0.793, "eval_cosine_accuracy@5": 0.839, "eval_cosine_map@100": 0.5425920958735082, "eval_cosine_mrr@10": 0.7340821428571421, "eval_cosine_ndcg@10": 0.6223116530478937, "eval_cosine_precision@1": 0.656, "eval_cosine_precision@10": 0.27415, "eval_cosine_precision@3": 0.504, "eval_cosine_precision@5": 0.4019000000000001, "eval_cosine_recall@1": 0.23843478790276604, "eval_cosine_recall@10": 0.6201721618621493, "eval_cosine_recall@3": 0.442197840531875, "eval_cosine_recall@5": 0.5237564546607093, "eval_loss": 0.30987244844436646, "eval_runtime": 33.1417, "eval_samples_per_second": 60.347, "eval_steps_per_second": 0.966, "step": 632 }, { "epoch": 0.91008, "grad_norm": 1.9332996606826782, "learning_rate": 5.120910384068279e-06, "loss": 0.3202, "step": 711 }, { "epoch": 0.91008, "eval_cosine_accuracy@1": 0.6545, "eval_cosine_accuracy@10": 0.89, "eval_cosine_accuracy@3": 0.7925, "eval_cosine_accuracy@5": 0.841, "eval_cosine_map@100": 0.5411107394124216, "eval_cosine_mrr@10": 0.7325833333333327, "eval_cosine_ndcg@10": 0.6220169640997574, "eval_cosine_precision@1": 0.6545, "eval_cosine_precision@10": 0.27425, "eval_cosine_precision@3": 0.5031666666666667, "eval_cosine_precision@5": 0.4011, "eval_cosine_recall@1": 0.23852962917260728, "eval_cosine_recall@10": 0.6212349911293668, "eval_cosine_recall@3": 0.4422107764041644, "eval_cosine_recall@5": 0.5245186089315591, "eval_loss": 0.29744598269462585, "eval_runtime": 33.0411, "eval_samples_per_second": 60.531, "eval_steps_per_second": 0.968, "step": 711 } ], "logging_steps": 79, "max_steps": 782, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 79, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }