| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 79, | |
| "global_step": 782, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_cosine_accuracy@1": 0.232, | |
| "eval_cosine_accuracy@10": 0.456, | |
| "eval_cosine_accuracy@3": 0.3365, | |
| "eval_cosine_accuracy@5": 0.389, | |
| "eval_cosine_map@100": 0.14334533833340427, | |
| "eval_cosine_mrr@10": 0.2988200396825396, | |
| "eval_cosine_ndcg@10": 0.19590710853524823, | |
| "eval_cosine_precision@1": 0.232, | |
| "eval_cosine_precision@10": 0.10355000000000002, | |
| "eval_cosine_precision@3": 0.177, | |
| "eval_cosine_precision@5": 0.1452, | |
| "eval_cosine_recall@1": 0.0548117388039026, | |
| "eval_cosine_recall@10": 0.18984275866485092, | |
| "eval_cosine_recall@3": 0.11065666057433865, | |
| "eval_cosine_recall@5": 0.1464970398055657, | |
| "eval_loss": 2.950530767440796, | |
| "eval_runtime": 28.7759, | |
| "eval_samples_per_second": 69.502, | |
| "eval_steps_per_second": 1.112, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.10112, | |
| "grad_norm": 4.060101509094238, | |
| "learning_rate": 4.936708860759494e-05, | |
| "loss": 1.7256, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.10112, | |
| "eval_cosine_accuracy@1": 0.4915, | |
| "eval_cosine_accuracy@10": 0.709, | |
| "eval_cosine_accuracy@3": 0.6055, | |
| "eval_cosine_accuracy@5": 0.65, | |
| "eval_cosine_map@100": 0.37501954096960194, | |
| "eval_cosine_mrr@10": 0.5594632936507937, | |
| "eval_cosine_ndcg@10": 0.4471963096331387, | |
| "eval_cosine_precision@1": 0.4915, | |
| "eval_cosine_precision@10": 0.20455, | |
| "eval_cosine_precision@3": 0.3835, | |
| "eval_cosine_precision@5": 0.30490000000000006, | |
| "eval_cosine_recall@1": 0.15563947633799366, | |
| "eval_cosine_recall@10": 0.4411529898745371, | |
| "eval_cosine_recall@3": 0.30525816129147315, | |
| "eval_cosine_recall@5": 0.3661646057217302, | |
| "eval_loss": 0.869914174079895, | |
| "eval_runtime": 32.9054, | |
| "eval_samples_per_second": 60.78, | |
| "eval_steps_per_second": 0.972, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.20224, | |
| "grad_norm": 3.455092668533325, | |
| "learning_rate": 4.4452347083926033e-05, | |
| "loss": 0.8044, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.20224, | |
| "eval_cosine_accuracy@1": 0.538, | |
| "eval_cosine_accuracy@10": 0.7835, | |
| "eval_cosine_accuracy@3": 0.671, | |
| "eval_cosine_accuracy@5": 0.72, | |
| "eval_cosine_map@100": 0.4288531840469143, | |
| "eval_cosine_mrr@10": 0.6170097222222221, | |
| "eval_cosine_ndcg@10": 0.5071737492167014, | |
| "eval_cosine_precision@1": 0.538, | |
| "eval_cosine_precision@10": 0.22805, | |
| "eval_cosine_precision@3": 0.419, | |
| "eval_cosine_precision@5": 0.336, | |
| "eval_cosine_recall@1": 0.18509011905489597, | |
| "eval_cosine_recall@10": 0.5088450856238615, | |
| "eval_cosine_recall@3": 0.35023499358379173, | |
| "eval_cosine_recall@5": 0.42149330607177377, | |
| "eval_loss": 0.6144608855247498, | |
| "eval_runtime": 33.0639, | |
| "eval_samples_per_second": 60.489, | |
| "eval_steps_per_second": 0.968, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.30336, | |
| "grad_norm": 3.299586534500122, | |
| "learning_rate": 3.883357041251778e-05, | |
| "loss": 0.6254, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.30336, | |
| "eval_cosine_accuracy@1": 0.571, | |
| "eval_cosine_accuracy@10": 0.807, | |
| "eval_cosine_accuracy@3": 0.697, | |
| "eval_cosine_accuracy@5": 0.747, | |
| "eval_cosine_map@100": 0.45768471305297626, | |
| "eval_cosine_mrr@10": 0.6465369047619048, | |
| "eval_cosine_ndcg@10": 0.5352649581411899, | |
| "eval_cosine_precision@1": 0.571, | |
| "eval_cosine_precision@10": 0.23940000000000003, | |
| "eval_cosine_precision@3": 0.446, | |
| "eval_cosine_precision@5": 0.3534000000000001, | |
| "eval_cosine_recall@1": 0.19728028074863738, | |
| "eval_cosine_recall@10": 0.5336122046159524, | |
| "eval_cosine_recall@3": 0.3745295533176793, | |
| "eval_cosine_recall@5": 0.44429268806860206, | |
| "eval_loss": 0.5068339705467224, | |
| "eval_runtime": 32.9503, | |
| "eval_samples_per_second": 60.697, | |
| "eval_steps_per_second": 0.971, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.40448, | |
| "grad_norm": 2.563180685043335, | |
| "learning_rate": 3.321479374110953e-05, | |
| "loss": 0.5215, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.40448, | |
| "eval_cosine_accuracy@1": 0.6075, | |
| "eval_cosine_accuracy@10": 0.855, | |
| "eval_cosine_accuracy@3": 0.7495, | |
| "eval_cosine_accuracy@5": 0.8005, | |
| "eval_cosine_map@100": 0.4886299583708059, | |
| "eval_cosine_mrr@10": 0.6879220238095239, | |
| "eval_cosine_ndcg@10": 0.5725427973832125, | |
| "eval_cosine_precision@1": 0.6075, | |
| "eval_cosine_precision@10": 0.25585, | |
| "eval_cosine_precision@3": 0.4696666666666666, | |
| "eval_cosine_precision@5": 0.3763000000000001, | |
| "eval_cosine_recall@1": 0.21129895871002893, | |
| "eval_cosine_recall@10": 0.5768064108469603, | |
| "eval_cosine_recall@3": 0.3986053442055735, | |
| "eval_cosine_recall@5": 0.4793458893809688, | |
| "eval_loss": 0.4230406582355499, | |
| "eval_runtime": 32.9072, | |
| "eval_samples_per_second": 60.777, | |
| "eval_steps_per_second": 0.972, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "grad_norm": 2.914031744003296, | |
| "learning_rate": 2.759601706970128e-05, | |
| "loss": 0.4507, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "eval_cosine_accuracy@1": 0.6285, | |
| "eval_cosine_accuracy@10": 0.8675, | |
| "eval_cosine_accuracy@3": 0.753, | |
| "eval_cosine_accuracy@5": 0.812, | |
| "eval_cosine_map@100": 0.5064669285677783, | |
| "eval_cosine_mrr@10": 0.7049886904761903, | |
| "eval_cosine_ndcg@10": 0.5906093986384608, | |
| "eval_cosine_precision@1": 0.6285, | |
| "eval_cosine_precision@10": 0.26185, | |
| "eval_cosine_precision@3": 0.4774999999999999, | |
| "eval_cosine_precision@5": 0.38320000000000004, | |
| "eval_cosine_recall@1": 0.22386914380224288, | |
| "eval_cosine_recall@10": 0.5924252077258059, | |
| "eval_cosine_recall@3": 0.4076045673568921, | |
| "eval_cosine_recall@5": 0.4945358123439019, | |
| "eval_loss": 0.3786991834640503, | |
| "eval_runtime": 32.9486, | |
| "eval_samples_per_second": 60.701, | |
| "eval_steps_per_second": 0.971, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.60672, | |
| "grad_norm": 2.8829078674316406, | |
| "learning_rate": 2.197724039829303e-05, | |
| "loss": 0.4014, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.60672, | |
| "eval_cosine_accuracy@1": 0.6365, | |
| "eval_cosine_accuracy@10": 0.876, | |
| "eval_cosine_accuracy@3": 0.771, | |
| "eval_cosine_accuracy@5": 0.8255, | |
| "eval_cosine_map@100": 0.5197156334384408, | |
| "eval_cosine_mrr@10": 0.7141259920634916, | |
| "eval_cosine_ndcg@10": 0.601418022062486, | |
| "eval_cosine_precision@1": 0.6365, | |
| "eval_cosine_precision@10": 0.26645, | |
| "eval_cosine_precision@3": 0.48833333333333334, | |
| "eval_cosine_precision@5": 0.3937, | |
| "eval_cosine_recall@1": 0.22605882194620797, | |
| "eval_cosine_recall@10": 0.6025888884821827, | |
| "eval_cosine_recall@3": 0.42357208114816375, | |
| "eval_cosine_recall@5": 0.509917211803928, | |
| "eval_loss": 0.3428954482078552, | |
| "eval_runtime": 33.015, | |
| "eval_samples_per_second": 60.579, | |
| "eval_steps_per_second": 0.969, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.70784, | |
| "grad_norm": 2.7049639225006104, | |
| "learning_rate": 1.6358463726884778e-05, | |
| "loss": 0.3779, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.70784, | |
| "eval_cosine_accuracy@1": 0.6455, | |
| "eval_cosine_accuracy@10": 0.8815, | |
| "eval_cosine_accuracy@3": 0.784, | |
| "eval_cosine_accuracy@5": 0.8345, | |
| "eval_cosine_map@100": 0.5322933983250332, | |
| "eval_cosine_mrr@10": 0.7239722222222216, | |
| "eval_cosine_ndcg@10": 0.6129085102767301, | |
| "eval_cosine_precision@1": 0.6455, | |
| "eval_cosine_precision@10": 0.27020000000000005, | |
| "eval_cosine_precision@3": 0.49716666666666665, | |
| "eval_cosine_precision@5": 0.39830000000000004, | |
| "eval_cosine_recall@1": 0.2337385415837256, | |
| "eval_cosine_recall@10": 0.6114790331062327, | |
| "eval_cosine_recall@3": 0.43581210236649, | |
| "eval_cosine_recall@5": 0.5201249831864138, | |
| "eval_loss": 0.32787367701530457, | |
| "eval_runtime": 33.0268, | |
| "eval_samples_per_second": 60.557, | |
| "eval_steps_per_second": 0.969, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.80896, | |
| "grad_norm": 2.0675265789031982, | |
| "learning_rate": 1.073968705547653e-05, | |
| "loss": 0.3463, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.80896, | |
| "eval_cosine_accuracy@1": 0.656, | |
| "eval_cosine_accuracy@10": 0.889, | |
| "eval_cosine_accuracy@3": 0.793, | |
| "eval_cosine_accuracy@5": 0.839, | |
| "eval_cosine_map@100": 0.5425920958735082, | |
| "eval_cosine_mrr@10": 0.7340821428571421, | |
| "eval_cosine_ndcg@10": 0.6223116530478937, | |
| "eval_cosine_precision@1": 0.656, | |
| "eval_cosine_precision@10": 0.27415, | |
| "eval_cosine_precision@3": 0.504, | |
| "eval_cosine_precision@5": 0.4019000000000001, | |
| "eval_cosine_recall@1": 0.23843478790276604, | |
| "eval_cosine_recall@10": 0.6201721618621493, | |
| "eval_cosine_recall@3": 0.442197840531875, | |
| "eval_cosine_recall@5": 0.5237564546607093, | |
| "eval_loss": 0.30987244844436646, | |
| "eval_runtime": 33.1417, | |
| "eval_samples_per_second": 60.347, | |
| "eval_steps_per_second": 0.966, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.91008, | |
| "grad_norm": 1.9332996606826782, | |
| "learning_rate": 5.120910384068279e-06, | |
| "loss": 0.3202, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.91008, | |
| "eval_cosine_accuracy@1": 0.6545, | |
| "eval_cosine_accuracy@10": 0.89, | |
| "eval_cosine_accuracy@3": 0.7925, | |
| "eval_cosine_accuracy@5": 0.841, | |
| "eval_cosine_map@100": 0.5411107394124216, | |
| "eval_cosine_mrr@10": 0.7325833333333327, | |
| "eval_cosine_ndcg@10": 0.6220169640997574, | |
| "eval_cosine_precision@1": 0.6545, | |
| "eval_cosine_precision@10": 0.27425, | |
| "eval_cosine_precision@3": 0.5031666666666667, | |
| "eval_cosine_precision@5": 0.4011, | |
| "eval_cosine_recall@1": 0.23852962917260728, | |
| "eval_cosine_recall@10": 0.6212349911293668, | |
| "eval_cosine_recall@3": 0.4422107764041644, | |
| "eval_cosine_recall@5": 0.5245186089315591, | |
| "eval_loss": 0.29744598269462585, | |
| "eval_runtime": 33.0411, | |
| "eval_samples_per_second": 60.531, | |
| "eval_steps_per_second": 0.968, | |
| "step": 711 | |
| } | |
| ], | |
| "logging_steps": 79, | |
| "max_steps": 782, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 79, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |