{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 79, "global_step": 782, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_cosine_accuracy@1": 0.3175, "eval_cosine_accuracy@10": 0.5405, "eval_cosine_accuracy@3": 0.4285, "eval_cosine_accuracy@5": 0.472, "eval_cosine_map@100": 0.20295798777707474, "eval_cosine_mrr@10": 0.38407797619047646, "eval_cosine_ndcg@10": 0.26664320865382884, "eval_cosine_precision@1": 0.3175, "eval_cosine_precision@10": 0.13325, "eval_cosine_precision@3": 0.239, "eval_cosine_precision@5": 0.192, "eval_cosine_recall@1": 0.08617124715273779, "eval_cosine_recall@10": 0.25484566308493556, "eval_cosine_recall@3": 0.16493508136064844, "eval_cosine_recall@5": 0.20128376057243394, "eval_loss": 2.716991424560547, "eval_runtime": 93.6318, "eval_samples_per_second": 21.36, "eval_steps_per_second": 0.342, "step": 0 }, { "epoch": 0.10112, "grad_norm": 15.54442024230957, "learning_rate": 4.936708860759494e-05, "loss": 0.6977, "step": 79 }, { "epoch": 0.10112, "eval_cosine_accuracy@1": 0.611, "eval_cosine_accuracy@10": 0.867, "eval_cosine_accuracy@3": 0.7485, "eval_cosine_accuracy@5": 0.802, "eval_cosine_map@100": 0.4842158123573022, "eval_cosine_mrr@10": 0.6918936507936507, "eval_cosine_ndcg@10": 0.5731052776794775, "eval_cosine_precision@1": 0.611, "eval_cosine_precision@10": 0.26645, "eval_cosine_precision@3": 0.48033333333333333, "eval_cosine_precision@5": 0.3886, "eval_cosine_recall@1": 0.19547386901725833, "eval_cosine_recall@10": 0.582591867678025, "eval_cosine_recall@3": 0.38178869386096725, "eval_cosine_recall@5": 0.47092166449135925, "eval_loss": 0.3820127546787262, "eval_runtime": 94.5547, "eval_samples_per_second": 21.152, "eval_steps_per_second": 0.338, "step": 79 }, { "epoch": 0.20224, "grad_norm": 11.99033260345459, "learning_rate": 4.4452347083926033e-05, "loss": 0.3696, "step": 158 }, { "epoch": 0.20224, "eval_cosine_accuracy@1": 0.6395, "eval_cosine_accuracy@10": 0.8815, "eval_cosine_accuracy@3": 0.771, "eval_cosine_accuracy@5": 0.8215, "eval_cosine_map@100": 0.518232118499808, "eval_cosine_mrr@10": 0.7167617063492063, "eval_cosine_ndcg@10": 0.6015016271902943, "eval_cosine_precision@1": 0.6395, "eval_cosine_precision@10": 0.27385000000000004, "eval_cosine_precision@3": 0.4965, "eval_cosine_precision@5": 0.39940000000000003, "eval_cosine_recall@1": 0.21741346374345538, "eval_cosine_recall@10": 0.6032130601430059, "eval_cosine_recall@3": 0.411628698268123, "eval_cosine_recall@5": 0.49754964310983035, "eval_loss": 0.33004653453826904, "eval_runtime": 94.5446, "eval_samples_per_second": 21.154, "eval_steps_per_second": 0.338, "step": 158 }, { "epoch": 0.30336, "grad_norm": 7.6528425216674805, "learning_rate": 3.883357041251778e-05, "loss": 0.3079, "step": 237 }, { "epoch": 0.30336, "eval_cosine_accuracy@1": 0.6435, "eval_cosine_accuracy@10": 0.878, "eval_cosine_accuracy@3": 0.775, "eval_cosine_accuracy@5": 0.8215, "eval_cosine_map@100": 0.5383738084900617, "eval_cosine_mrr@10": 0.7194515873015865, "eval_cosine_ndcg@10": 0.6155692521247632, "eval_cosine_precision@1": 0.6435, "eval_cosine_precision@10": 0.2824, "eval_cosine_precision@3": 0.5153333333333333, "eval_cosine_precision@5": 0.4173, "eval_cosine_recall@1": 0.2183039885889674, "eval_cosine_recall@10": 0.6148750852993538, "eval_cosine_recall@3": 0.4253399672316602, "eval_cosine_recall@5": 0.5135875884980249, "eval_loss": 0.26221561431884766, "eval_runtime": 94.3747, "eval_samples_per_second": 21.192, "eval_steps_per_second": 0.339, "step": 237 }, { "epoch": 0.40448, "grad_norm": 7.216766834259033, "learning_rate": 3.321479374110953e-05, "loss": 0.2471, "step": 316 }, { "epoch": 0.40448, "eval_cosine_accuracy@1": 0.6615, "eval_cosine_accuracy@10": 0.9055, "eval_cosine_accuracy@3": 0.8035, "eval_cosine_accuracy@5": 0.8565, "eval_cosine_map@100": 0.5627360990015163, "eval_cosine_mrr@10": 0.7432835317460309, "eval_cosine_ndcg@10": 0.6395267347742315, "eval_cosine_precision@1": 0.6615, "eval_cosine_precision@10": 0.2906, "eval_cosine_precision@3": 0.524, "eval_cosine_precision@5": 0.4232, "eval_cosine_recall@1": 0.23236891554586947, "eval_cosine_recall@10": 0.6438819108166526, "eval_cosine_recall@3": 0.44429773626339913, "eval_cosine_recall@5": 0.536914325811876, "eval_loss": 0.23154108226299286, "eval_runtime": 94.3188, "eval_samples_per_second": 21.205, "eval_steps_per_second": 0.339, "step": 316 }, { "epoch": 0.5056, "grad_norm": 6.026930332183838, "learning_rate": 2.759601706970128e-05, "loss": 0.2129, "step": 395 }, { "epoch": 0.5056, "eval_cosine_accuracy@1": 0.691, "eval_cosine_accuracy@10": 0.923, "eval_cosine_accuracy@3": 0.8285, "eval_cosine_accuracy@5": 0.877, "eval_cosine_map@100": 0.5988053929521469, "eval_cosine_mrr@10": 0.7697992063492058, "eval_cosine_ndcg@10": 0.6734974207335584, "eval_cosine_precision@1": 0.691, "eval_cosine_precision@10": 0.30469999999999997, "eval_cosine_precision@3": 0.5516666666666665, "eval_cosine_precision@5": 0.4453000000000001, "eval_cosine_recall@1": 0.2451002751627125, "eval_cosine_recall@10": 0.6720344352159927, "eval_cosine_recall@3": 0.4752111086795837, "eval_cosine_recall@5": 0.5689998210708447, "eval_loss": 0.1887313276529312, "eval_runtime": 94.6158, "eval_samples_per_second": 21.138, "eval_steps_per_second": 0.338, "step": 395 }, { "epoch": 0.60672, "grad_norm": 6.972903728485107, "learning_rate": 2.197724039829303e-05, "loss": 0.1782, "step": 474 }, { "epoch": 0.60672, "eval_cosine_accuracy@1": 0.7035, "eval_cosine_accuracy@10": 0.9305, "eval_cosine_accuracy@3": 0.842, "eval_cosine_accuracy@5": 0.888, "eval_cosine_map@100": 0.6071005790786185, "eval_cosine_mrr@10": 0.781702777777777, "eval_cosine_ndcg@10": 0.6808092483733388, "eval_cosine_precision@1": 0.7035, "eval_cosine_precision@10": 0.3067, "eval_cosine_precision@3": 0.562, "eval_cosine_precision@5": 0.4495, "eval_cosine_recall@1": 0.25034756468684327, "eval_cosine_recall@10": 0.6768599428870521, "eval_cosine_recall@3": 0.4826217978486824, "eval_cosine_recall@5": 0.5710397017343101, "eval_loss": 0.17932943999767303, "eval_runtime": 94.3886, "eval_samples_per_second": 21.189, "eval_steps_per_second": 0.339, "step": 474 }, { "epoch": 0.70784, "grad_norm": 5.119401931762695, "learning_rate": 1.6358463726884778e-05, "loss": 0.1601, "step": 553 }, { "epoch": 0.70784, "eval_cosine_accuracy@1": 0.7265, "eval_cosine_accuracy@10": 0.942, "eval_cosine_accuracy@3": 0.862, "eval_cosine_accuracy@5": 0.903, "eval_cosine_map@100": 0.6338351727535749, "eval_cosine_mrr@10": 0.8012726190476185, "eval_cosine_ndcg@10": 0.7052858216291891, "eval_cosine_precision@1": 0.7265, "eval_cosine_precision@10": 0.31645000000000006, "eval_cosine_precision@3": 0.5746666666666667, "eval_cosine_precision@5": 0.4643, "eval_cosine_recall@1": 0.26731400185268916, "eval_cosine_recall@10": 0.6964398448419685, "eval_cosine_recall@3": 0.5006986582097148, "eval_cosine_recall@5": 0.5936279161380148, "eval_loss": 0.1466195434331894, "eval_runtime": 94.3179, "eval_samples_per_second": 21.205, "eval_steps_per_second": 0.339, "step": 553 }, { "epoch": 0.80896, "grad_norm": 5.0713982582092285, "learning_rate": 1.073968705547653e-05, "loss": 0.1269, "step": 632 }, { "epoch": 0.80896, "eval_cosine_accuracy@1": 0.7235, "eval_cosine_accuracy@10": 0.9405, "eval_cosine_accuracy@3": 0.858, "eval_cosine_accuracy@5": 0.901, "eval_cosine_map@100": 0.6337029110249771, "eval_cosine_mrr@10": 0.7987942460317458, "eval_cosine_ndcg@10": 0.702721958317939, "eval_cosine_precision@1": 0.7235, "eval_cosine_precision@10": 0.31520000000000004, "eval_cosine_precision@3": 0.573, "eval_cosine_precision@5": 0.462, "eval_cosine_recall@1": 0.2652290650574583, "eval_cosine_recall@10": 0.6960502030808166, "eval_cosine_recall@3": 0.4984801971314316, "eval_cosine_recall@5": 0.5914106569114703, "eval_loss": 0.14512212574481964, "eval_runtime": 94.7293, "eval_samples_per_second": 21.113, "eval_steps_per_second": 0.338, "step": 632 }, { "epoch": 0.91008, "grad_norm": 4.609349250793457, "learning_rate": 5.120910384068279e-06, "loss": 0.1047, "step": 711 }, { "epoch": 0.91008, "eval_cosine_accuracy@1": 0.744, "eval_cosine_accuracy@10": 0.9465, "eval_cosine_accuracy@3": 0.873, "eval_cosine_accuracy@5": 0.914, "eval_cosine_map@100": 0.6546158733411345, "eval_cosine_mrr@10": 0.8165956349206344, "eval_cosine_ndcg@10": 0.7225101978855893, "eval_cosine_precision@1": 0.744, "eval_cosine_precision@10": 0.323, "eval_cosine_precision@3": 0.5885, "eval_cosine_precision@5": 0.4711, "eval_cosine_recall@1": 0.27657015412151464, "eval_cosine_recall@10": 0.7128635786880844, "eval_cosine_recall@3": 0.5135658345260549, "eval_cosine_recall@5": 0.6083090142946717, "eval_loss": 0.11803647130727768, "eval_runtime": 94.4018, "eval_samples_per_second": 21.186, "eval_steps_per_second": 0.339, "step": 711 } ], "logging_steps": 79, "max_steps": 782, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 79, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }