| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 79, | |
| "global_step": 782, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_cosine_accuracy@1": 0.3175, | |
| "eval_cosine_accuracy@10": 0.5405, | |
| "eval_cosine_accuracy@3": 0.4285, | |
| "eval_cosine_accuracy@5": 0.472, | |
| "eval_cosine_map@100": 0.20295798777707474, | |
| "eval_cosine_mrr@10": 0.38407797619047646, | |
| "eval_cosine_ndcg@10": 0.26664320865382884, | |
| "eval_cosine_precision@1": 0.3175, | |
| "eval_cosine_precision@10": 0.13325, | |
| "eval_cosine_precision@3": 0.239, | |
| "eval_cosine_precision@5": 0.192, | |
| "eval_cosine_recall@1": 0.08617124715273779, | |
| "eval_cosine_recall@10": 0.25484566308493556, | |
| "eval_cosine_recall@3": 0.16493508136064844, | |
| "eval_cosine_recall@5": 0.20128376057243394, | |
| "eval_loss": 2.716991424560547, | |
| "eval_runtime": 93.6318, | |
| "eval_samples_per_second": 21.36, | |
| "eval_steps_per_second": 0.342, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.10112, | |
| "grad_norm": 15.54442024230957, | |
| "learning_rate": 4.936708860759494e-05, | |
| "loss": 0.6977, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.10112, | |
| "eval_cosine_accuracy@1": 0.611, | |
| "eval_cosine_accuracy@10": 0.867, | |
| "eval_cosine_accuracy@3": 0.7485, | |
| "eval_cosine_accuracy@5": 0.802, | |
| "eval_cosine_map@100": 0.4842158123573022, | |
| "eval_cosine_mrr@10": 0.6918936507936507, | |
| "eval_cosine_ndcg@10": 0.5731052776794775, | |
| "eval_cosine_precision@1": 0.611, | |
| "eval_cosine_precision@10": 0.26645, | |
| "eval_cosine_precision@3": 0.48033333333333333, | |
| "eval_cosine_precision@5": 0.3886, | |
| "eval_cosine_recall@1": 0.19547386901725833, | |
| "eval_cosine_recall@10": 0.582591867678025, | |
| "eval_cosine_recall@3": 0.38178869386096725, | |
| "eval_cosine_recall@5": 0.47092166449135925, | |
| "eval_loss": 0.3820127546787262, | |
| "eval_runtime": 94.5547, | |
| "eval_samples_per_second": 21.152, | |
| "eval_steps_per_second": 0.338, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.20224, | |
| "grad_norm": 11.99033260345459, | |
| "learning_rate": 4.4452347083926033e-05, | |
| "loss": 0.3696, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.20224, | |
| "eval_cosine_accuracy@1": 0.6395, | |
| "eval_cosine_accuracy@10": 0.8815, | |
| "eval_cosine_accuracy@3": 0.771, | |
| "eval_cosine_accuracy@5": 0.8215, | |
| "eval_cosine_map@100": 0.518232118499808, | |
| "eval_cosine_mrr@10": 0.7167617063492063, | |
| "eval_cosine_ndcg@10": 0.6015016271902943, | |
| "eval_cosine_precision@1": 0.6395, | |
| "eval_cosine_precision@10": 0.27385000000000004, | |
| "eval_cosine_precision@3": 0.4965, | |
| "eval_cosine_precision@5": 0.39940000000000003, | |
| "eval_cosine_recall@1": 0.21741346374345538, | |
| "eval_cosine_recall@10": 0.6032130601430059, | |
| "eval_cosine_recall@3": 0.411628698268123, | |
| "eval_cosine_recall@5": 0.49754964310983035, | |
| "eval_loss": 0.33004653453826904, | |
| "eval_runtime": 94.5446, | |
| "eval_samples_per_second": 21.154, | |
| "eval_steps_per_second": 0.338, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.30336, | |
| "grad_norm": 7.6528425216674805, | |
| "learning_rate": 3.883357041251778e-05, | |
| "loss": 0.3079, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.30336, | |
| "eval_cosine_accuracy@1": 0.6435, | |
| "eval_cosine_accuracy@10": 0.878, | |
| "eval_cosine_accuracy@3": 0.775, | |
| "eval_cosine_accuracy@5": 0.8215, | |
| "eval_cosine_map@100": 0.5383738084900617, | |
| "eval_cosine_mrr@10": 0.7194515873015865, | |
| "eval_cosine_ndcg@10": 0.6155692521247632, | |
| "eval_cosine_precision@1": 0.6435, | |
| "eval_cosine_precision@10": 0.2824, | |
| "eval_cosine_precision@3": 0.5153333333333333, | |
| "eval_cosine_precision@5": 0.4173, | |
| "eval_cosine_recall@1": 0.2183039885889674, | |
| "eval_cosine_recall@10": 0.6148750852993538, | |
| "eval_cosine_recall@3": 0.4253399672316602, | |
| "eval_cosine_recall@5": 0.5135875884980249, | |
| "eval_loss": 0.26221561431884766, | |
| "eval_runtime": 94.3747, | |
| "eval_samples_per_second": 21.192, | |
| "eval_steps_per_second": 0.339, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.40448, | |
| "grad_norm": 7.216766834259033, | |
| "learning_rate": 3.321479374110953e-05, | |
| "loss": 0.2471, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.40448, | |
| "eval_cosine_accuracy@1": 0.6615, | |
| "eval_cosine_accuracy@10": 0.9055, | |
| "eval_cosine_accuracy@3": 0.8035, | |
| "eval_cosine_accuracy@5": 0.8565, | |
| "eval_cosine_map@100": 0.5627360990015163, | |
| "eval_cosine_mrr@10": 0.7432835317460309, | |
| "eval_cosine_ndcg@10": 0.6395267347742315, | |
| "eval_cosine_precision@1": 0.6615, | |
| "eval_cosine_precision@10": 0.2906, | |
| "eval_cosine_precision@3": 0.524, | |
| "eval_cosine_precision@5": 0.4232, | |
| "eval_cosine_recall@1": 0.23236891554586947, | |
| "eval_cosine_recall@10": 0.6438819108166526, | |
| "eval_cosine_recall@3": 0.44429773626339913, | |
| "eval_cosine_recall@5": 0.536914325811876, | |
| "eval_loss": 0.23154108226299286, | |
| "eval_runtime": 94.3188, | |
| "eval_samples_per_second": 21.205, | |
| "eval_steps_per_second": 0.339, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "grad_norm": 6.026930332183838, | |
| "learning_rate": 2.759601706970128e-05, | |
| "loss": 0.2129, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "eval_cosine_accuracy@1": 0.691, | |
| "eval_cosine_accuracy@10": 0.923, | |
| "eval_cosine_accuracy@3": 0.8285, | |
| "eval_cosine_accuracy@5": 0.877, | |
| "eval_cosine_map@100": 0.5988053929521469, | |
| "eval_cosine_mrr@10": 0.7697992063492058, | |
| "eval_cosine_ndcg@10": 0.6734974207335584, | |
| "eval_cosine_precision@1": 0.691, | |
| "eval_cosine_precision@10": 0.30469999999999997, | |
| "eval_cosine_precision@3": 0.5516666666666665, | |
| "eval_cosine_precision@5": 0.4453000000000001, | |
| "eval_cosine_recall@1": 0.2451002751627125, | |
| "eval_cosine_recall@10": 0.6720344352159927, | |
| "eval_cosine_recall@3": 0.4752111086795837, | |
| "eval_cosine_recall@5": 0.5689998210708447, | |
| "eval_loss": 0.1887313276529312, | |
| "eval_runtime": 94.6158, | |
| "eval_samples_per_second": 21.138, | |
| "eval_steps_per_second": 0.338, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.60672, | |
| "grad_norm": 6.972903728485107, | |
| "learning_rate": 2.197724039829303e-05, | |
| "loss": 0.1782, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.60672, | |
| "eval_cosine_accuracy@1": 0.7035, | |
| "eval_cosine_accuracy@10": 0.9305, | |
| "eval_cosine_accuracy@3": 0.842, | |
| "eval_cosine_accuracy@5": 0.888, | |
| "eval_cosine_map@100": 0.6071005790786185, | |
| "eval_cosine_mrr@10": 0.781702777777777, | |
| "eval_cosine_ndcg@10": 0.6808092483733388, | |
| "eval_cosine_precision@1": 0.7035, | |
| "eval_cosine_precision@10": 0.3067, | |
| "eval_cosine_precision@3": 0.562, | |
| "eval_cosine_precision@5": 0.4495, | |
| "eval_cosine_recall@1": 0.25034756468684327, | |
| "eval_cosine_recall@10": 0.6768599428870521, | |
| "eval_cosine_recall@3": 0.4826217978486824, | |
| "eval_cosine_recall@5": 0.5710397017343101, | |
| "eval_loss": 0.17932943999767303, | |
| "eval_runtime": 94.3886, | |
| "eval_samples_per_second": 21.189, | |
| "eval_steps_per_second": 0.339, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.70784, | |
| "grad_norm": 5.119401931762695, | |
| "learning_rate": 1.6358463726884778e-05, | |
| "loss": 0.1601, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.70784, | |
| "eval_cosine_accuracy@1": 0.7265, | |
| "eval_cosine_accuracy@10": 0.942, | |
| "eval_cosine_accuracy@3": 0.862, | |
| "eval_cosine_accuracy@5": 0.903, | |
| "eval_cosine_map@100": 0.6338351727535749, | |
| "eval_cosine_mrr@10": 0.8012726190476185, | |
| "eval_cosine_ndcg@10": 0.7052858216291891, | |
| "eval_cosine_precision@1": 0.7265, | |
| "eval_cosine_precision@10": 0.31645000000000006, | |
| "eval_cosine_precision@3": 0.5746666666666667, | |
| "eval_cosine_precision@5": 0.4643, | |
| "eval_cosine_recall@1": 0.26731400185268916, | |
| "eval_cosine_recall@10": 0.6964398448419685, | |
| "eval_cosine_recall@3": 0.5006986582097148, | |
| "eval_cosine_recall@5": 0.5936279161380148, | |
| "eval_loss": 0.1466195434331894, | |
| "eval_runtime": 94.3179, | |
| "eval_samples_per_second": 21.205, | |
| "eval_steps_per_second": 0.339, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.80896, | |
| "grad_norm": 5.0713982582092285, | |
| "learning_rate": 1.073968705547653e-05, | |
| "loss": 0.1269, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.80896, | |
| "eval_cosine_accuracy@1": 0.7235, | |
| "eval_cosine_accuracy@10": 0.9405, | |
| "eval_cosine_accuracy@3": 0.858, | |
| "eval_cosine_accuracy@5": 0.901, | |
| "eval_cosine_map@100": 0.6337029110249771, | |
| "eval_cosine_mrr@10": 0.7987942460317458, | |
| "eval_cosine_ndcg@10": 0.702721958317939, | |
| "eval_cosine_precision@1": 0.7235, | |
| "eval_cosine_precision@10": 0.31520000000000004, | |
| "eval_cosine_precision@3": 0.573, | |
| "eval_cosine_precision@5": 0.462, | |
| "eval_cosine_recall@1": 0.2652290650574583, | |
| "eval_cosine_recall@10": 0.6960502030808166, | |
| "eval_cosine_recall@3": 0.4984801971314316, | |
| "eval_cosine_recall@5": 0.5914106569114703, | |
| "eval_loss": 0.14512212574481964, | |
| "eval_runtime": 94.7293, | |
| "eval_samples_per_second": 21.113, | |
| "eval_steps_per_second": 0.338, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.91008, | |
| "grad_norm": 4.609349250793457, | |
| "learning_rate": 5.120910384068279e-06, | |
| "loss": 0.1047, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.91008, | |
| "eval_cosine_accuracy@1": 0.744, | |
| "eval_cosine_accuracy@10": 0.9465, | |
| "eval_cosine_accuracy@3": 0.873, | |
| "eval_cosine_accuracy@5": 0.914, | |
| "eval_cosine_map@100": 0.6546158733411345, | |
| "eval_cosine_mrr@10": 0.8165956349206344, | |
| "eval_cosine_ndcg@10": 0.7225101978855893, | |
| "eval_cosine_precision@1": 0.744, | |
| "eval_cosine_precision@10": 0.323, | |
| "eval_cosine_precision@3": 0.5885, | |
| "eval_cosine_precision@5": 0.4711, | |
| "eval_cosine_recall@1": 0.27657015412151464, | |
| "eval_cosine_recall@10": 0.7128635786880844, | |
| "eval_cosine_recall@3": 0.5135658345260549, | |
| "eval_cosine_recall@5": 0.6083090142946717, | |
| "eval_loss": 0.11803647130727768, | |
| "eval_runtime": 94.4018, | |
| "eval_samples_per_second": 21.186, | |
| "eval_steps_per_second": 0.339, | |
| "step": 711 | |
| } | |
| ], | |
| "logging_steps": 79, | |
| "max_steps": 782, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 79, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |