| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8103727714748784, |
| "eval_steps": 100, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0, |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.5733333333333334, |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.88, |
| "eval_NanoBEIR_mean_cosine_map@100": 0.6499700198395969, |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.7025343915343916, |
| "eval_NanoBEIR_mean_cosine_ndcg@1": 0.5733333333333334, |
| "eval_NanoBEIR_mean_cosine_ndcg@5": 0.6933983700329899, |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.5733333333333334, |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.21599999999999997, |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.44222222222222224, |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.8077777777777778, |
| "eval_NanoFEVER_cosine_accuracy@1": 0.66, |
| "eval_NanoFEVER_cosine_accuracy@5": 0.94, |
| "eval_NanoFEVER_cosine_map@100": 0.7456105053991163, |
| "eval_NanoFEVER_cosine_mrr@10": 0.7750000000000001, |
| "eval_NanoFEVER_cosine_ndcg@1": 0.66, |
| "eval_NanoFEVER_cosine_ndcg@5": 0.7909186074531693, |
| "eval_NanoFEVER_cosine_precision@1": 0.66, |
| "eval_NanoFEVER_cosine_precision@5": 0.19999999999999996, |
| "eval_NanoFEVER_cosine_recall@1": 0.6166666666666667, |
| "eval_NanoFEVER_cosine_recall@5": 0.9133333333333333, |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.7, |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.92, |
| "eval_NanoHotpotQA_cosine_map@100": 0.659131307951428, |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.795, |
| "eval_NanoHotpotQA_cosine_ndcg@1": 0.7, |
| "eval_NanoHotpotQA_cosine_ndcg@5": 0.7005437199248264, |
| "eval_NanoHotpotQA_cosine_precision@1": 0.7, |
| "eval_NanoHotpotQA_cosine_precision@5": 0.292, |
| "eval_NanoHotpotQA_cosine_recall@1": 0.35, |
| "eval_NanoHotpotQA_cosine_recall@5": 0.73, |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.36, |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.78, |
| "eval_NanoMSMARCO_cosine_map@100": 0.5451682461682461, |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.5376031746031745, |
| "eval_NanoMSMARCO_cosine_ndcg@1": 0.36, |
| "eval_NanoMSMARCO_cosine_ndcg@5": 0.5887327827209738, |
| "eval_NanoMSMARCO_cosine_precision@1": 0.36, |
| "eval_NanoMSMARCO_cosine_precision@5": 0.156, |
| "eval_NanoMSMARCO_cosine_recall@1": 0.36, |
| "eval_NanoMSMARCO_cosine_recall@5": 0.78, |
| "eval_mldr_loss": 98.74517822265625, |
| "eval_mldr_runtime": 49.7129, |
| "eval_mldr_samples_per_second": 10.058, |
| "eval_mldr_steps_per_second": 0.644, |
| "step": 0 |
| }, |
| { |
| "epoch": 0, |
| "eval_squad_loss": 4.950023174285889, |
| "eval_squad_runtime": 0.9354, |
| "eval_squad_samples_per_second": 51.314, |
| "eval_squad_steps_per_second": 3.207, |
| "step": 0 |
| }, |
| { |
| "epoch": 0, |
| "eval_narrative_qa_loss": 49.38987731933594, |
| "eval_narrative_qa_runtime": 1.0306, |
| "eval_narrative_qa_samples_per_second": 290.117, |
| "eval_narrative_qa_steps_per_second": 18.436, |
| "step": 0 |
| }, |
| { |
| "epoch": 0.01620745542949757, |
| "grad_norm": 1336.0, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 86.6132, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03241491085899514, |
| "grad_norm": 828.0, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 83.7595, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04862236628849271, |
| "grad_norm": 388.0, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 65.5413, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06482982171799027, |
| "grad_norm": 320.0, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 40.6319, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08103727714748785, |
| "grad_norm": 296.0, |
| "learning_rate": 4.545454545454546e-05, |
| "loss": 26.2768, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09724473257698542, |
| "grad_norm": 290.0, |
| "learning_rate": 4.9997781212386585e-05, |
| "loss": 14.4879, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11345218800648298, |
| "grad_norm": 160.0, |
| "learning_rate": 4.9980033274458193e-05, |
| "loss": 9.4888, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12965964343598055, |
| "grad_norm": 212.0, |
| "learning_rate": 4.994454999929178e-05, |
| "loss": 8.831, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1458670988654781, |
| "grad_norm": 109.0, |
| "learning_rate": 4.98913565793218e-05, |
| "loss": 7.6776, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1620745542949757, |
| "grad_norm": 71.5, |
| "learning_rate": 4.982049078084071e-05, |
| "loss": 6.4716, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1620745542949757, |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.32666666666666666, |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6333333333333333, |
| "eval_NanoBEIR_mean_cosine_map@100": 0.39934768753390587, |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.4495740740740741, |
| "eval_NanoBEIR_mean_cosine_ndcg@1": 0.32666666666666666, |
| "eval_NanoBEIR_mean_cosine_ndcg@5": 0.43241374431385005, |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.32666666666666666, |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.13466666666666668, |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.25666666666666665, |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.5466666666666667, |
| "eval_NanoFEVER_cosine_accuracy@1": 0.24, |
| "eval_NanoFEVER_cosine_accuracy@5": 0.66, |
| "eval_NanoFEVER_cosine_map@100": 0.3935747452453245, |
| "eval_NanoFEVER_cosine_mrr@10": 0.39322222222222225, |
| "eval_NanoFEVER_cosine_ndcg@1": 0.24, |
| "eval_NanoFEVER_cosine_ndcg@5": 0.4387408055008589, |
| "eval_NanoFEVER_cosine_precision@1": 0.24, |
| "eval_NanoFEVER_cosine_precision@5": 0.132, |
| "eval_NanoFEVER_cosine_recall@1": 0.24, |
| "eval_NanoFEVER_cosine_recall@5": 0.63, |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.42, |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.58, |
| "eval_NanoHotpotQA_cosine_map@100": 0.3274226457297244, |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.4911666666666667, |
| "eval_NanoHotpotQA_cosine_ndcg@1": 0.42, |
| "eval_NanoHotpotQA_cosine_ndcg@5": 0.35120107956309937, |
| "eval_NanoHotpotQA_cosine_precision@1": 0.42, |
| "eval_NanoHotpotQA_cosine_precision@5": 0.14, |
| "eval_NanoHotpotQA_cosine_recall@1": 0.21, |
| "eval_NanoHotpotQA_cosine_recall@5": 0.35, |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.66, |
| "eval_NanoMSMARCO_cosine_map@100": 0.47704567162666855, |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4643333333333334, |
| "eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
| "eval_NanoMSMARCO_cosine_ndcg@5": 0.5072993478775919, |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, |
| "eval_NanoMSMARCO_cosine_precision@5": 0.132, |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, |
| "eval_NanoMSMARCO_cosine_recall@5": 0.66, |
| "eval_mldr_loss": 4.950920581817627, |
| "eval_mldr_runtime": 14.3444, |
| "eval_mldr_samples_per_second": 34.857, |
| "eval_mldr_steps_per_second": 2.231, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1620745542949757, |
| "eval_squad_loss": 2.0146381855010986, |
| "eval_squad_runtime": 0.9385, |
| "eval_squad_samples_per_second": 51.146, |
| "eval_squad_steps_per_second": 3.197, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1620745542949757, |
| "eval_narrative_qa_loss": 4.486134052276611, |
| "eval_narrative_qa_runtime": 1.1506, |
| "eval_narrative_qa_samples_per_second": 259.856, |
| "eval_narrative_qa_steps_per_second": 16.513, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17828200972447325, |
| "grad_norm": 107.5, |
| "learning_rate": 4.973200291718561e-05, |
| "loss": 6.1959, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.19448946515397084, |
| "grad_norm": 50.75, |
| "learning_rate": 4.962595581301673e-05, |
| "loss": 4.5491, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2106969205834684, |
| "grad_norm": 50.25, |
| "learning_rate": 4.95024247597132e-05, |
| "loss": 4.0871, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.22690437601296595, |
| "grad_norm": 53.25, |
| "learning_rate": 4.936149746191758e-05, |
| "loss": 3.0861, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.24311183144246354, |
| "grad_norm": 25.5, |
| "learning_rate": 4.920327397526731e-05, |
| "loss": 2.7792, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2593192868719611, |
| "grad_norm": 122.0, |
| "learning_rate": 4.9027866635357136e-05, |
| "loss": 2.2107, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2755267423014587, |
| "grad_norm": 410.0, |
| "learning_rate": 4.883539997798303e-05, |
| "loss": 1.6997, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2917341977309562, |
| "grad_norm": 66.0, |
| "learning_rate": 4.8626010650724204e-05, |
| "loss": 1.4911, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3079416531604538, |
| "grad_norm": 1072.0, |
| "learning_rate": 4.8399847315926e-05, |
| "loss": 1.6304, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3241491085899514, |
| "grad_norm": 16.5, |
| "learning_rate": 4.815707054515248e-05, |
| "loss": 1.5042, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3241491085899514, |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.24666666666666667, |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.4533333333333333, |
| "eval_NanoBEIR_mean_cosine_map@100": 0.2917657022707883, |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3391984126984127, |
| "eval_NanoBEIR_mean_cosine_ndcg@1": 0.24666666666666667, |
| "eval_NanoBEIR_mean_cosine_ndcg@5": 0.3047832475255773, |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.24666666666666667, |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.09466666666666669, |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.18333333333333335, |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.37333333333333335, |
| "eval_NanoFEVER_cosine_accuracy@1": 0.08, |
| "eval_NanoFEVER_cosine_accuracy@5": 0.26, |
| "eval_NanoFEVER_cosine_map@100": 0.15831046306304816, |
| "eval_NanoFEVER_cosine_mrr@10": 0.1541904761904762, |
| "eval_NanoFEVER_cosine_ndcg@1": 0.08, |
| "eval_NanoFEVER_cosine_ndcg@5": 0.15530091408199076, |
| "eval_NanoFEVER_cosine_precision@1": 0.08, |
| "eval_NanoFEVER_cosine_precision@5": 0.052000000000000005, |
| "eval_NanoFEVER_cosine_recall@1": 0.07, |
| "eval_NanoFEVER_cosine_recall@5": 0.24, |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.36, |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.5, |
| "eval_NanoHotpotQA_cosine_map@100": 0.2657203181494037, |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.4257142857142857, |
| "eval_NanoHotpotQA_cosine_ndcg@1": 0.36, |
| "eval_NanoHotpotQA_cosine_ndcg@5": 0.2891308855457201, |
| "eval_NanoHotpotQA_cosine_precision@1": 0.36, |
| "eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000003, |
| "eval_NanoHotpotQA_cosine_recall@1": 0.18, |
| "eval_NanoHotpotQA_cosine_recall@5": 0.28, |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, |
| "eval_NanoMSMARCO_cosine_map@100": 0.45126632559991314, |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4376904761904762, |
| "eval_NanoMSMARCO_cosine_ndcg@1": 0.3, |
| "eval_NanoMSMARCO_cosine_ndcg@5": 0.46991794294902106, |
| "eval_NanoMSMARCO_cosine_precision@1": 0.3, |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, |
| "eval_NanoMSMARCO_cosine_recall@1": 0.3, |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, |
| "eval_mldr_loss": 1.3893917798995972, |
| "eval_mldr_runtime": 14.4056, |
| "eval_mldr_samples_per_second": 34.709, |
| "eval_mldr_steps_per_second": 2.221, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3241491085899514, |
| "eval_squad_loss": 0.7646868824958801, |
| "eval_squad_runtime": 1.072, |
| "eval_squad_samples_per_second": 44.776, |
| "eval_squad_steps_per_second": 2.799, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3241491085899514, |
| "eval_narrative_qa_loss": 2.59745192527771, |
| "eval_narrative_qa_runtime": 1.1589, |
| "eval_narrative_qa_samples_per_second": 257.994, |
| "eval_narrative_qa_steps_per_second": 16.394, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.34035656401944897, |
| "grad_norm": 35.0, |
| "learning_rate": 4.7897852705183785e-05, |
| "loss": 1.6009, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3565640194489465, |
| "grad_norm": 1352.0, |
| "learning_rate": 4.7622377835639064e-05, |
| "loss": 1.5156, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3727714748784441, |
| "grad_norm": 704.0, |
| "learning_rate": 4.73308415183119e-05, |
| "loss": 1.5305, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3889789303079417, |
| "grad_norm": 824.0, |
| "learning_rate": 4.702345073831109e-05, |
| "loss": 1.6842, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4051863857374392, |
| "grad_norm": 21760.0, |
| "learning_rate": 4.6700423737105236e-05, |
| "loss": 1.2207, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4213938411669368, |
| "grad_norm": 3040.0, |
| "learning_rate": 4.63619898575755e-05, |
| "loss": 1.2634, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4376012965964344, |
| "grad_norm": 46.5, |
| "learning_rate": 4.600838938118672e-05, |
| "loss": 1.3175, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4538087520259319, |
| "grad_norm": 6112.0, |
| "learning_rate": 4.563987335739216e-05, |
| "loss": 1.3107, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4700162074554295, |
| "grad_norm": 2256.0, |
| "learning_rate": 4.525670342539332e-05, |
| "loss": 1.3163, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4862236628849271, |
| "grad_norm": 860.0, |
| "learning_rate": 4.485915162838122e-05, |
| "loss": 1.3703, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4862236628849271, |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.18666666666666668, |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.37333333333333335, |
| "eval_NanoBEIR_mean_cosine_map@100": 0.24055436231400043, |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.26788624338624334, |
| "eval_NanoBEIR_mean_cosine_ndcg@1": 0.18666666666666668, |
| "eval_NanoBEIR_mean_cosine_ndcg@5": 0.24938490224868604, |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.18666666666666668, |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.07733333333333335, |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.15, |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.31666666666666665, |
| "eval_NanoFEVER_cosine_accuracy@1": 0.06, |
| "eval_NanoFEVER_cosine_accuracy@5": 0.18, |
| "eval_NanoFEVER_cosine_map@100": 0.11320690658806287, |
| "eval_NanoFEVER_cosine_mrr@10": 0.11538888888888887, |
| "eval_NanoFEVER_cosine_ndcg@1": 0.06, |
| "eval_NanoFEVER_cosine_ndcg@5": 0.11246425246579403, |
| "eval_NanoFEVER_cosine_precision@1": 0.06, |
| "eval_NanoFEVER_cosine_precision@5": 0.036000000000000004, |
| "eval_NanoFEVER_cosine_recall@1": 0.05, |
| "eval_NanoFEVER_cosine_recall@5": 0.17, |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.2, |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.36, |
| "eval_NanoHotpotQA_cosine_map@100": 0.17264044237203358, |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.26374603174603173, |
| "eval_NanoHotpotQA_cosine_ndcg@1": 0.2, |
| "eval_NanoHotpotQA_cosine_ndcg@5": 0.18589124040083965, |
| "eval_NanoHotpotQA_cosine_precision@1": 0.2, |
| "eval_NanoHotpotQA_cosine_precision@5": 0.08, |
| "eval_NanoHotpotQA_cosine_recall@1": 0.1, |
| "eval_NanoHotpotQA_cosine_recall@5": 0.2, |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, |
| "eval_NanoMSMARCO_cosine_map@100": 0.4358157379819049, |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.42452380952380947, |
| "eval_NanoMSMARCO_cosine_ndcg@1": 0.3, |
| "eval_NanoMSMARCO_cosine_ndcg@5": 0.44979921387942445, |
| "eval_NanoMSMARCO_cosine_precision@1": 0.3, |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, |
| "eval_NanoMSMARCO_cosine_recall@1": 0.3, |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, |
| "eval_mldr_loss": 1.218342900276184, |
| "eval_mldr_runtime": 14.4679, |
| "eval_mldr_samples_per_second": 34.559, |
| "eval_mldr_steps_per_second": 2.212, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4862236628849271, |
| "eval_squad_loss": 1.0243560075759888, |
| "eval_squad_runtime": 0.9647, |
| "eval_squad_samples_per_second": 49.755, |
| "eval_squad_steps_per_second": 3.11, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4862236628849271, |
| "eval_narrative_qa_loss": 2.635084629058838, |
| "eval_narrative_qa_runtime": 1.1616, |
| "eval_narrative_qa_samples_per_second": 257.409, |
| "eval_narrative_qa_steps_per_second": 16.357, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5024311183144247, |
| "grad_norm": 39.25, |
| "learning_rate": 4.444750022039099e-05, |
| "loss": 1.2901, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5186385737439222, |
| "grad_norm": 260.0, |
| "learning_rate": 4.4022041465907036e-05, |
| "loss": 1.245, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5348460291734197, |
| "grad_norm": 2192.0, |
| "learning_rate": 4.358307743236092e-05, |
| "loss": 1.1749, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5510534846029174, |
| "grad_norm": 1064.0, |
| "learning_rate": 4.3130919775669374e-05, |
| "loss": 1.0522, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5672609400324149, |
| "grad_norm": 1120.0, |
| "learning_rate": 4.2665889518964684e-05, |
| "loss": 1.1319, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5834683954619124, |
| "grad_norm": 1552.0, |
| "learning_rate": 4.2188316824674504e-05, |
| "loss": 1.1986, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5996758508914101, |
| "grad_norm": 133.0, |
| "learning_rate": 4.169854076011292e-05, |
| "loss": 1.3508, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6158833063209076, |
| "grad_norm": 5408.0, |
| "learning_rate": 4.119690905674937e-05, |
| "loss": 1.3465, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6320907617504052, |
| "grad_norm": 1624.0, |
| "learning_rate": 4.068377786332593e-05, |
| "loss": 1.3062, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6482982171799028, |
| "grad_norm": 1192.0, |
| "learning_rate": 4.0159511492998746e-05, |
| "loss": 0.9974, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6482982171799028, |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.20000000000000004, |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.39333333333333337, |
| "eval_NanoBEIR_mean_cosine_map@100": 0.25566257669608067, |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.2828835978835979, |
| "eval_NanoBEIR_mean_cosine_ndcg@1": 0.20000000000000004, |
| "eval_NanoBEIR_mean_cosine_ndcg@5": 0.2711361604281084, |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.20000000000000004, |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.08266666666666668, |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.16333333333333333, |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.3433333333333333, |
| "eval_NanoFEVER_cosine_accuracy@1": 0.08, |
| "eval_NanoFEVER_cosine_accuracy@5": 0.2, |
| "eval_NanoFEVER_cosine_map@100": 0.12628338770667222, |
| "eval_NanoFEVER_cosine_mrr@10": 0.1275, |
| "eval_NanoFEVER_cosine_ndcg@1": 0.08, |
| "eval_NanoFEVER_cosine_ndcg@5": 0.1285946755597121, |
| "eval_NanoFEVER_cosine_precision@1": 0.08, |
| "eval_NanoFEVER_cosine_precision@5": 0.04, |
| "eval_NanoFEVER_cosine_recall@1": 0.07, |
| "eval_NanoFEVER_cosine_recall@5": 0.18, |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.2, |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.32, |
| "eval_NanoHotpotQA_cosine_map@100": 0.17161850356661693, |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.26146031746031745, |
| "eval_NanoHotpotQA_cosine_ndcg@1": 0.2, |
| "eval_NanoHotpotQA_cosine_ndcg@5": 0.18168469434151963, |
| "eval_NanoHotpotQA_cosine_precision@1": 0.2, |
| "eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, |
| "eval_NanoHotpotQA_cosine_recall@1": 0.1, |
| "eval_NanoHotpotQA_cosine_recall@5": 0.19, |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.66, |
| "eval_NanoMSMARCO_cosine_map@100": 0.4690858388149529, |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.45969047619047615, |
| "eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
| "eval_NanoMSMARCO_cosine_ndcg@5": 0.5031291113830936, |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, |
| "eval_NanoMSMARCO_cosine_precision@5": 0.132, |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, |
| "eval_NanoMSMARCO_cosine_recall@5": 0.66, |
| "eval_mldr_loss": 1.7250500917434692, |
| "eval_mldr_runtime": 14.4371, |
| "eval_mldr_samples_per_second": 34.633, |
| "eval_mldr_steps_per_second": 2.217, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6482982171799028, |
| "eval_squad_loss": 0.8224272131919861, |
| "eval_squad_runtime": 0.9831, |
| "eval_squad_samples_per_second": 48.823, |
| "eval_squad_steps_per_second": 3.051, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6482982171799028, |
| "eval_narrative_qa_loss": 2.191936492919922, |
| "eval_narrative_qa_runtime": 1.1776, |
| "eval_narrative_qa_samples_per_second": 253.914, |
| "eval_narrative_qa_steps_per_second": 16.135, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6645056726094003, |
| "grad_norm": 796.0, |
| "learning_rate": 3.962448216468275e-05, |
| "loss": 3.2988, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6807131280388979, |
| "grad_norm": 9216.0, |
| "learning_rate": 3.9079069738783484e-05, |
| "loss": 1.3305, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6969205834683955, |
| "grad_norm": 227.0, |
| "learning_rate": 3.852366144750358e-05, |
| "loss": 1.2771, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.713128038897893, |
| "grad_norm": 1456.0, |
| "learning_rate": 3.7958651619915495e-05, |
| "loss": 1.0724, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7293354943273906, |
| "grad_norm": 704.0, |
| "learning_rate": 3.738444140199549e-05, |
| "loss": 1.2473, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7455429497568882, |
| "grad_norm": 4608.0, |
| "learning_rate": 3.680143847181783e-05, |
| "loss": 1.4045, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7617504051863857, |
| "grad_norm": 8384.0, |
| "learning_rate": 3.621005675011127e-05, |
| "loss": 1.6167, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7779578606158833, |
| "grad_norm": 10752.0, |
| "learning_rate": 3.5610716106383426e-05, |
| "loss": 1.427, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7941653160453809, |
| "grad_norm": 32512.0, |
| "learning_rate": 3.500384206082155e-05, |
| "loss": 1.256, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8103727714748784, |
| "grad_norm": 2080.0, |
| "learning_rate": 3.438986548218155e-05, |
| "loss": 1.7336, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8103727714748784, |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.22666666666666668, |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.35333333333333333, |
| "eval_NanoBEIR_mean_cosine_map@100": 0.25285734426656203, |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.28679100529100526, |
| "eval_NanoBEIR_mean_cosine_ndcg@1": 0.22666666666666668, |
| "eval_NanoBEIR_mean_cosine_ndcg@5": 0.26096692405722594, |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.22666666666666668, |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001, |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.17666666666666667, |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.31, |
| "eval_NanoFEVER_cosine_accuracy@1": 0.08, |
| "eval_NanoFEVER_cosine_accuracy@5": 0.18, |
| "eval_NanoFEVER_cosine_map@100": 0.11514089868666741, |
| "eval_NanoFEVER_cosine_mrr@10": 0.11641269841269843, |
| "eval_NanoFEVER_cosine_ndcg@1": 0.08, |
| "eval_NanoFEVER_cosine_ndcg@5": 0.11458961164975079, |
| "eval_NanoFEVER_cosine_precision@1": 0.08, |
| "eval_NanoFEVER_cosine_precision@5": 0.036000000000000004, |
| "eval_NanoFEVER_cosine_recall@1": 0.07, |
| "eval_NanoFEVER_cosine_recall@5": 0.16, |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.28, |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.3, |
| "eval_NanoHotpotQA_cosine_map@100": 0.1921273663105645, |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.3077380952380952, |
| "eval_NanoHotpotQA_cosine_ndcg@1": 0.28, |
| "eval_NanoHotpotQA_cosine_ndcg@5": 0.202243938877232, |
| "eval_NanoHotpotQA_cosine_precision@1": 0.28, |
| "eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, |
| "eval_NanoHotpotQA_cosine_recall@1": 0.14, |
| "eval_NanoHotpotQA_cosine_recall@5": 0.19, |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, |
| "eval_NanoMSMARCO_cosine_map@100": 0.4513037678024542, |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4362222222222223, |
| "eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
| "eval_NanoMSMARCO_cosine_ndcg@5": 0.4660672216446949, |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, |
| "eval_mldr_loss": 1.2118109464645386, |
| "eval_mldr_runtime": 14.4727, |
| "eval_mldr_samples_per_second": 34.548, |
| "eval_mldr_steps_per_second": 2.211, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8103727714748784, |
| "eval_squad_loss": 0.669793426990509, |
| "eval_squad_runtime": 0.9528, |
| "eval_squad_samples_per_second": 50.377, |
| "eval_squad_steps_per_second": 3.149, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8103727714748784, |
| "eval_narrative_qa_loss": 2.0540804862976074, |
| "eval_narrative_qa_runtime": 1.1722, |
| "eval_narrative_qa_samples_per_second": 255.077, |
| "eval_narrative_qa_steps_per_second": 16.209, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1234, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|