mlconti's picture
Upload folder using huggingface_hub
d2f8fa7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8103727714748784,
"eval_steps": 100,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.5733333333333334,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.88,
"eval_NanoBEIR_mean_cosine_map@100": 0.6499700198395969,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.7025343915343916,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.5733333333333334,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.6933983700329899,
"eval_NanoBEIR_mean_cosine_precision@1": 0.5733333333333334,
"eval_NanoBEIR_mean_cosine_precision@5": 0.21599999999999997,
"eval_NanoBEIR_mean_cosine_recall@1": 0.44222222222222224,
"eval_NanoBEIR_mean_cosine_recall@5": 0.8077777777777778,
"eval_NanoFEVER_cosine_accuracy@1": 0.66,
"eval_NanoFEVER_cosine_accuracy@5": 0.94,
"eval_NanoFEVER_cosine_map@100": 0.7456105053991163,
"eval_NanoFEVER_cosine_mrr@10": 0.7750000000000001,
"eval_NanoFEVER_cosine_ndcg@1": 0.66,
"eval_NanoFEVER_cosine_ndcg@5": 0.7909186074531693,
"eval_NanoFEVER_cosine_precision@1": 0.66,
"eval_NanoFEVER_cosine_precision@5": 0.19999999999999996,
"eval_NanoFEVER_cosine_recall@1": 0.6166666666666667,
"eval_NanoFEVER_cosine_recall@5": 0.9133333333333333,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.7,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.92,
"eval_NanoHotpotQA_cosine_map@100": 0.659131307951428,
"eval_NanoHotpotQA_cosine_mrr@10": 0.795,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.7,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.7005437199248264,
"eval_NanoHotpotQA_cosine_precision@1": 0.7,
"eval_NanoHotpotQA_cosine_precision@5": 0.292,
"eval_NanoHotpotQA_cosine_recall@1": 0.35,
"eval_NanoHotpotQA_cosine_recall@5": 0.73,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.36,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.78,
"eval_NanoMSMARCO_cosine_map@100": 0.5451682461682461,
"eval_NanoMSMARCO_cosine_mrr@10": 0.5376031746031745,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.36,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5887327827209738,
"eval_NanoMSMARCO_cosine_precision@1": 0.36,
"eval_NanoMSMARCO_cosine_precision@5": 0.156,
"eval_NanoMSMARCO_cosine_recall@1": 0.36,
"eval_NanoMSMARCO_cosine_recall@5": 0.78,
"eval_mldr_loss": 98.74517822265625,
"eval_mldr_runtime": 49.7129,
"eval_mldr_samples_per_second": 10.058,
"eval_mldr_steps_per_second": 0.644,
"step": 0
},
{
"epoch": 0,
"eval_squad_loss": 4.950023174285889,
"eval_squad_runtime": 0.9354,
"eval_squad_samples_per_second": 51.314,
"eval_squad_steps_per_second": 3.207,
"step": 0
},
{
"epoch": 0,
"eval_narrative_qa_loss": 49.38987731933594,
"eval_narrative_qa_runtime": 1.0306,
"eval_narrative_qa_samples_per_second": 290.117,
"eval_narrative_qa_steps_per_second": 18.436,
"step": 0
},
{
"epoch": 0.01620745542949757,
"grad_norm": 1336.0,
"learning_rate": 9.090909090909091e-06,
"loss": 86.6132,
"step": 10
},
{
"epoch": 0.03241491085899514,
"grad_norm": 828.0,
"learning_rate": 1.8181818181818182e-05,
"loss": 83.7595,
"step": 20
},
{
"epoch": 0.04862236628849271,
"grad_norm": 388.0,
"learning_rate": 2.7272727272727273e-05,
"loss": 65.5413,
"step": 30
},
{
"epoch": 0.06482982171799027,
"grad_norm": 320.0,
"learning_rate": 3.6363636363636364e-05,
"loss": 40.6319,
"step": 40
},
{
"epoch": 0.08103727714748785,
"grad_norm": 296.0,
"learning_rate": 4.545454545454546e-05,
"loss": 26.2768,
"step": 50
},
{
"epoch": 0.09724473257698542,
"grad_norm": 290.0,
"learning_rate": 4.9997781212386585e-05,
"loss": 14.4879,
"step": 60
},
{
"epoch": 0.11345218800648298,
"grad_norm": 160.0,
"learning_rate": 4.9980033274458193e-05,
"loss": 9.4888,
"step": 70
},
{
"epoch": 0.12965964343598055,
"grad_norm": 212.0,
"learning_rate": 4.994454999929178e-05,
"loss": 8.831,
"step": 80
},
{
"epoch": 0.1458670988654781,
"grad_norm": 109.0,
"learning_rate": 4.98913565793218e-05,
"loss": 7.6776,
"step": 90
},
{
"epoch": 0.1620745542949757,
"grad_norm": 71.5,
"learning_rate": 4.982049078084071e-05,
"loss": 6.4716,
"step": 100
},
{
"epoch": 0.1620745542949757,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.32666666666666666,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6333333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.39934768753390587,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.4495740740740741,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.32666666666666666,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.43241374431385005,
"eval_NanoBEIR_mean_cosine_precision@1": 0.32666666666666666,
"eval_NanoBEIR_mean_cosine_precision@5": 0.13466666666666668,
"eval_NanoBEIR_mean_cosine_recall@1": 0.25666666666666665,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5466666666666667,
"eval_NanoFEVER_cosine_accuracy@1": 0.24,
"eval_NanoFEVER_cosine_accuracy@5": 0.66,
"eval_NanoFEVER_cosine_map@100": 0.3935747452453245,
"eval_NanoFEVER_cosine_mrr@10": 0.39322222222222225,
"eval_NanoFEVER_cosine_ndcg@1": 0.24,
"eval_NanoFEVER_cosine_ndcg@5": 0.4387408055008589,
"eval_NanoFEVER_cosine_precision@1": 0.24,
"eval_NanoFEVER_cosine_precision@5": 0.132,
"eval_NanoFEVER_cosine_recall@1": 0.24,
"eval_NanoFEVER_cosine_recall@5": 0.63,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.42,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.58,
"eval_NanoHotpotQA_cosine_map@100": 0.3274226457297244,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4911666666666667,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.42,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.35120107956309937,
"eval_NanoHotpotQA_cosine_precision@1": 0.42,
"eval_NanoHotpotQA_cosine_precision@5": 0.14,
"eval_NanoHotpotQA_cosine_recall@1": 0.21,
"eval_NanoHotpotQA_cosine_recall@5": 0.35,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.66,
"eval_NanoMSMARCO_cosine_map@100": 0.47704567162666855,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4643333333333334,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5072993478775919,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.132,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.66,
"eval_mldr_loss": 4.950920581817627,
"eval_mldr_runtime": 14.3444,
"eval_mldr_samples_per_second": 34.857,
"eval_mldr_steps_per_second": 2.231,
"step": 100
},
{
"epoch": 0.1620745542949757,
"eval_squad_loss": 2.0146381855010986,
"eval_squad_runtime": 0.9385,
"eval_squad_samples_per_second": 51.146,
"eval_squad_steps_per_second": 3.197,
"step": 100
},
{
"epoch": 0.1620745542949757,
"eval_narrative_qa_loss": 4.486134052276611,
"eval_narrative_qa_runtime": 1.1506,
"eval_narrative_qa_samples_per_second": 259.856,
"eval_narrative_qa_steps_per_second": 16.513,
"step": 100
},
{
"epoch": 0.17828200972447325,
"grad_norm": 107.5,
"learning_rate": 4.973200291718561e-05,
"loss": 6.1959,
"step": 110
},
{
"epoch": 0.19448946515397084,
"grad_norm": 50.75,
"learning_rate": 4.962595581301673e-05,
"loss": 4.5491,
"step": 120
},
{
"epoch": 0.2106969205834684,
"grad_norm": 50.25,
"learning_rate": 4.95024247597132e-05,
"loss": 4.0871,
"step": 130
},
{
"epoch": 0.22690437601296595,
"grad_norm": 53.25,
"learning_rate": 4.936149746191758e-05,
"loss": 3.0861,
"step": 140
},
{
"epoch": 0.24311183144246354,
"grad_norm": 25.5,
"learning_rate": 4.920327397526731e-05,
"loss": 2.7792,
"step": 150
},
{
"epoch": 0.2593192868719611,
"grad_norm": 122.0,
"learning_rate": 4.9027866635357136e-05,
"loss": 2.2107,
"step": 160
},
{
"epoch": 0.2755267423014587,
"grad_norm": 410.0,
"learning_rate": 4.883539997798303e-05,
"loss": 1.6997,
"step": 170
},
{
"epoch": 0.2917341977309562,
"grad_norm": 66.0,
"learning_rate": 4.8626010650724204e-05,
"loss": 1.4911,
"step": 180
},
{
"epoch": 0.3079416531604538,
"grad_norm": 1072.0,
"learning_rate": 4.8399847315926e-05,
"loss": 1.6304,
"step": 190
},
{
"epoch": 0.3241491085899514,
"grad_norm": 16.5,
"learning_rate": 4.815707054515248e-05,
"loss": 1.5042,
"step": 200
},
{
"epoch": 0.3241491085899514,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.24666666666666667,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.4533333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.2917657022707883,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3391984126984127,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.24666666666666667,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.3047832475255773,
"eval_NanoBEIR_mean_cosine_precision@1": 0.24666666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.09466666666666669,
"eval_NanoBEIR_mean_cosine_recall@1": 0.18333333333333335,
"eval_NanoBEIR_mean_cosine_recall@5": 0.37333333333333335,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.26,
"eval_NanoFEVER_cosine_map@100": 0.15831046306304816,
"eval_NanoFEVER_cosine_mrr@10": 0.1541904761904762,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.15530091408199076,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.052000000000000005,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.24,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.36,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.5,
"eval_NanoHotpotQA_cosine_map@100": 0.2657203181494037,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4257142857142857,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.36,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.2891308855457201,
"eval_NanoHotpotQA_cosine_precision@1": 0.36,
"eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000003,
"eval_NanoHotpotQA_cosine_recall@1": 0.18,
"eval_NanoHotpotQA_cosine_recall@5": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.45126632559991314,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4376904761904762,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.46991794294902106,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mldr_loss": 1.3893917798995972,
"eval_mldr_runtime": 14.4056,
"eval_mldr_samples_per_second": 34.709,
"eval_mldr_steps_per_second": 2.221,
"step": 200
},
{
"epoch": 0.3241491085899514,
"eval_squad_loss": 0.7646868824958801,
"eval_squad_runtime": 1.072,
"eval_squad_samples_per_second": 44.776,
"eval_squad_steps_per_second": 2.799,
"step": 200
},
{
"epoch": 0.3241491085899514,
"eval_narrative_qa_loss": 2.59745192527771,
"eval_narrative_qa_runtime": 1.1589,
"eval_narrative_qa_samples_per_second": 257.994,
"eval_narrative_qa_steps_per_second": 16.394,
"step": 200
},
{
"epoch": 0.34035656401944897,
"grad_norm": 35.0,
"learning_rate": 4.7897852705183785e-05,
"loss": 1.6009,
"step": 210
},
{
"epoch": 0.3565640194489465,
"grad_norm": 1352.0,
"learning_rate": 4.7622377835639064e-05,
"loss": 1.5156,
"step": 220
},
{
"epoch": 0.3727714748784441,
"grad_norm": 704.0,
"learning_rate": 4.73308415183119e-05,
"loss": 1.5305,
"step": 230
},
{
"epoch": 0.3889789303079417,
"grad_norm": 824.0,
"learning_rate": 4.702345073831109e-05,
"loss": 1.6842,
"step": 240
},
{
"epoch": 0.4051863857374392,
"grad_norm": 21760.0,
"learning_rate": 4.6700423737105236e-05,
"loss": 1.2207,
"step": 250
},
{
"epoch": 0.4213938411669368,
"grad_norm": 3040.0,
"learning_rate": 4.63619898575755e-05,
"loss": 1.2634,
"step": 260
},
{
"epoch": 0.4376012965964344,
"grad_norm": 46.5,
"learning_rate": 4.600838938118672e-05,
"loss": 1.3175,
"step": 270
},
{
"epoch": 0.4538087520259319,
"grad_norm": 6112.0,
"learning_rate": 4.563987335739216e-05,
"loss": 1.3107,
"step": 280
},
{
"epoch": 0.4700162074554295,
"grad_norm": 2256.0,
"learning_rate": 4.525670342539332e-05,
"loss": 1.3163,
"step": 290
},
{
"epoch": 0.4862236628849271,
"grad_norm": 860.0,
"learning_rate": 4.485915162838122e-05,
"loss": 1.3703,
"step": 300
},
{
"epoch": 0.4862236628849271,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.37333333333333335,
"eval_NanoBEIR_mean_cosine_map@100": 0.24055436231400043,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.26788624338624334,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.24938490224868604,
"eval_NanoBEIR_mean_cosine_precision@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07733333333333335,
"eval_NanoBEIR_mean_cosine_recall@1": 0.15,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31666666666666665,
"eval_NanoFEVER_cosine_accuracy@1": 0.06,
"eval_NanoFEVER_cosine_accuracy@5": 0.18,
"eval_NanoFEVER_cosine_map@100": 0.11320690658806287,
"eval_NanoFEVER_cosine_mrr@10": 0.11538888888888887,
"eval_NanoFEVER_cosine_ndcg@1": 0.06,
"eval_NanoFEVER_cosine_ndcg@5": 0.11246425246579403,
"eval_NanoFEVER_cosine_precision@1": 0.06,
"eval_NanoFEVER_cosine_precision@5": 0.036000000000000004,
"eval_NanoFEVER_cosine_recall@1": 0.05,
"eval_NanoFEVER_cosine_recall@5": 0.17,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.36,
"eval_NanoHotpotQA_cosine_map@100": 0.17264044237203358,
"eval_NanoHotpotQA_cosine_mrr@10": 0.26374603174603173,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.18589124040083965,
"eval_NanoHotpotQA_cosine_precision@1": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.08,
"eval_NanoHotpotQA_cosine_recall@1": 0.1,
"eval_NanoHotpotQA_cosine_recall@5": 0.2,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4358157379819049,
"eval_NanoMSMARCO_cosine_mrr@10": 0.42452380952380947,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.44979921387942445,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mldr_loss": 1.218342900276184,
"eval_mldr_runtime": 14.4679,
"eval_mldr_samples_per_second": 34.559,
"eval_mldr_steps_per_second": 2.212,
"step": 300
},
{
"epoch": 0.4862236628849271,
"eval_squad_loss": 1.0243560075759888,
"eval_squad_runtime": 0.9647,
"eval_squad_samples_per_second": 49.755,
"eval_squad_steps_per_second": 3.11,
"step": 300
},
{
"epoch": 0.4862236628849271,
"eval_narrative_qa_loss": 2.635084629058838,
"eval_narrative_qa_runtime": 1.1616,
"eval_narrative_qa_samples_per_second": 257.409,
"eval_narrative_qa_steps_per_second": 16.357,
"step": 300
},
{
"epoch": 0.5024311183144247,
"grad_norm": 39.25,
"learning_rate": 4.444750022039099e-05,
"loss": 1.2901,
"step": 310
},
{
"epoch": 0.5186385737439222,
"grad_norm": 260.0,
"learning_rate": 4.4022041465907036e-05,
"loss": 1.245,
"step": 320
},
{
"epoch": 0.5348460291734197,
"grad_norm": 2192.0,
"learning_rate": 4.358307743236092e-05,
"loss": 1.1749,
"step": 330
},
{
"epoch": 0.5510534846029174,
"grad_norm": 1064.0,
"learning_rate": 4.3130919775669374e-05,
"loss": 1.0522,
"step": 340
},
{
"epoch": 0.5672609400324149,
"grad_norm": 1120.0,
"learning_rate": 4.2665889518964684e-05,
"loss": 1.1319,
"step": 350
},
{
"epoch": 0.5834683954619124,
"grad_norm": 1552.0,
"learning_rate": 4.2188316824674504e-05,
"loss": 1.1986,
"step": 360
},
{
"epoch": 0.5996758508914101,
"grad_norm": 133.0,
"learning_rate": 4.169854076011292e-05,
"loss": 1.3508,
"step": 370
},
{
"epoch": 0.6158833063209076,
"grad_norm": 5408.0,
"learning_rate": 4.119690905674937e-05,
"loss": 1.3465,
"step": 380
},
{
"epoch": 0.6320907617504052,
"grad_norm": 1624.0,
"learning_rate": 4.068377786332593e-05,
"loss": 1.3062,
"step": 390
},
{
"epoch": 0.6482982171799028,
"grad_norm": 1192.0,
"learning_rate": 4.0159511492998746e-05,
"loss": 0.9974,
"step": 400
},
{
"epoch": 0.6482982171799028,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.20000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.39333333333333337,
"eval_NanoBEIR_mean_cosine_map@100": 0.25566257669608067,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2828835978835979,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.20000000000000004,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.2711361604281084,
"eval_NanoBEIR_mean_cosine_precision@1": 0.20000000000000004,
"eval_NanoBEIR_mean_cosine_precision@5": 0.08266666666666668,
"eval_NanoBEIR_mean_cosine_recall@1": 0.16333333333333333,
"eval_NanoBEIR_mean_cosine_recall@5": 0.3433333333333333,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.2,
"eval_NanoFEVER_cosine_map@100": 0.12628338770667222,
"eval_NanoFEVER_cosine_mrr@10": 0.1275,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.1285946755597121,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.04,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.18,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32,
"eval_NanoHotpotQA_cosine_map@100": 0.17161850356661693,
"eval_NanoHotpotQA_cosine_mrr@10": 0.26146031746031745,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.18168469434151963,
"eval_NanoHotpotQA_cosine_precision@1": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.1,
"eval_NanoHotpotQA_cosine_recall@5": 0.19,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.66,
"eval_NanoMSMARCO_cosine_map@100": 0.4690858388149529,
"eval_NanoMSMARCO_cosine_mrr@10": 0.45969047619047615,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5031291113830936,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.132,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.66,
"eval_mldr_loss": 1.7250500917434692,
"eval_mldr_runtime": 14.4371,
"eval_mldr_samples_per_second": 34.633,
"eval_mldr_steps_per_second": 2.217,
"step": 400
},
{
"epoch": 0.6482982171799028,
"eval_squad_loss": 0.8224272131919861,
"eval_squad_runtime": 0.9831,
"eval_squad_samples_per_second": 48.823,
"eval_squad_steps_per_second": 3.051,
"step": 400
},
{
"epoch": 0.6482982171799028,
"eval_narrative_qa_loss": 2.191936492919922,
"eval_narrative_qa_runtime": 1.1776,
"eval_narrative_qa_samples_per_second": 253.914,
"eval_narrative_qa_steps_per_second": 16.135,
"step": 400
},
{
"epoch": 0.6645056726094003,
"grad_norm": 796.0,
"learning_rate": 3.962448216468275e-05,
"loss": 3.2988,
"step": 410
},
{
"epoch": 0.6807131280388979,
"grad_norm": 9216.0,
"learning_rate": 3.9079069738783484e-05,
"loss": 1.3305,
"step": 420
},
{
"epoch": 0.6969205834683955,
"grad_norm": 227.0,
"learning_rate": 3.852366144750358e-05,
"loss": 1.2771,
"step": 430
},
{
"epoch": 0.713128038897893,
"grad_norm": 1456.0,
"learning_rate": 3.7958651619915495e-05,
"loss": 1.0724,
"step": 440
},
{
"epoch": 0.7293354943273906,
"grad_norm": 704.0,
"learning_rate": 3.738444140199549e-05,
"loss": 1.2473,
"step": 450
},
{
"epoch": 0.7455429497568882,
"grad_norm": 4608.0,
"learning_rate": 3.680143847181783e-05,
"loss": 1.4045,
"step": 460
},
{
"epoch": 0.7617504051863857,
"grad_norm": 8384.0,
"learning_rate": 3.621005675011127e-05,
"loss": 1.6167,
"step": 470
},
{
"epoch": 0.7779578606158833,
"grad_norm": 10752.0,
"learning_rate": 3.5610716106383426e-05,
"loss": 1.427,
"step": 480
},
{
"epoch": 0.7941653160453809,
"grad_norm": 32512.0,
"learning_rate": 3.500384206082155e-05,
"loss": 1.256,
"step": 490
},
{
"epoch": 0.8103727714748784,
"grad_norm": 2080.0,
"learning_rate": 3.438986548218155e-05,
"loss": 1.7336,
"step": 500
},
{
"epoch": 0.8103727714748784,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.35333333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.25285734426656203,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.28679100529100526,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.26096692405722594,
"eval_NanoBEIR_mean_cosine_precision@1": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.17666666666666667,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.18,
"eval_NanoFEVER_cosine_map@100": 0.11514089868666741,
"eval_NanoFEVER_cosine_mrr@10": 0.11641269841269843,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.11458961164975079,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.036000000000000004,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.16,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.28,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.3,
"eval_NanoHotpotQA_cosine_map@100": 0.1921273663105645,
"eval_NanoHotpotQA_cosine_mrr@10": 0.3077380952380952,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.28,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.202243938877232,
"eval_NanoHotpotQA_cosine_precision@1": 0.28,
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.14,
"eval_NanoHotpotQA_cosine_recall@5": 0.19,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4513037678024542,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4362222222222223,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4660672216446949,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mldr_loss": 1.2118109464645386,
"eval_mldr_runtime": 14.4727,
"eval_mldr_samples_per_second": 34.548,
"eval_mldr_steps_per_second": 2.211,
"step": 500
},
{
"epoch": 0.8103727714748784,
"eval_squad_loss": 0.669793426990509,
"eval_squad_runtime": 0.9528,
"eval_squad_samples_per_second": 50.377,
"eval_squad_steps_per_second": 3.149,
"step": 500
},
{
"epoch": 0.8103727714748784,
"eval_narrative_qa_loss": 2.0540804862976074,
"eval_narrative_qa_runtime": 1.1722,
"eval_narrative_qa_samples_per_second": 255.077,
"eval_narrative_qa_steps_per_second": 16.209,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 1234,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}