HackerSignal / benchmark_v2 /all_results.json
DatasetSubmission's picture
Upload benchmark_v2/all_results.json with huggingface_hub
ae1b6ee verified
{
"task1_cve_linkage": {
"BM25": {
"R@1": 0.445,
"R@5": 0.64,
"R@10": 0.707,
"MRR": 0.528,
"verified": true
},
"all-MiniLM-L6-v2": {
"R@1": 0.431,
"R@5": 0.656,
"R@10": 0.727,
"MRR": 0.53,
"verified": true
},
"all-mpnet-base-v2": {
"R@1": 0.46,
"R@5": 0.64,
"R@10": 0.718,
"MRR": 0.54,
"verified": true
},
"Hybrid BM25+mpnet": {
"R@1": 0.486,
"R@5": 0.675,
"R@10": 0.736,
"MRR": 0.567,
"verified": true
}
},
"task2_signal_detection": {
"Majority": {
"F1": 0.0,
"Precision": 0.0,
"Recall": 0.0,
"verified": true
},
"TF-IDF + LR": {
"F1": 0.928,
"Precision": 1.0,
"Recall": 0.865,
"verified": true
},
"SecBERT (FT)": {
"F1": 0.943,
"Precision": 1.0,
"Recall": 0.892,
"verified": true
}
},
"task3_temporal_generalization": {
"BM25": {
"R@1": 0.443,
"R@5": 0.654,
"R@10": 0.715,
"MRR": 0.533,
"verified": true
},
"all-MiniLM-L6-v2": {
"R@1": 0.453,
"R@5": 0.669,
"R@10": 0.728,
"MRR": 0.546,
"verified": true
},
"all-mpnet-base-v2": {
"R@1": 0.461,
"R@5": 0.657,
"R@10": 0.725,
"MRR": 0.544,
"verified": true
},
"Hybrid BM25+mpnet": {
"R@1": 0.491,
"R@5": 0.69,
"R@10": 0.743,
"MRR": 0.577,
"verified": true
}
},
"meta": {
"corpus_size": 340536,
"task1_test_size": 550,
"task2_test_size": 407,
"task3_test_size": 393,
"hardware": "macOS arm64 (Apple MPS)",
"python_version": "3.12",
"pytorch_version": "2.4"
}
}