File size: 1,361 Bytes
b89e6d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""Cross-modal retrieval eval — no LLM generation, finishes in seconds.

Runs twice:
  1. Raw code (inflated — docstring is embedded inside func_code_string).
  2. Docstrings stripped from candidate code (cleaner semantic signal).

Usage:  python scripts/retrieval_only_eval.py
"""
import sys
from pathlib import Path

import pandas as pd

sys.path.append(str(Path(__file__).resolve().parents[1]))
from src.config import load_config
from src.eval.retrieval_eval import evaluate_cross_modal
from src.rag.embedder import CodeIndex

cfg = load_config()

print("[load] reading test split ...")
test = pd.read_parquet(Path(cfg.paths.processed_dir) / "test.parquet")

# Drop incomplete rows BEFORE choosing the sample size. Sizing the sample from
# the unfiltered frame can request more rows than survive dropna(), and
# DataFrame.sample (replace=False) raises ValueError in that case.
complete = test[["docstring", "code"]].dropna()

# At most 500 (docstring, code) pairs; the fixed seed keeps the subset
# reproducible across runs.
pairs = (
    complete
    .sample(n=min(500, len(complete)), random_state=42)
    .reset_index(drop=True)
)
print(f"       {len(pairs)} pairs")

print("[load] loading embedder ...")
idx = CodeIndex.load(cfg.paths.index_dir)

# Cut-offs shared by the evaluation call and the recall report below.
k_values = (1, 5, 10)

# One entry per pass, in run order: (strip docstrings from code?, heading).
runs = (
    (False, "raw code  (⚠ lexical leakage)"),
    (True, "stripped (leakage-free)"),
)

print()
for strip, label in runs:
    r = evaluate_cross_modal(
        idx.embedder,
        pairs,
        k_values=k_values,
        strip_code_docstrings=strip,
    )

    # Report block for this pass: pair count, MRR, then recall at each cut-off.
    print(f"\n=== {label} ===")
    print(f"  N     : {r['n_pairs']}")
    print(f"  MRR   : {r['mrr']:.4f}")
    for k in k_values:
        recall = r[f"recall@{k}"]
        print(f"  R@{k:2d}  : {recall:.4f}")