"""Cross-modal retrieval eval — no LLM generation, finishes in seconds.

Runs twice:
  1. Raw code (inflated — docstring is embedded inside func_code_string).
  2. Docstrings stripped from candidate code (cleaner semantic signal).

Usage:
    python scripts/retrieval_only_eval.py
"""
import sys
from pathlib import Path

import pandas as pd

# Make the repository root importable so the `src.*` imports below resolve
# when this file is executed directly as a script.
sys.path.append(str(Path(__file__).resolve().parents[1]))

from src.config import load_config
from src.eval.retrieval_eval import evaluate_cross_modal
from src.rag.embedder import CodeIndex

# Upper bound on the number of (docstring, code) pairs that are evaluated.
MAX_PAIRS = 500
# Fixed seed so the sampled subset is the same on every run.
SAMPLE_SEED = 42
# Cutoffs passed to the evaluator and reported as R@k; defined once so the
# requested cutoffs and the printed ones cannot drift apart.
K_VALUES = (1, 5, 10)

cfg = load_config()

print("[load] reading test split ...")
test = pd.read_parquet(Path(cfg.paths.processed_dir) / "test.parquet")

# Drop incomplete rows BEFORE choosing the sample size: sizing the sample from
# the unfiltered frame makes `sample` raise ValueError whenever dropna() leaves
# fewer rows than the requested n.
complete = test[["docstring", "code"]].dropna()
pairs = complete.sample(
    n=min(MAX_PAIRS, len(complete)),
    random_state=SAMPLE_SEED,
).reset_index(drop=True)
print(f" {len(pairs)} pairs")

print("[load] loading embedder ...")
idx = CodeIndex.load(cfg.paths.index_dir)
print()

# Evaluate once on the code as stored, then once with
# strip_code_docstrings=True, and print both sets of metrics.
for strip in (False, True):
    label = "stripped (leakage-free)" if strip else "raw code (⚠ lexical leakage)"
    r = evaluate_cross_modal(
        idx.embedder,
        pairs,
        k_values=K_VALUES,
        strip_code_docstrings=strip,
    )
    print(f"\n=== {label} ===")
    print(f" N : {r['n_pairs']}")
    print(f" MRR : {r['mrr']:.4f}")
    for k in K_VALUES:
        print(f" R@{k:2d} : {r[f'recall@{k}']:.4f}")