code-gen-assistant / scripts /retrieval_only_eval.py
Rushabh147's picture
Initial deploy to HF Spaces (clean history, LFS for all binaries)
b89e6d6
Raw
History Blame Contribute Delete
1.36 kB
"""Cross-modal retrieval eval — no LLM generation, finishes in seconds.
Runs twice:
1. Raw code (inflated — docstring is embedded inside func_code_string).
2. Docstrings stripped from candidate code (cleaner semantic signal).
Usage: python scripts/retrieval_only_eval.py
"""
import sys
from pathlib import Path
import pandas as pd
sys.path.append(str(Path(__file__).resolve().parents[1]))
from src.config import load_config
from src.eval.retrieval_eval import evaluate_cross_modal
from src.rag.embedder import CodeIndex
# Evaluation settings: upper bound on sampled pairs, the sampling seed, and the
# recall cut-offs. K_VALUES is used both for the evaluation request and for
# the report loop so the two can never drift apart.
MAX_PAIRS = 500
SEED = 42
K_VALUES = (1, 5, 10)

cfg = load_config()

print("[load] reading test split ...")
test = pd.read_parquet(Path(cfg.paths.processed_dir) / "test.parquet")

# Drop incomplete rows BEFORE sizing the sample. Sizing from len(test) can
# request more rows than survive dropna(), and DataFrame.sample (without
# replacement) raises ValueError in that case.
complete = test[["docstring", "code"]].dropna()
pairs = complete.sample(
    n=min(MAX_PAIRS, len(complete)),
    random_state=SEED,
).reset_index(drop=True)
print(f" {len(pairs)} pairs")

print("[load] loading embedder ...")
idx = CodeIndex.load(cfg.paths.index_dir)
print()

# Two passes over the same pairs: first with the code as-is, then with
# strip_code_docstrings=True (see the module docstring for the rationale).
for strip in (False, True):
    label = "stripped (leakage-free)" if strip else "raw code (⚠ lexical leakage)"
    r = evaluate_cross_modal(
        idx.embedder,
        pairs,
        k_values=K_VALUES,
        strip_code_docstrings=strip,
    )
    # The result is indexed by "n_pairs", "mrr" and one "recall@<k>" entry
    # per requested cut-off.
    print(f"\n=== {label} ===")
    print(f" N : {r['n_pairs']}")
    print(f" MRR : {r['mrr']:.4f}")
    for k in K_VALUES:
        print(f" R@{k:2d} : {r[f'recall@{k}']:.4f}")