Spaces:
Sleeping
Sleeping
"""Cross-modal retrieval eval — no LLM generation, finishes in seconds.

Runs twice:
  1. Raw code (inflated — docstring is embedded inside func_code_string).
  2. Docstrings stripped from candidate code (cleaner semantic signal).

Usage: python scripts/retrieval_only_eval.py
"""
import sys
from pathlib import Path

import pandas as pd

# Put the directory that contains ``scripts/`` on sys.path so the ``src``
# package imports below resolve when this file is executed directly.
sys.path.append(str(Path(__file__).resolve().parents[1]))

from src.config import load_config
from src.eval.retrieval_eval import evaluate_cross_modal
from src.rag.embedder import CodeIndex

# Upper bound on the number of (docstring, code) pairs that are evaluated.
MAX_PAIRS = 500
# Fixed seed so the sampled subset is the same on every run.
SAMPLE_SEED = 42
# Recall cut-offs; shared by the evaluation call and the report loop so the
# two can never disagree about which ``recall@k`` keys exist.
K_VALUES = (1, 5, 10)

cfg = load_config()

print("[load] reading test split ...")
test = pd.read_parquet(Path(cfg.paths.processed_dir) / "test.parquet")

# Drop incomplete rows FIRST, then size the sample from what is left.
# Sizing it from ``len(test)`` (as before) asks ``sample`` for more rows than
# exist whenever ``dropna`` shrinks the frame below the requested count, which
# raises ValueError because sampling is done without replacement.
clean = test[["docstring", "code"]].dropna()
pairs = clean.sample(
    n=min(MAX_PAIRS, len(clean)),
    random_state=SAMPLE_SEED,
).reset_index(drop=True)
print(f" {len(pairs)} pairs")

print("[load] loading embedder ...")
idx = CodeIndex.load(cfg.paths.index_dir)
print()

# One pass per setting: raw candidate code first, then with docstrings
# stripped (see the module docstring for why the raw numbers are inflated).
for strip in (False, True):
    label = "stripped (leakage-free)" if strip else "raw code (⚠ lexical leakage)"
    r = evaluate_cross_modal(
        idx.embedder,
        pairs,
        k_values=K_VALUES,
        strip_code_docstrings=strip,
    )
    print(f"\n=== {label} ===")
    print(f" N : {r['n_pairs']}")
    print(f" MRR : {r['mrr']:.4f}")
    for k in K_VALUES:
        print(f" R@{k:2d} : {r[f'recall@{k}']:.4f}")