#!/usr/bin/env python3
"""Run the four pipeline validation checks and exit non-zero on failure.

Checks, in order:
  1. Honeypot injection test on the Stage 1 candidate pool.
  2. Diversity audit of the candidates listed in the submission CSV.
  3. c5 engagement-mismatch boundary test (connection_count 60 vs 61).
  4. Probe-set NDCG@10 (informational; never fails the run).
"""
import copy
import os
import sys
import pickle
import json

import pandas as pd
import numpy as np

_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.dirname(_SCRIPTS_DIR)
_SRC_DIR = os.path.join(_PROJECT_ROOT, "src")
# Make project modules importable regardless of the current working directory.
for p in [_SRC_DIR, _SCRIPTS_DIR, _PROJECT_ROOT]:
    if p not in sys.path:
        sys.path.insert(0, p)

from jd_parser import parse_jd  # noqa: E402
from retrieval import load_numpy_bm25_artifacts, run_dual_pass_retrieval  # noqa: E402
from features import build_feature_vector, c5_engagement_mismatch, FEATURE_COLUMNS  # noqa: E402
from rank import pipeline_fn, load_stage1_candidates_fast  # noqa: E402
from validate_pipeline import (  # noqa: E402
    run_honeypot_injection_test,
    check_top100_diversity,
    compute_probe_ndcg10,
    PROBE_SET_LABELS,
)


class _SilentLogger:
    """Logger stand-in that discards messages, used to suppress loading logs."""

    def info(self, *args, **kwargs):
        pass

    def warning(self, *args, **kwargs):
        pass

    def error(self, *args, **kwargs):
        pass


def _load_pickle(path):
    """Return the object unpickled from *path*.

    NOTE(security): pickle can execute arbitrary code while loading. Only point
    this at artifacts produced by this project's own precompute step.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


def _candidate_with_signals(base, connection_count):
    """Return a deep copy of *base* with the c5 boundary-test signals applied.

    Only ``connection_count`` varies between the two boundary cases; the other
    two signals are pinned to 15 and 4.
    """
    clone = copy.deepcopy(base)
    clone["redrob_signals"]["connection_count"] = connection_count
    clone["redrob_signals"]["search_appearance_30d"] = 15
    clone["redrob_signals"]["endorsements_received"] = 4
    return clone


def _run_diversity_check(submission_path, candidates_path, candidate_offsets,
                         bm25_scores, static_features, jd_config):
    """Run the diversity audit over the candidates listed in the submission CSV.

    Prints the per-check result line and returns
    ``(passed, details, submission_ids)``. ``submission_ids`` is ``None`` when
    the submission file does not exist, in which case the check fails.
    """
    if not os.path.isfile(submission_path):
        print("Diversity Check: FAIL (submission.csv not found)")
        return False, "Submission file missing", None

    df_sub = pd.read_csv(submission_path)
    top100_ids = df_sub["candidate_id"].tolist()
    top100_candidates, _ = load_stage1_candidates_fast(
        candidates_path, top100_ids, candidate_offsets, _SilentLogger()
    )

    # build feature vectors
    stage1_bm25_median = float(np.median(list(bm25_scores.values())))
    feature_vectors = {}
    for c in top100_candidates:
        cid = c.get("candidate_id")
        # Candidates absent from the Stage 1 retrieval get a BM25 score of 0.0.
        bs = bm25_scores.get(cid, 0.0)
        feature_vectors[cid] = build_feature_vector(
            c, jd_config, bs, stage1_bm25_median,
            precomputed_static=static_features.get(cid)
        )

    div_res = check_top100_diversity(top100_candidates, feature_vectors)
    div_pass = div_res["pass"]
    div_details = (
        f"max_company={div_res['most_common_company_share']:.1%}, "
        f"max_sig={div_res['most_common_signature_share']:.1%}"
    )
    print(f"Diversity Check: {'PASS' if div_pass else 'FAIL'} ({div_details})")
    return div_pass, div_details, top100_ids


def _run_c5_boundary_test(sample_candidates):
    """Probe ``c5_engagement_mismatch`` on either side of connection_count 60/61.

    Returns ``(passed, details)``. The test passes when the 60-connection case
    yields 0.0 and the 61-connection case yields 1.0. If no Stage 1 candidates
    were loaded there is no template candidate to mutate, so the test is
    reported as failed rather than raising ``IndexError``.
    """
    if len(sample_candidates) == 0:
        return False, "No Stage 1 candidates loaded; boundary test could not run"

    r1_cand = sample_candidates[0]

    # test case: just inside the threshold (connections=60, appearances=15, endorsements=4)
    c5_inside = c5_engagement_mismatch(
        _candidate_with_signals(r1_cand, 60), bm25_score=60.0, median_bm25=50.0
    )
    # test case: just outside the threshold (connections=61, appearances=15, endorsements=4)
    c5_outside = c5_engagement_mismatch(
        _candidate_with_signals(r1_cand, 61), bm25_score=60.0, median_bm25=50.0
    )

    c5_pass = (c5_inside == 0.0) and (c5_outside == 1.0)
    c5_details = (
        f"Fired on boundary inside (60/15/4 -> {c5_inside:.1f}) "
        f"and passed outside (61/15/4 -> {c5_outside:.1f})"
    )
    return c5_pass, c5_details


def main():
    """Run all validation checks, print a summary, and exit with 0 or 1.

    Exits with status 0 only when the honeypot, diversity and c5 boundary
    checks all pass; the NDCG@10 check is reported but always counts as a pass.
    """
    candidates_path = os.path.join(_PROJECT_ROOT, "candidates.jsonl")
    aliases_path = os.path.join(_PROJECT_ROOT, "data", "skill_aliases.json")
    precomputed_dir = os.path.join(_PROJECT_ROOT, "precomputed")

    # Prefer the named submission file when present, else the generic one.
    submission_path = os.path.join(_PROJECT_ROOT, "CTRL_COFFEE_REPEAT.csv")
    if not os.path.exists(submission_path):
        submission_path = os.path.join(_PROJECT_ROOT, "submission.csv")

    print("Loading validation configurations and index...")
    jd_config = parse_jd(aliases_path)
    bm25 = load_numpy_bm25_artifacts(precomputed_dir)

    candidate_ids = _load_pickle(os.path.join(precomputed_dir, "candidate_ids.pkl"))
    candidate_offsets = _load_pickle(os.path.join(precomputed_dir, "candidate_offsets.pkl"))
    static_features = _load_pickle(os.path.join(precomputed_dir, "static_features.pkl"))

    # honeypot injection Test
    print(" Running 1/4: Honeypot Injection Test ---")
    stage1_ids, bm25_scores = run_dual_pass_retrieval(bm25, candidate_ids, jd_config)
    sample_candidates, _ = load_stage1_candidates_fast(
        candidates_path, stage1_ids, candidate_offsets, _SilentLogger()
    )
    hp_result = run_honeypot_injection_test(pipeline_fn, sample_candidates, jd_config, top_n=100)
    hp_pass = hp_result["pass"]
    hp_leaked_count = len(hp_result["leaked_into_top_n"])
    print(f"Honeypot Injection Test: {'PASS' if hp_pass else 'FAIL'} (Leaked: {hp_leaked_count} of {hp_result['total_synthetic']})")

    # top100 diversity
    print(" Running 2/4: Diversity Audit Check----")
    div_pass, div_details, top100_ids = _run_diversity_check(
        submission_path, candidates_path, candidate_offsets,
        bm25_scores, static_features, jd_config,
    )

    # boundary gap test
    print("Running 3/4: c5 Boundary Gap Test---")
    c5_pass, c5_details = _run_c5_boundary_test(sample_candidates)
    print(f"c5 Boundary Test: {'PASS' if c5_pass else 'FAIL'} ({c5_details})")

    # probe set NDCG@10 check
    print(" Running 4/4: Probe-set NDCG@10 Check---")
    ndcg_val = None
    if top100_ids is not None:
        ndcg_val = compute_probe_ndcg10(top100_ids)
    # Informational only: this check never fails the run.
    ndcg_pass = True
    ndcg_details = f"NDCG@10 = {ndcg_val}"
    if top100_ids is None:
        # Distinguish "nothing to score" from "no probe IDs in the pool".
        ndcg_details = "NDCG@10 = None (Submission file missing; check skipped)"
    elif ndcg_val is None:
        ndcg_details = "NDCG@10 = None (No probe set candidate IDs present in Stage 1 pool; expected behavior on full pool)"
    print(f"Probe-set NDCG@10: {ndcg_details}")

    print("\n" + "=" * 80)
    print("VALIDATION RUN SUMMARY")
    print("=" * 80)
    print(f" Honeypot Injection Test | {'PASS' if hp_pass else 'FAIL'} | Leaked: {hp_leaked_count} of {hp_result['total_synthetic']}")
    print(f" Top-100 Diversity Check | {'PASS' if div_pass else 'FAIL'} | {div_details}")
    print(f" c5 Boundary-Gap Test | {'PASS' if c5_pass else 'FAIL'} | {c5_details}")
    print(f" Probe-set NDCG@10 Check | {'PASS' if ndcg_pass else 'FAIL'} | {ndcg_details}")
    print("=" * 80)

    all_pass = hp_pass and div_pass and c5_pass and ndcg_pass
    sys.exit(0 if all_pass else 1)


if __name__ == "__main__":
    main()