# Adversarial retrieval benchmark: builds a 200-candidate noisy dataset and
# scores the local embedding model with MRR and Recall@{1,3,5}.
import sys
import os
import time
import json
import random

import numpy as np

# Reconfigure stdout for UTF-8 on Windows terminals so the emoji used in the
# progress/report output below don't raise UnicodeEncodeError on cp1252 consoles.
if sys.platform == "win32":
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Add the repository root to sys.path so the absolute `backend.src...` import
# resolves when this script is executed directly from its own directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))

from backend.src.embeddings.local_embedder import generate_embedding
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        v1: First vector (any array-like accepted by numpy).
        v2: Second vector of the same length.

    Returns:
        float in [-1, 1]. NaN if either vector has zero norm (embeddings
        produced by the model are assumed non-zero).
    """
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
def inject_noise(text, is_skill=False):
    """Simulate real-world messy resumes with abbreviations, typos, and lowercasing.

    Noise is applied probabilistically so the candidate pool mixes clean and
    dirty text, stressing the embedder's robustness.

    Args:
        text: The clean source string.
        is_skill: If True, the string may be swapped for a common industry
            abbreviation (e.g. "Python" -> "Py").

    Returns:
        The possibly-noised string.
    """
    if random.random() < 0.3:  # 30% chance to leave perfectly clean
        return text
    abbreviations = {
        "Python": "Py", "PostgreSQL": "Postgres", "JavaScript": "JS",
        "React": "ReactJS", "Machine Learning": "ML", "Amazon Web Services": "AWS",
        "Kubernetes": "K8s", "TypeScript": "TS", "User Experience": "UX"
    }
    # 50% chance to replace a recognized skill with its abbreviation.
    if is_skill and text in abbreviations and random.random() > 0.5:
        return abbreviations[text]
    # Randomly lowercase everything (common in lazy resumes) — 30% chance.
    if random.random() > 0.7:
        text = text.lower()
    return text
def generate_adversarial_dataset():
    """Generate 200 candidates with intentional distractors and noise.

    For each of the 40 (domain, level) query combinations, five candidates are
    created: the golden target, a seniority distractor (same skills, wrong
    level), a skill distractor (right level, core skill replaced), and two
    random-noise fillers — 40 x 5 = 200 profiles total.

    Returns:
        tuple: (candidates, golden_dataset). `candidates` is a list of profile
        dicts (id/headline/summary/skills/experience); `golden_dataset` is a
        list of {"query", "relevant_id"} pairs with exactly one relevant
        candidate per query.
    """
    print("Building N=200 Adversarial Candidate Pool...")
    domains = [
        ("Frontend_React", ["React", "JavaScript", "Tailwind", "CSS", "TypeScript"]),
        ("Frontend_Angular", ["Angular", "JavaScript", "SCSS", "HTML", "TypeScript"]),
        ("Backend_Python", ["Python", "FastAPI", "PostgreSQL", "Docker", "Linux"]),
        ("Backend_Java", ["Java", "Spring Boot", "MySQL", "Kafka", "Kubernetes"]),
        ("Data_Science", ["Python", "Pandas", "PyTorch", "SQL", "Machine Learning"]),
        ("Data_Engineer", ["Spark", "Airflow", "Python", "SQL", "AWS"]),
        ("DevOps", ["Kubernetes", "Docker", "Terraform", "CI/CD", "AWS"]),
        ("Mobile_iOS", ["Swift", "Objective-C", "iOS", "XCode", "CoreData"]),
        ("Mobile_Android", ["Kotlin", "Java", "Android Studio", "Jetpack", "Firebase"]),
        ("Cybersecurity", ["Network Security", "Penetration Testing", "Firewalls", "Linux", "Python"])
    ]
    levels = ["Junior", "Mid-Level", "Senior", "Lead"]
    candidates = []
    golden_dataset = []
    cand_counter = 1
    # Generate 40 Queries (10 domains x 4 levels)
    for domain_name, base_skills in domains:
        for level in levels:
            # 1. The Target Candidate (Golden): noisy version of the exact match.
            target_id = f"cand_{cand_counter}_TARGET_{level}_{domain_name}"
            target_skills = [inject_noise(s, True) for s in base_skills]
            candidates.append({
                "id": target_id,
                "headline": f"{level} {domain_name.replace('_', ' ')} Engineer",
                "summary": inject_noise(f"Experienced {level} professional in {domain_name}. Passionate about building scalable architectures."),
                "skills": target_skills,
                "experience": [inject_noise(f"Built systems using {target_skills[0]} and {target_skills[1]}.")]
            })
            cand_counter += 1

            # The Query (Clean, formal HR language)
            query = f"Hiring a {level} professional in {domain_name.replace('_', ' ')}. Must have strong experience with {base_skills[0]}, {base_skills[1]}, and {base_skills[2]}."
            golden_dataset.append({"query": query, "relevant_id": target_id})

            # 2. Seniority Distractor (Wrong level, perfect skills)
            distractor_level = "Senior" if level == "Junior" else "Junior"
            candidates.append({
                "id": f"cand_{cand_counter}_DISTRACTOR_LEVEL_{domain_name}",
                "headline": f"{distractor_level} {domain_name.replace('_', ' ')} Engineer",
                "summary": f"A {distractor_level} developer specializing in {domain_name}.",
                "skills": base_skills,  # Same exact skills to confuse the model
                "experience": [f"Worked extensively with {base_skills[0]}."]
            })
            cand_counter += 1

            # 3. Skill Distractor (Right level, missing core skill, has similar skill)
            altered_skills = base_skills.copy()
            altered_skills[0] = "C++"  # Replace core skill with something irrelevant
            candidates.append({
                "id": f"cand_{cand_counter}_DISTRACTOR_SKILL_{domain_name}",
                "headline": f"{level} Software Engineer",
                "summary": f"Focuses on {altered_skills[0]} and backend architecture.",
                "skills": altered_skills,
                "experience": [f"Maintained legacy {altered_skills[0]} codebases."]
            })
            cand_counter += 1

            # 4 & 5. Random Noise Candidates (Fill out the 200)
            for _ in range(2):
                rand_domain = random.choice(domains)
                candidates.append({
                    "id": f"cand_{cand_counter}_RANDOM",
                    "headline": f"{random.choice(levels)} {rand_domain[0]} Dev",
                    "summary": "Looking for new opportunities. Hobbies: hiking, dog walking, photography.",
                    "skills": [inject_noise(s, True) for s in rand_domain[1]],
                    "experience": ["General software development tasks."]
                })
                cand_counter += 1
    return candidates, golden_dataset
def evaluate_adversarial():
    """Run the adversarial retrieval benchmark and report/save quality metrics.

    Embeds every candidate profile and every query, ranks all candidates per
    query by cosine similarity, then computes MRR and Recall@{1,3,5}. Results
    are printed and persisted to 'quality_metrics_adversarial.json'.
    """
    print("π Starting Adversarial Robustness Evaluation...")
    candidates, golden_dataset = generate_adversarial_dataset()
    print(f"π Dataset: {len(golden_dataset)} Queries | {len(candidates)} Candidates")
    print("β οΈ Warning: Embedding 200 profiles on CPU will take time. Please wait...\n")

    # 1. Embed Candidates (Flattening): each profile is collapsed into one
    # rich text string so a single vector represents the whole candidate.
    candidate_embeddings = []
    start_time = time.time()
    for i, c in enumerate(candidates):
        rich_text = f"Headline: {c['headline']}. Summary: {c['summary']} Skills: {', '.join(c['skills'])}. Experience: {' '.join(c['experience'])}"
        candidate_embeddings.append({
            "id": c["id"],
            "vec": generate_embedding(rich_text)
        })
        if (i+1) % 20 == 0:
            print(f" -> Embedded {i+1}/200 candidates...")
    print(f"β Embedding complete in {time.time() - start_time:.2f} seconds.\n")

    # 2. Evaluate Queries: rank all candidates per query, find the golden id.
    mrr_total = 0
    hits_at_1 = 0
    hits_at_3 = 0
    hits_at_5 = 0
    for item in golden_dataset:
        query_vec = generate_embedding(item["query"])
        target_id = item["relevant_id"]
        scores = [(c_emb["id"], cosine_similarity(query_vec, c_emb["vec"])) for c_emb in candidate_embeddings]
        scores.sort(key=lambda x: x[1], reverse=True)
        # 1-based rank of the golden candidate; -1 if somehow absent.
        rank = -1
        for idx, (cid, sim) in enumerate(scores):
            if cid == target_id:
                rank = idx + 1
                break
        if rank != -1:
            mrr_total += (1.0 / rank)
            if rank == 1: hits_at_1 += 1
            if rank <= 3: hits_at_3 += 1
            if rank <= 5: hits_at_5 += 1

    # 3. Final Aggregation
    num_queries = len(golden_dataset)
    final_mrr = mrr_total / num_queries
    recall_1 = hits_at_1 / num_queries
    recall_3 = hits_at_3 / num_queries
    recall_5 = hits_at_5 / num_queries
    print("="*45)
    print("π‘οΈ ADVERSARIAL RETRIEVAL METRICS (N=200)")
    print("="*45)
    print(f"MRR (Mean Reciprocal Rank): {final_mrr:.4f}")
    print("-" * 45)
    print(f"Recall@1 (R@1): {recall_1*100:.1f}%")
    print(f"Recall@3 (R@3): {recall_3*100:.1f}%")
    print(f"Recall@5 (R@5): {recall_5*100:.1f}%")
    print("="*45)

    # Save to JSON for the guide/paper
    with open("quality_metrics_adversarial.json", "w") as f:
        json.dump({
            "dataset": "N=200 Adversarial (Noise + Distractors)",
            "mrr": final_mrr,
            "recall_1": recall_1,
            "recall_3": recall_3,
            # Fix: recall_5 was computed and printed but never persisted.
            "recall_5": recall_5
        }, f, indent=4)
    print("π Results securely saved to 'quality_metrics_adversarial.json'")
# Script entry point: run the full benchmark when executed directly.
if __name__ == "__main__":
    evaluate_adversarial()