File size: 8,318 Bytes
4b3a33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import sys
import os
import time
import json
import random
import numpy as np

# Set encoding for Windows terminals
# (wraps stdout in a UTF-8 writer so the emoji used in the report below
# don't raise UnicodeEncodeError on the default cp1252 console codec)
if sys.platform == "win32":
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Add backend to path
# NOTE(review): assumes this script lives three directory levels below the
# repo root so that `backend` becomes importable — confirm against layout.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))

from backend.src.embeddings.local_embedder import generate_embedding

def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Args:
        v1, v2: array-likes of equal length.

    Returns:
        float in [-1, 1]; 0.0 when either vector has zero norm (guards
        against a ZeroDivisionError / NaN from an all-zero embedding).
    """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        return 0.0
    return np.dot(v1, v2) / norm_product

def inject_noise(text, is_skill=False):
    """Simulates real-world messy resumes with abbreviations, typos, and lowercasing."""
    # Roughly 30% of entries are left perfectly clean.
    if random.random() < 0.3:
        return text

    short_forms = {
        "Python": "Py",
        "PostgreSQL": "Postgres",
        "JavaScript": "JS",
        "React": "ReactJS",
        "Machine Learning": "ML",
        "Amazon Web Services": "AWS",
        "Kubernetes": "K8s",
        "TypeScript": "TS",
        "User Experience": "UX",
    }

    # Skill entries sometimes get swapped for their common abbreviation.
    if is_skill and text in short_forms and random.random() > 0.5:
        return short_forms[text]

    # Otherwise the text may be fully lowercased (common in lazy resumes).
    return text.lower() if random.random() > 0.7 else text

def generate_adversarial_dataset():
    """Generates 200 candidates with intentional distractors and noise."""
    print("Building N=200 Adversarial Candidate Pool...")

    domains = [
        ("Frontend_React", ["React", "JavaScript", "Tailwind", "CSS", "TypeScript"]),
        ("Frontend_Angular", ["Angular", "JavaScript", "SCSS", "HTML", "TypeScript"]),
        ("Backend_Python", ["Python", "FastAPI", "PostgreSQL", "Docker", "Linux"]),
        ("Backend_Java", ["Java", "Spring Boot", "MySQL", "Kafka", "Kubernetes"]),
        ("Data_Science", ["Python", "Pandas", "PyTorch", "SQL", "Machine Learning"]),
        ("Data_Engineer", ["Spark", "Airflow", "Python", "SQL", "AWS"]),
        ("DevOps", ["Kubernetes", "Docker", "Terraform", "CI/CD", "AWS"]),
        ("Mobile_iOS", ["Swift", "Objective-C", "iOS", "XCode", "CoreData"]),
        ("Mobile_Android", ["Kotlin", "Java", "Android Studio", "Jetpack", "Firebase"]),
        ("Cybersecurity", ["Network Security", "Penetration Testing", "Firewalls", "Linux", "Python"]),
    ]
    levels = ["Junior", "Mid-Level", "Senior", "Lead"]

    pool = []
    gold = []
    next_id = 1

    # 40 query/target pairs: one per (domain, level) combination, each
    # accompanied by two targeted distractors and two random-noise fillers.
    for domain, core_skills in domains:
        pretty_domain = domain.replace('_', ' ')
        for level in levels:
            # 1. The golden target: right level, right skills (with noise).
            target_id = f"cand_{next_id}_TARGET_{level}_{domain}"
            noisy_skills = [inject_noise(s, True) for s in core_skills]
            pool.append({
                "id": target_id,
                "headline": f"{level} {pretty_domain} Engineer",
                "summary": inject_noise(f"Experienced {level} professional in {domain}. Passionate about building scalable architectures."),
                "skills": noisy_skills,
                "experience": [inject_noise(f"Built systems using {noisy_skills[0]} and {noisy_skills[1]}.")],
            })
            next_id += 1

            # The query itself, phrased in clean, formal HR language.
            query = f"Hiring a {level} professional in {pretty_domain}. Must have strong experience with {core_skills[0]}, {core_skills[1]}, and {core_skills[2]}."
            gold.append({"query": query, "relevant_id": target_id})

            # 2. Seniority distractor: perfect skill match, wrong level.
            wrong_level = "Senior" if level == "Junior" else "Junior"
            pool.append({
                "id": f"cand_{next_id}_DISTRACTOR_LEVEL_{domain}",
                "headline": f"{wrong_level} {pretty_domain} Engineer",
                "summary": f"A {wrong_level} developer specializing in {domain}.",
                "skills": core_skills,  # identical skill list, to confuse the model
                "experience": [f"Worked extensively with {core_skills[0]}."],
            })
            next_id += 1

            # 3. Skill distractor: right level, core skill swapped out.
            swapped = core_skills.copy()
            swapped[0] = "C++"  # replace the core skill with something irrelevant
            pool.append({
                "id": f"cand_{next_id}_DISTRACTOR_SKILL_{domain}",
                "headline": f"{level} Software Engineer",
                "summary": f"Focuses on {swapped[0]} and backend architecture.",
                "skills": swapped,
                "experience": [f"Maintained legacy {swapped[0]} codebases."],
            })
            next_id += 1

            # 4 & 5. Two random-noise candidates to fill out the 200.
            for _ in range(2):
                picked = random.choice(domains)
                pool.append({
                    "id": f"cand_{next_id}_RANDOM",
                    "headline": f"{random.choice(levels)} {picked[0]} Dev",
                    "summary": "Looking for new opportunities. Hobbies: hiking, dog walking, photography.",
                    "skills": [inject_noise(s, True) for s in picked[1]],
                    "experience": ["General software development tasks."],
                })
                next_id += 1

    return pool, gold

def evaluate_adversarial():
    """Run the adversarial retrieval benchmark and persist the metrics.

    Embeds every generated candidate profile, ranks the full pool against
    each golden query by cosine similarity, and reports MRR plus
    Recall@{1,3,5}. Metrics are printed and saved to
    'quality_metrics_adversarial.json'.

    Fixes vs. previous revision: the saved JSON now includes recall_5
    (it was computed and printed but silently dropped from the file), and
    the embedding progress message uses the actual pool size instead of a
    hard-coded 200.
    """
    print("πŸš€ Starting Adversarial Robustness Evaluation...")

    candidates, golden_dataset = generate_adversarial_dataset()

    print(f"πŸ“Š Dataset: {len(golden_dataset)} Queries | {len(candidates)} Candidates")
    print("⚠️  Warning: Embedding 200 profiles on CPU will take time. Please wait...\n")

    # 1. Embed candidates: flatten each structured profile into one rich string.
    candidate_embeddings = []
    start_time = time.time()
    total = len(candidates)

    for i, c in enumerate(candidates):
        rich_text = f"Headline: {c['headline']}. Summary: {c['summary']} Skills: {', '.join(c['skills'])}. Experience: {' '.join(c['experience'])}"
        candidate_embeddings.append({
            "id": c["id"],
            "vec": generate_embedding(rich_text)
        })
        if (i + 1) % 20 == 0:
            print(f"  -> Embedded {i + 1}/{total} candidates...")

    print(f"βœ… Embedding complete in {time.time() - start_time:.2f} seconds.\n")

    # 2. Evaluate queries: rank every candidate per query and locate the
    #    golden target's 1-based rank.
    mrr_total = 0.0
    hits_at_1 = 0
    hits_at_3 = 0
    hits_at_5 = 0

    for item in golden_dataset:
        query_vec = generate_embedding(item["query"])
        target_id = item["relevant_id"]

        scores = sorted(
            ((c_emb["id"], cosine_similarity(query_vec, c_emb["vec"]))
             for c_emb in candidate_embeddings),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # 1-based rank of the golden target; -1 if (impossibly) absent.
        rank = next((idx + 1 for idx, (cid, _) in enumerate(scores) if cid == target_id), -1)

        if rank != -1:
            mrr_total += 1.0 / rank
            if rank == 1: hits_at_1 += 1
            if rank <= 3: hits_at_3 += 1
            if rank <= 5: hits_at_5 += 1

    # 3. Final aggregation and report.
    num_queries = len(golden_dataset)
    final_mrr = mrr_total / num_queries
    recall_1 = hits_at_1 / num_queries
    recall_3 = hits_at_3 / num_queries
    recall_5 = hits_at_5 / num_queries

    print("=" * 45)
    print("πŸ›‘οΈ ADVERSARIAL RETRIEVAL METRICS (N=200)")
    print("=" * 45)
    print(f"MRR (Mean Reciprocal Rank):  {final_mrr:.4f}")
    print("-" * 45)
    print(f"Recall@1  (R@1):             {recall_1*100:.1f}%")
    print(f"Recall@3  (R@3):             {recall_3*100:.1f}%")
    print(f"Recall@5  (R@5):             {recall_5*100:.1f}%")
    print("=" * 45)

    # Save to JSON for the guide/paper — now including recall_5.
    with open("quality_metrics_adversarial.json", "w") as f:
        json.dump({
            "dataset": "N=200 Adversarial (Noise + Distractors)",
            "mrr": final_mrr,
            "recall_1": recall_1,
            "recall_3": recall_3,
            "recall_5": recall_5
        }, f, indent=4)

    print("πŸ“„ Results securely saved to 'quality_metrics_adversarial.json'")

if __name__ == "__main__":
    # Entry point: run the full benchmark when executed as a script.
    evaluate_adversarial()