ats-resume-optimizer / hallucination_test.py
Salim Shaikh
Add hallucination testing & strengthen anti-hallucination guardrails
fa7ae20
#!/usr/bin/env python3
"""
Comprehensive Hallucination Testing Suite
==========================================
Tests that LLM optimization preserves facts across all domains.
Checks that facts are reworded, not changed or fabricated.
Key checks:
- Skills: No new skills added that weren't in original
- Experience: Years/months preserved exactly
- Companies: Company names preserved exactly
- Job Titles: Titles preserved (can be slightly reworded)
- Metrics: Numbers/percentages preserved exactly
- Dates: All dates preserved exactly
- Education: Degrees/institutions preserved exactly
- Certifications: No fabricated certifications
"""
import os
import re
import json
import sys
from datetime import datetime
from typing import Dict, List, Tuple, Set
# Put this script's own directory on sys.path so the sibling `app` module can be imported
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from app import ATSCompatibilityAnalyzer, optimize_with_llm
# ==================== DOMAIN TEST CASES ====================
# Fixture table consumed by run_domain_test(): one entry per job title.
# Each entry is a dict with three keys:
#   "resume"            - the original resume text handed to the optimizer
#   "jd"                - the job description the resume is optimized against
#   "facts_to_preserve" - mapping of fact category -> list of literal strings
#                         that must still be found in the optimized resume
#                         (checked case-insensitively by check_fact_preservation)
# The resume/jd bodies are runtime data: do not re-indent or reflow them.
DOMAIN_TEST_CASES = {
# ==================== TECHNOLOGY ====================
"Software Engineer": {
"resume": """
John Smith | john.smith@email.com | (555) 123-4567 | San Francisco, CA
PROFESSIONAL SUMMARY
Software Engineer with 5 years of experience in Python and JavaScript development.
EXPERIENCE
Senior Software Engineer | TechCorp Inc. | Jan 2021 - Present
- Developed REST APIs using Python Flask serving 10,000 daily users
- Reduced database query time by 35% through optimization
- Led team of 4 engineers on microservices migration
Software Engineer | StartupXYZ | Jun 2019 - Dec 2020
- Built React frontend for e-commerce platform
- Implemented CI/CD pipeline using Jenkins
- Increased test coverage from 40% to 85%
EDUCATION
BS Computer Science | Stanford University | 2019
SKILLS
Python, JavaScript, React, Flask, PostgreSQL, Docker, AWS, Git
""",
"jd": """
Senior Software Engineer
Requirements:
- 5+ years Python experience
- Experience with REST APIs and microservices
- React or similar frontend framework
- AWS cloud experience
- Strong communication skills
""",
"facts_to_preserve": {
"years_experience": ["5 years"],
"companies": ["TechCorp Inc.", "StartupXYZ"],
"metrics": ["10,000", "35%", "4 engineers", "40%", "85%"],
"dates": ["Jan 2021", "Present", "Jun 2019", "Dec 2020", "2019"],
"skills": ["Python", "JavaScript", "React", "Flask", "PostgreSQL", "Docker", "AWS", "Git"],
"education": ["BS Computer Science", "Stanford University", "2019"],
"certifications": []
}
},
"Data Scientist": {
"resume": """
Sarah Chen | sarah.chen@email.com | Boston, MA
SUMMARY
Data Scientist with 4 years of experience in machine learning and statistical analysis.
EXPERIENCE
Data Scientist | DataDriven Corp | Mar 2022 - Present
- Built recommendation engine increasing sales by 23%
- Developed NLP pipeline processing 50,000 documents daily
- Created dashboards in Tableau for C-suite executives
Junior Data Scientist | Analytics Inc. | Aug 2020 - Feb 2022
- Performed A/B testing on 15 marketing campaigns
- Built predictive models with 92% accuracy
- Reduced customer churn by 18% using ML models
EDUCATION
MS Data Science | MIT | 2020
BS Statistics | UCLA | 2018
SKILLS
Python, R, TensorFlow, PyTorch, SQL, Tableau, Spark, Statistics
""",
"jd": """
Senior Data Scientist
- 4+ years ML experience
- NLP and recommendation systems
- Statistical analysis
- Python and SQL required
""",
"facts_to_preserve": {
"years_experience": ["4 years"],
"companies": ["DataDriven Corp", "Analytics Inc."],
"metrics": ["23%", "50,000", "15", "92%", "18%"],
"dates": ["Mar 2022", "Present", "Aug 2020", "Feb 2022", "2020", "2018"],
"skills": ["Python", "R", "TensorFlow", "PyTorch", "SQL", "Tableau", "Spark"],
"education": ["MS Data Science", "MIT", "BS Statistics", "UCLA"],
"certifications": []
}
},
"DevOps Engineer": {
"resume": """
Mike Johnson | mike.j@email.com | Seattle, WA
DevOps Engineer with 6 years of cloud infrastructure experience.
EXPERIENCE
Senior DevOps Engineer | CloudScale Systems | Apr 2021 - Present
- Managed Kubernetes clusters with 200+ pods
- Reduced deployment time from 2 hours to 15 minutes
- Implemented Terraform for infrastructure as code
- Achieved 99.9% uptime SLA
DevOps Engineer | WebServices LLC | Sep 2018 - Mar 2021
- Built CI/CD pipelines in GitLab
- Migrated 50 applications to AWS
- Automated monitoring with Prometheus and Grafana
EDUCATION
BS Computer Engineering | University of Washington | 2018
CERTIFICATIONS
- AWS Solutions Architect Associate (2022)
- Kubernetes Administrator (CKA) (2021)
SKILLS
Kubernetes, Docker, Terraform, AWS, GCP, Jenkins, GitLab CI, Prometheus
""",
"jd": """
Senior DevOps Engineer
- 5+ years DevOps experience
- Kubernetes and container orchestration
- AWS or GCP cloud platforms
- Infrastructure as Code (Terraform)
""",
"facts_to_preserve": {
"years_experience": ["6 years"],
"companies": ["CloudScale Systems", "WebServices LLC"],
"metrics": ["200+", "2 hours", "15 minutes", "99.9%", "50 applications"],
"dates": ["Apr 2021", "Present", "Sep 2018", "Mar 2021", "2018", "2022", "2021"],
"skills": ["Kubernetes", "Docker", "Terraform", "AWS", "GCP", "Jenkins", "GitLab CI", "Prometheus"],
"education": ["BS Computer Engineering", "University of Washington"],
"certifications": ["AWS Solutions Architect Associate", "Kubernetes Administrator", "CKA"]
}
},
# ==================== HEALTHCARE ====================
"Registered Nurse": {
"resume": """
Emily Rodriguez | emily.r@email.com | Los Angeles, CA
Registered Nurse with 8 years of critical care experience.
EXPERIENCE
ICU Nurse | Cedar-Sinai Medical Center | May 2019 - Present
- Managed care for 4-6 critically ill patients per shift
- Reduced medication errors by 40% through protocol improvements
- Trained 12 new graduate nurses
- Responded to 50+ code blue emergencies
Staff Nurse | UCLA Health | Jun 2016 - Apr 2019
- Provided care in 20-bed medical-surgical unit
- Administered 100+ IV medications daily
- Maintained 98% patient satisfaction scores
EDUCATION
BSN Nursing | University of Southern California | 2016
LICENSES & CERTIFICATIONS
- RN License #RN12345 (California)
- BLS Certified
- ACLS Certified
- CCRN Certified (2020)
SKILLS
Patient Assessment, IV Therapy, Ventilator Management, Epic EMR
""",
"jd": """
ICU Registered Nurse
- Active RN license required
- 5+ years critical care experience
- BLS and ACLS certification
- Strong patient care skills
""",
"facts_to_preserve": {
"years_experience": ["8 years"],
"companies": ["Cedar-Sinai Medical Center", "UCLA Health"],
"metrics": ["4-6", "40%", "12", "50+", "20-bed", "100+", "98%"],
"dates": ["May 2019", "Present", "Jun 2016", "Apr 2019", "2016", "2020"],
"skills": ["Patient Assessment", "IV Therapy", "Ventilator Management", "Epic EMR"],
"education": ["BSN Nursing", "University of Southern California"],
"certifications": ["RN License", "RN12345", "BLS", "ACLS", "CCRN"]
}
},
"Physical Therapist": {
"resume": """
David Kim | david.kim@email.com | Chicago, IL
Doctor of Physical Therapy with 5 years of outpatient orthopedic experience.
EXPERIENCE
Physical Therapist | Midwest Rehab Center | Aug 2021 - Present
- Treat 12-15 patients daily with orthopedic conditions
- Achieved 89% patient goal attainment rate
- Specialized in post-surgical ACL and rotator cuff rehabilitation
- Supervised 3 physical therapy assistants
Physical Therapist | City Sports Medicine | Jul 2019 - Jul 2021
- Provided care for 40+ athletes per week
- Developed return-to-sport protocols
- Reduced average treatment duration by 2 weeks
EDUCATION
DPT Physical Therapy | Northwestern University | 2019
BS Kinesiology | University of Illinois | 2016
LICENSES
- PT License #PT98765 (Illinois)
- Certified Orthopedic Clinical Specialist (OCS)
SKILLS
Manual Therapy, Therapeutic Exercise, Dry Needling, Sports Rehabilitation
""",
"jd": """
Physical Therapist - Orthopedics
- DPT or equivalent required
- Active PT license
- 3+ years orthopedic experience
- Manual therapy skills preferred
""",
"facts_to_preserve": {
"years_experience": ["5 years"],
"companies": ["Midwest Rehab Center", "City Sports Medicine"],
"metrics": ["12-15", "89%", "3", "40+", "2 weeks"],
"dates": ["Aug 2021", "Present", "Jul 2019", "Jul 2021", "2019", "2016"],
"skills": ["Manual Therapy", "Therapeutic Exercise", "Dry Needling", "Sports Rehabilitation"],
"education": ["DPT Physical Therapy", "Northwestern University", "BS Kinesiology", "University of Illinois"],
"certifications": ["PT License", "PT98765", "OCS", "Orthopedic Clinical Specialist"]
}
},
# ==================== FINANCE ====================
"Financial Analyst": {
"resume": """
Jennifer Lee | jennifer.lee@email.com | New York, NY
Financial Analyst with 6 years of FP&A and financial modeling experience.
EXPERIENCE
Senior Financial Analyst | Goldman Sachs | Feb 2021 - Present
- Built financial models for $500M+ M&A transactions
- Prepared quarterly earnings presentations for CFO
- Reduced forecasting variance from 8% to 3%
- Managed team of 2 junior analysts
Financial Analyst | Morgan Stanley | Aug 2018 - Jan 2021
- Analyzed 25+ investment opportunities annually
- Created DCF models for valuation analysis
- Supported $2B fundraising activities
EDUCATION
MBA Finance | Columbia Business School | 2018
BA Economics | Yale University | 2015
CERTIFICATIONS
- CFA Level III Candidate
- Financial Modeling Certificate (2019)
SKILLS
Excel, PowerPoint, Bloomberg Terminal, Capital IQ, SQL, Python
""",
"jd": """
Senior Financial Analyst
- 5+ years FP&A experience
- Advanced financial modeling skills
- MBA preferred
- CFA progress preferred
""",
"facts_to_preserve": {
"years_experience": ["6 years"],
"companies": ["Goldman Sachs", "Morgan Stanley"],
"metrics": ["$500M+", "8%", "3%", "2", "25+", "$2B"],
"dates": ["Feb 2021", "Present", "Aug 2018", "Jan 2021", "2018", "2015", "2019"],
"skills": ["Excel", "PowerPoint", "Bloomberg Terminal", "Capital IQ", "SQL", "Python"],
"education": ["MBA Finance", "Columbia Business School", "BA Economics", "Yale University"],
"certifications": ["CFA Level III", "Financial Modeling Certificate"]
}
},
"Accountant": {
"resume": """
Robert Martinez | robert.m@email.com | Dallas, TX
CPA with 7 years of public and corporate accounting experience.
EXPERIENCE
Senior Accountant | Deloitte | Jan 2020 - Present
- Led audit engagements for 8 Fortune 500 clients
- Managed $50M+ in client receivables
- Trained 5 staff accountants on GAAP procedures
- Identified $2.3M in cost savings for clients
Staff Accountant | PricewaterhouseCoopers | Sep 2017 - Dec 2019
- Prepared financial statements for 15+ clients
- Performed SOX compliance testing
- Processed 500+ journal entries monthly
EDUCATION
MS Accounting | University of Texas at Austin | 2017
BS Accounting | Texas A&M University | 2015
CERTIFICATIONS
- CPA License #CPA456789 (Texas)
- Certified Internal Auditor (CIA)
SKILLS
GAAP, IFRS, SAP, QuickBooks, Excel, Audit, Tax Preparation
""",
"jd": """
Senior Accountant
- CPA required
- 5+ years public accounting
- GAAP and audit experience
- Fortune 500 client experience preferred
""",
"facts_to_preserve": {
"years_experience": ["7 years"],
"companies": ["Deloitte", "PricewaterhouseCoopers"],
"metrics": ["8", "$50M+", "5", "$2.3M", "15+", "500+"],
"dates": ["Jan 2020", "Present", "Sep 2017", "Dec 2019", "2017", "2015"],
"skills": ["GAAP", "IFRS", "SAP", "QuickBooks", "Excel", "Audit", "Tax Preparation"],
"education": ["MS Accounting", "University of Texas at Austin", "BS Accounting", "Texas A&M University"],
"certifications": ["CPA", "CPA456789", "CIA", "Certified Internal Auditor"]
}
},
# ==================== LEGAL ====================
"Attorney": {
"resume": """
Amanda Thompson | amanda.t@email.com | Washington, DC
Corporate Attorney with 9 years of M&A and securities law experience.
EXPERIENCE
Senior Associate | Skadden Arps | Mar 2019 - Present
- Closed 12 M&A transactions totaling $8B
- Advised Fortune 100 companies on SEC compliance
- Led due diligence teams of 6-8 attorneys
- Negotiated complex acquisition agreements
Associate | Davis Polk | Sep 2015 - Feb 2019
- Drafted 100+ securities filings
- Supported 20 IPO transactions
- Conducted antitrust analysis for mergers
EDUCATION
JD | Harvard Law School | 2015
BA Political Science | Princeton University | 2012
BAR ADMISSIONS
- New York Bar (2015)
- DC Bar (2016)
SKILLS
M&A, Securities Law, Due Diligence, Contract Negotiation, SEC Filings
""",
"jd": """
Senior Corporate Associate
- JD from top law school
- 8+ years M&A experience
- Bar admission required
- SEC and securities experience
""",
"facts_to_preserve": {
"years_experience": ["9 years"],
"companies": ["Skadden Arps", "Davis Polk"],
"metrics": ["12", "$8B", "6-8", "100+", "20"],
"dates": ["Mar 2019", "Present", "Sep 2015", "Feb 2019", "2015", "2012", "2016"],
"skills": ["M&A", "Securities Law", "Due Diligence", "Contract Negotiation", "SEC Filings"],
"education": ["JD", "Harvard Law School", "BA Political Science", "Princeton University"],
"certifications": ["New York Bar", "DC Bar"]
}
},
# ==================== MARKETING ====================
"Marketing Manager": {
"resume": """
Lisa Wang | lisa.wang@email.com | San Francisco, CA
Marketing Manager with 7 years of B2B SaaS marketing experience.
EXPERIENCE
Senior Marketing Manager | Salesforce | Jun 2021 - Present
- Managed $3M annual marketing budget
- Increased MQLs by 45% through demand generation
- Led team of 4 marketing specialists
- Launched 8 product campaigns with 150% ROI
Marketing Manager | HubSpot | Apr 2018 - May 2021
- Grew email list from 50,000 to 200,000 subscribers
- Achieved 25% increase in website traffic
- Created content strategy generating 500 leads monthly
EDUCATION
MBA Marketing | UC Berkeley | 2018
BA Communications | USC | 2015
CERTIFICATIONS
- Google Analytics Certified
- HubSpot Inbound Marketing Certified
SKILLS
Demand Generation, Content Marketing, SEO, Paid Ads, Marketo, Salesforce
""",
"jd": """
Senior Marketing Manager
- 5+ years B2B marketing
- Demand generation experience
- Team leadership
- Marketing automation tools
""",
"facts_to_preserve": {
"years_experience": ["7 years"],
"companies": ["Salesforce", "HubSpot"],
"metrics": ["$3M", "45%", "4", "8", "150%", "50,000", "200,000", "25%", "500"],
"dates": ["Jun 2021", "Present", "Apr 2018", "May 2021", "2018", "2015"],
"skills": ["Demand Generation", "Content Marketing", "SEO", "Paid Ads", "Marketo", "Salesforce"],
"education": ["MBA Marketing", "UC Berkeley", "BA Communications", "USC"],
"certifications": ["Google Analytics", "HubSpot Inbound Marketing"]
}
},
# ==================== HUMAN RESOURCES ====================
"HR Manager": {
"resume": """
Michael Brown | michael.b@email.com | Atlanta, GA
HR Manager with 8 years of talent acquisition and employee relations experience.
EXPERIENCE
HR Manager | Delta Air Lines | Sep 2020 - Present
- Oversee HR operations for 500+ employees
- Reduced turnover by 22% through engagement initiatives
- Implemented new HRIS system serving 5,000 users
- Led diversity hiring increasing representation by 35%
HR Generalist | Coca-Cola Company | Jun 2016 - Aug 2020
- Recruited 150+ employees annually
- Processed 200+ employee relations cases
- Administered benefits for 2,000 employees
EDUCATION
MS Human Resource Management | Georgia State University | 2016
BS Psychology | University of Georgia | 2014
CERTIFICATIONS
- SHRM-SCP (2021)
- PHR Certified
SKILLS
Talent Acquisition, Employee Relations, HRIS, Benefits Administration, Workday
""",
"jd": """
HR Manager
- 7+ years HR experience
- SHRM certification preferred
- Employee relations expertise
- HRIS implementation experience
""",
"facts_to_preserve": {
"years_experience": ["8 years"],
"companies": ["Delta Air Lines", "Coca-Cola Company"],
"metrics": ["500+", "22%", "5,000", "35%", "150+", "200+", "2,000"],
"dates": ["Sep 2020", "Present", "Jun 2016", "Aug 2020", "2016", "2014", "2021"],
"skills": ["Talent Acquisition", "Employee Relations", "HRIS", "Benefits Administration", "Workday"],
"education": ["MS Human Resource Management", "Georgia State University", "BS Psychology", "University of Georgia"],
"certifications": ["SHRM-SCP", "PHR"]
}
},
# ==================== EDUCATION ====================
"Teacher": {
"resume": """
Jessica Adams | jessica.a@email.com | Denver, CO
High School Math Teacher with 10 years of classroom experience.
EXPERIENCE
Math Teacher | Denver East High School | Aug 2018 - Present
- Teach Algebra II, Pre-Calculus, and AP Calculus to 150 students
- Improved AP exam pass rate from 65% to 88%
- Sponsor Math Club with 25 student members
- Mentor 3 student teachers annually
Math Teacher | Aurora Central High School | Aug 2014 - Jul 2018
- Taught 5 classes of 30 students each
- Developed curriculum for honors geometry
- Achieved 95% student pass rate
EDUCATION
MA Education | University of Colorado | 2014
BS Mathematics | Colorado State University | 2012
CERTIFICATIONS
- Colorado Teaching License #T123456
- AP Calculus Certified Teacher
SKILLS
Curriculum Development, Classroom Management, Differentiated Instruction, Google Classroom
""",
"jd": """
High School Math Teacher
- Teaching license required
- 5+ years teaching experience
- AP teaching experience preferred
- Strong classroom management
""",
"facts_to_preserve": {
"years_experience": ["10 years"],
"companies": ["Denver East High School", "Aurora Central High School"],
"metrics": ["150", "65%", "88%", "25", "3", "5", "30", "95%"],
"dates": ["Aug 2018", "Present", "Aug 2014", "Jul 2018", "2014", "2012"],
"skills": ["Curriculum Development", "Classroom Management", "Differentiated Instruction", "Google Classroom"],
"education": ["MA Education", "University of Colorado", "BS Mathematics", "Colorado State University"],
"certifications": ["Colorado Teaching License", "T123456", "AP Calculus Certified"]
}
},
# ==================== ENGINEERING ====================
"Mechanical Engineer": {
"resume": """
Chris Taylor | chris.t@email.com | Detroit, MI
Mechanical Engineer with 6 years of automotive design experience.
EXPERIENCE
Senior Mechanical Engineer | Ford Motor Company | Mar 2021 - Present
- Designed powertrain components for 3 vehicle platforms
- Reduced manufacturing costs by $1.2M annually
- Led team of 5 engineers on EV battery enclosure project
- Filed 2 patents for thermal management systems
Mechanical Engineer | General Motors | Jul 2018 - Feb 2021
- Performed FEA analysis on 50+ component designs
- Improved fuel efficiency by 8% through aerodynamic optimization
- Managed $500K prototype budget
EDUCATION
MS Mechanical Engineering | University of Michigan | 2018
BS Mechanical Engineering | Purdue University | 2016
CERTIFICATIONS
- Professional Engineer (PE) #PE789012 (Michigan)
- Six Sigma Green Belt
SKILLS
SolidWorks, CATIA, ANSYS, FEA, CFD, GD&T, DFMEA
""",
"jd": """
Senior Mechanical Engineer - Automotive
- 5+ years automotive experience
- CAD proficiency (SolidWorks, CATIA)
- FEA analysis experience
- PE license preferred
""",
"facts_to_preserve": {
"years_experience": ["6 years"],
"companies": ["Ford Motor Company", "General Motors"],
"metrics": ["3", "$1.2M", "5", "2 patents", "50+", "8%", "$500K"],
"dates": ["Mar 2021", "Present", "Jul 2018", "Feb 2021", "2018", "2016"],
"skills": ["SolidWorks", "CATIA", "ANSYS", "FEA", "CFD", "GD&T", "DFMEA"],
"education": ["MS Mechanical Engineering", "University of Michigan", "BS Mechanical Engineering", "Purdue University"],
"certifications": ["PE", "PE789012", "Professional Engineer", "Six Sigma Green Belt"]
}
},
# ==================== SALES ====================
"Sales Manager": {
"resume": """
Brian Wilson | brian.w@email.com | Austin, TX
Sales Manager with 9 years of enterprise software sales experience.
EXPERIENCE
Regional Sales Manager | Oracle | Jan 2020 - Present
- Manage territory with $25M annual quota
- Exceeded quota by 115% in 2023
- Lead team of 8 account executives
- Closed 3 deals worth $5M+ each
- Expanded customer base by 40%
Account Executive | SAP | Jun 2015 - Dec 2019
- Achieved 120% of quota for 4 consecutive years
- Managed 50+ enterprise accounts
- Generated $15M in new business
EDUCATION
BS Business Administration | University of Texas at Austin | 2015
CERTIFICATIONS
- Salesforce Certified Administrator
- Oracle Cloud Certified
SKILLS
Enterprise Sales, Account Management, Salesforce, Contract Negotiation, Team Leadership
""",
"jd": """
Regional Sales Manager
- 8+ years B2B sales experience
- Enterprise software background
- Quota achievement track record
- Team management experience
""",
"facts_to_preserve": {
"years_experience": ["9 years"],
"companies": ["Oracle", "SAP"],
"metrics": ["$25M", "115%", "8", "3 deals", "$5M+", "40%", "120%", "4 consecutive years", "50+", "$15M"],
"dates": ["Jan 2020", "Present", "Jun 2015", "Dec 2019", "2015", "2023"],
"skills": ["Enterprise Sales", "Account Management", "Salesforce", "Contract Negotiation", "Team Leadership"],
"education": ["BS Business Administration", "University of Texas at Austin"],
"certifications": ["Salesforce Certified Administrator", "Oracle Cloud Certified"]
}
},
}
# ==================== HALLUCINATION DETECTION ====================
def extract_numbers(text: str) -> Set[str]:
    """Extract all numbers, percentages, and monetary values from text.

    Four patterns are applied independently and their matches are unioned,
    so one span of text can contribute several entries (e.g. "35%" yields
    both "35%" and "35"; "10,000" yields "10,000", "10" and "000").

    Args:
        text: Arbitrary text to scan.

    Returns:
        The set of matched substrings, exactly as they appear in ``text``.
    """
    patterns = [
        r'\$[\d,]+(?:\.\d+)?[MBK]?',  # Money
        r'\d+(?:\.\d+)?%',            # Percentages
        r'\d{1,3}(?:,\d{3})+',        # Large numbers with commas
        # Simple numbers with an optional "+" suffix. The suffix must be an
        # alternative to the closing word boundary: a "\b" placed *after*
        # "\+" never matches when the "+" is followed by a space or
        # punctuation, which silently dropped the "+" from "200+".
        r'\b\d+(?:\+|\b)',
    ]
    numbers: Set[str] = set()
    for pattern in patterns:
        numbers.update(re.findall(pattern, text))
    return numbers
def extract_dates(text: str) -> Set[str]:
    """Extract all dates from text.

    Recognises "<month> <year>" (abbreviated or full English month name),
    bare four-digit numbers, and the literal word "Present". Matching is
    case-insensitive and every result is lower-cased.

    Args:
        text: Arbitrary text to scan.

    Returns:
        The set of lower-cased date strings found in ``text``.
    """
    # Month names are spelled out explicitly. A loose "Jan|Feb|...[a-z]*"
    # prefix match also accepts any word that merely starts with a month
    # abbreviation ("Marketing 2021", "Decision 2020", "Junior 2020").
    month = (
        r'(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|'
        r'july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|'
        r'nov(?:ember)?|dec(?:ember)?)'
    )
    patterns = [
        r'\b' + month + r'\s*\d{4}\b',
        r'\b\d{4}\b',
        r'\bPresent\b',
    ]
    dates: Set[str] = set()
    for pattern in patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        dates.update(m.lower() for m in matches)
    return dates
def extract_proper_nouns(text: str) -> Set[str]:
    """Collect capitalized multi-word phrases (company names, universities, etc.).

    A phrase is two or more whitespace-separated words, each starting with
    an uppercase ASCII letter followed by at least one more ASCII letter.
    Because the separator is any whitespace, a phrase may span a line break.

    Args:
        text: Arbitrary text to scan.

    Returns:
        The set of matched phrases, exactly as they appear in ``text``.
    """
    multi_word_capitalized = r'[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)+'
    return set(re.findall(multi_word_capitalized, text))
def _fact_in_text(fact: str, text: str) -> bool:
    """Return True if ``fact`` occurs in ``text`` as a standalone token or phrase.

    Both arguments are expected to be lower-cased already. When the fact
    starts or ends with an alphanumeric character, that edge must not touch
    another ASCII letter/digit, nor sit inside a larger number such as
    "5,000" or "2.3". This stops "r" matching inside "python", or "3%"
    inside "23%".
    """
    if not fact:
        # An empty fact is trivially present (same as plain substring search).
        return True
    before = r'(?<![a-z0-9])(?<!\d[.,])' if fact[0].isalnum() else ''
    after = r'(?![a-z0-9])(?![.,]\d)' if fact[-1].isalnum() else ''
    return re.search(before + re.escape(fact) + after, text) is not None


def check_fact_preservation(original: str, optimized: str, facts: Dict) -> Dict:
    """Check if key facts are preserved in optimized resume.

    Each fact is looked up case-insensitively in ``optimized`` as a
    standalone token/phrase. A multi-word fact that is not found verbatim
    still counts as preserved ("reworded") when at least 70% of its words
    are individually present.

    Args:
        original: The original resume text. Accepted for interface
            compatibility; the check itself only inspects ``optimized``.
        optimized: The optimized resume text to verify.
        facts: Mapping of category name -> list of fact strings.

    Returns:
        A dict with:
            "preserved": list of "<category>: <fact>" entries (with a
                " (reworded)" suffix for fuzzy matches),
            "issues": list of "MISSING <category>: '<fact>'" entries,
            "preservation_rate": preserved / (preserved + issues), or 1.0
                when there were no facts to check.
    """
    issues = []
    preserved = []
    optimized_lower = optimized.lower()

    for category, items in facts.items():
        for item in items:
            item_lower = item.lower()

            # Exact (standalone) occurrence in the optimized text.
            if _fact_in_text(item_lower, optimized_lower):
                preserved.append(f"{category}: {item}")
                continue

            # Fuzzy fallback for slight rewording of multi-word facts.
            words = item_lower.split()
            if len(words) > 1:
                hits = sum(1 for w in words if _fact_in_text(w, optimized_lower))
                if hits >= len(words) * 0.7:
                    preserved.append(f"{category}: {item} (reworded)")
                    continue

            issues.append(f"MISSING {category}: '{item}'")

    checked = len(preserved) + len(issues)
    return {
        "preserved": preserved,
        "issues": issues,
        "preservation_rate": len(preserved) / checked if checked else 1.0
    }
def check_for_hallucinations(original: str, optimized: str, domain: str) -> Dict:
    """Check if new facts were hallucinated.

    Four heuristics compare ``optimized`` against ``original``:

    1. Numbers: money, percentage and "N+" values in the optimized text
       that have no counterpart in the original. The comparison is
       deliberately lenient (substring match after stripping ",", "$" and
       "%") so reformatted values are not flagged.
    2. Skills: a watched technology keyword newly present as a whole word,
       with none of its related prerequisite terms in the original.
    3. Certifications: same rule, for a list of common certifications.
    4. Degrees: more PhD/MBA/MD/JD mentions than the original has.

    Args:
        original: The original resume text.
        optimized: The optimized resume text.
        domain: Name of the domain under test. Accepted for interface
            compatibility; not used by the checks.

    Returns:
        A dict with "hallucinations" (list of finding strings) and
        "hallucination_count" (its length).
    """
    hallucinations = []
    original_lower = original.lower()
    optimized_lower = optimized.lower()

    def has_keyword(term, text):
        # Whole-word match: plain substring search reports "scala" for
        # "scalable" and "rust" for "trust".
        pattern = r'(?<![a-z0-9])' + re.escape(term) + r'(?![a-z0-9])'
        return re.search(pattern, text) is not None

    def has_prereq(term, text):
        # Prerequisites match at the start of a word so stems still work
        # ("audit" -> "auditing", "project manage" -> "project management").
        # Very short ones ("ai", "ml", "pm") must be whole words, otherwise
        # they match inside ordinary words such as "maintained" or "html".
        term = term.strip()
        pattern = r'(?<![a-z0-9])' + re.escape(term)
        if len(term) <= 2:
            pattern += r'(?![a-z0-9])'
        return re.search(pattern, text) is not None

    # Extract numbers from both - be more careful with monetary values
    def extract_numbers_carefully(text):
        numbers = set()
        # Money with suffixes. The suffix must end at a word boundary so the
        # first letter of the next word is not taken for a magnitude
        # ("$750 budget" must stay "$750", not become "$750b").
        money = r'\$[\d,]+(?:\.\d+)?(?:\s*(?:million|billion|M|B|K)\b)?'
        for m in re.findall(money, text, re.IGNORECASE):
            numbers.add(m.lower().replace(' ', ''))
        # Percentages
        for m in re.findall(r'\d+(?:\.\d+)?%', text):
            numbers.add(m)
        # Large numbers with + suffix
        for m in re.findall(r'\d+\+', text):
            numbers.add(m)
        return numbers

    def normalize(num):
        return num.replace(',', '').replace('$', '').replace('%', '')

    original_numbers = extract_numbers_carefully(original)
    optimized_numbers = extract_numbers_carefully(optimized)
    original_clean = [normalize(n) for n in original_numbers]

    # Find new numbers that weren't in original (but ignore small numbers and reformatting)
    for num in optimized_numbers:
        num_clean = normalize(num)
        found = any(
            num_clean in orig_clean or orig_clean in num_clean
            for orig_clean in original_clean
        )
        if not found and len(num) > 2:  # Ignore very short numbers
            hallucinations.append(f"NEW NUMBER: {num}")

    # Check for hallucinated skills - only flag clearly unrelated skills
    # Skills that should ONLY appear if original resume mentions related tech
    skill_requirements = {
        'kubernetes': ['docker', 'container', 'devops', 'cloud', 'aws', 'gcp', 'azure'],
        'terraform': ['infrastructure', 'cloud', 'aws', 'devops', 'iac'],
        'pytorch': ['machine learning', 'ml', 'deep learning', 'ai', 'neural', 'tensorflow'],
        'tensorflow': ['machine learning', 'ml', 'deep learning', 'ai', 'neural', 'pytorch'],
        'scala': ['java', 'spark', 'big data', 'jvm', 'functional'],
        'golang': ['backend', 'microservices', 'distributed', 'cloud'],
        'rust': ['systems', 'performance', 'low-level', 'c++'],
        'blockchain': ['crypto', 'web3', 'smart contract', 'ethereum'],
    }
    for skill, prereqs in skill_requirements.items():
        if has_keyword(skill, optimized_lower) and not has_keyword(skill, original_lower):
            # Only a finding when nothing related appears in the original.
            if not any(has_prereq(p, original_lower) for p in prereqs):
                hallucinations.append(f"HALLUCINATED SKILL: {skill}")

    # Check for hallucinated certifications - these are serious
    common_certs = [
        ('aws certified', ['aws', 'amazon web services', 'cloud']),
        ('azure certified', ['azure', 'microsoft cloud']),
        ('gcp certified', ['gcp', 'google cloud']),
        ('pmp', ['project manage', 'pm ']),
        ('cissp', ['security', 'infosec', 'cybersecurity']),
        ('cfa', ['finance', 'investment', 'portfolio']),
        ('cpa', ['accounting', 'audit', 'tax']),
    ]
    for cert, prereqs in common_certs:
        if has_keyword(cert, optimized_lower) and not has_keyword(cert, original_lower):
            if not any(has_prereq(p, original_lower) for p in prereqs):
                hallucinations.append(f"HALLUCINATED CERT: {cert}")

    # Check for hallucinated degrees - very serious
    degree_patterns = [
        (r'\bphd\b|\bph\.?d\.?\b', 'PhD'),
        (r'\bmba\b', 'MBA'),
        (r'\bmd\b', 'MD'),
        (r'\bjd\b', 'JD'),
    ]
    for pattern, name in degree_patterns:
        orig_count = len(re.findall(pattern, original_lower))
        opt_count = len(re.findall(pattern, optimized_lower))
        # More mentions than the original counts as a fabricated degree.
        if opt_count > orig_count:
            hallucinations.append(f"HALLUCINATED DEGREE: {name}")

    return {
        "hallucinations": hallucinations,
        "hallucination_count": len(hallucinations)
    }
def run_domain_test(domain: str, test_case: Dict) -> Dict:
    """Run a single domain test.

    Scores the original resume, runs the LLM optimization, re-scores the
    result, then checks fact preservation and hallucinations. Progress is
    printed as the test runs.

    Args:
        domain: Display name of the domain (used in output and the result).
        test_case: Dict with "resume", "jd" and "facts_to_preserve" keys.

    Returns:
        A result dict whose "status" is one of:
            "PASS" / "FAIL" - optimization ran; includes scores,
                preservation rate, missing facts and hallucinations,
            "SKIPPED" - the optimizer returned a "❌ ERROR" message,
            "ERROR" - an exception was raised during optimization/analysis.
    """
    resume = test_case["resume"]
    jd = test_case["jd"]
    facts = test_case["facts_to_preserve"]

    print(f"\n{'='*60}")
    print(f"Testing: {domain}")
    print(f"{'='*60}")

    # Get original ATS score
    analyzer = ATSCompatibilityAnalyzer()
    original_result = analyzer.analyze(resume, jd)
    original_score = original_result['total_score']
    print(f"Original ATS Score: {original_score}%")

    # Run optimization
    try:
        result_tuple = optimize_with_llm(resume, jd)
        # The optimizer may return either the text or a tuple whose first
        # element is the text.
        optimized = result_tuple[0] if isinstance(result_tuple, tuple) else result_tuple

        if optimized.startswith("❌ ERROR"):
            return {
                "domain": domain,
                "status": "SKIPPED",
                "reason": "Optimization blocked (expected for short resumes)",
                "original_score": original_score
            }

        # Get optimized ATS score
        optimized_result = analyzer.analyze(optimized, jd)
        optimized_score = optimized_result['total_score']
        score_change = optimized_score - original_score
        print(f"Optimized ATS Score: {optimized_score}%")
        # "+g" prints ints exactly like "+d" but also accepts floats; "+d"
        # raises ValueError on a float score, which the handler below would
        # report as an ERROR for a test that actually completed.
        # NOTE(review): the numeric type of 'total_score' is not visible here.
        print(f"Score Change: {score_change:+g}%")

        # Check fact preservation
        preservation = check_fact_preservation(resume, optimized, facts)
        print(f"\nFact Preservation Rate: {preservation['preservation_rate']*100:.1f}%")
        if preservation['issues']:
            print(f"⚠️ Missing Facts ({len(preservation['issues'])}):")
            for issue in preservation['issues'][:5]:  # Show first 5
                print(f" - {issue}")

        # Check for hallucinations
        hallucination_check = check_for_hallucinations(resume, optimized, domain)
        if hallucination_check['hallucinations']:
            print(f"🚨 Hallucinations Detected ({hallucination_check['hallucination_count']}):")
            for h in hallucination_check['hallucinations'][:5]:
                print(f" - {h}")
        else:
            print("✅ No hallucinations detected")

        # Determine pass/fail
        passed = (
            preservation['preservation_rate'] >= 0.8 and  # At least 80% facts preserved
            hallucination_check['hallucination_count'] <= 2 and  # Allow minor issues
            optimized_score >= original_score  # Score should not decrease
        )
        return {
            "domain": domain,
            "status": "PASS" if passed else "FAIL",
            "original_score": original_score,
            "optimized_score": optimized_score,
            "score_change": score_change,
            "preservation_rate": preservation['preservation_rate'],
            "missing_facts": preservation['issues'],
            "hallucinations": hallucination_check['hallucinations'],
            "passed": passed
        }
    except Exception as e:
        # Per-test boundary: record the failure so the remaining domains
        # still run.
        print(f"❌ Error: {str(e)}")
        return {
            "domain": domain,
            "status": "ERROR",
            "error": str(e),
            "original_score": original_score
        }
def main():
    """Run every domain test, print a report, and save the results as JSON.

    Returns:
        0 when no test failed or errored, otherwise 1 (used as the process
        exit code).
    """
    rule = "=" * 70
    thin_rule = "-" * 70
    total = len(DOMAIN_TEST_CASES)

    print(rule)
    print("COMPREHENSIVE HALLUCINATION TESTING SUITE")
    print(rule)
    print(f"Testing {total} domains for fact preservation and hallucination detection")
    print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    results = [
        run_domain_test(domain, test_case)
        for domain, test_case in DOMAIN_TEST_CASES.items()
    ]

    # Tally outcomes; any status other than the three known ones is an error.
    tally = {'PASS': 0, 'FAIL': 0, 'SKIPPED': 0}
    errors = 0
    for outcome in results:
        status = outcome['status']
        if status in tally:
            tally[status] += 1
        else:
            errors += 1
    passed = tally['PASS']
    failed = tally['FAIL']
    skipped = tally['SKIPPED']

    # Print summary
    print("\n" + rule)
    print("SUMMARY")
    print(rule)
    print(f"✅ Passed: {passed}/{total}")
    print(f"❌ Failed: {failed}/{total}")
    print(f"⏭️ Skipped: {skipped}/{total}")
    print(f"⚠️ Errors: {errors}/{total}")

    # Show failed tests
    if failed > 0:
        print("\n" + thin_rule)
        print("FAILED TESTS:")
        print(thin_rule)
        for outcome in (r for r in results if r['status'] == 'FAIL'):
            print(f"\n{outcome['domain']}:")
            print(f" Preservation Rate: {outcome['preservation_rate']*100:.1f}%")
            if outcome['missing_facts']:
                print(f" Missing Facts: {len(outcome['missing_facts'])}")
            if outcome['hallucinations']:
                print(f" Hallucinations: {outcome['hallucinations']}")

    # Calculate overall metrics (only results that got far enough to have them)
    rates = [r['preservation_rate'] for r in results if 'preservation_rate' in r]
    hallucination_total = sum(len(r.get('hallucinations', [])) for r in results)
    if rates:
        print(f"\nAverage Fact Preservation Rate: {sum(rates)/len(rates)*100:.1f}%")
    print(f"Total Hallucinations Detected: {hallucination_total}")

    # Save results
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results_file = f"hallucination_results_{timestamp}.json"
    report = {
        "summary": {
            "passed": passed,
            "failed": failed,
            "skipped": skipped,
            "errors": errors,
            "total": total
        },
        "results": results
    }
    with open(results_file, 'w') as f:
        json.dump(report, f, indent=2)
    print(f"\nResults saved to: {results_file}")

    # Return exit code
    if failed == 0 and errors == 0:
        return 0
    return 1
if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` builtin: `exit` is injected by
    # the `site` module for interactive use and is absent when site is
    # disabled (e.g. `python -S`).
    sys.exit(main())