Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Comprehensive Hallucination Testing Suite | |
| ========================================== | |
| Tests that LLM optimization preserves facts across all domains. | |
| Checks that facts are reworded, not changed or fabricated. | |
| Key checks: | |
| - Skills: No new skills added that weren't in original | |
| - Experience: Years/months preserved exactly | |
| - Companies: Company names preserved exactly | |
| - Job Titles: Titles preserved (can be slightly reworded) | |
| - Metrics: Numbers/percentages preserved exactly | |
| - Dates: All dates preserved exactly | |
| - Education: Degrees/institutions preserved exactly | |
| - Certifications: No fabricated certifications | |
| """ | |
| import os | |
| import re | |
| import json | |
| import sys | |
| from datetime import datetime | |
| from typing import Dict, List, Tuple, Set | |
| # Add parent directory for imports | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
| from app import ATSCompatibilityAnalyzer, optimize_with_llm | |
| # ==================== DOMAIN TEST CASES ==================== | |
| DOMAIN_TEST_CASES = { | |
| # ==================== TECHNOLOGY ==================== | |
| "Software Engineer": { | |
| "resume": """ | |
| John Smith | john.smith@email.com | (555) 123-4567 | San Francisco, CA | |
| PROFESSIONAL SUMMARY | |
| Software Engineer with 5 years of experience in Python and JavaScript development. | |
| EXPERIENCE | |
| Senior Software Engineer | TechCorp Inc. | Jan 2021 - Present | |
| - Developed REST APIs using Python Flask serving 10,000 daily users | |
| - Reduced database query time by 35% through optimization | |
| - Led team of 4 engineers on microservices migration | |
| Software Engineer | StartupXYZ | Jun 2019 - Dec 2020 | |
| - Built React frontend for e-commerce platform | |
| - Implemented CI/CD pipeline using Jenkins | |
| - Increased test coverage from 40% to 85% | |
| EDUCATION | |
| BS Computer Science | Stanford University | 2019 | |
| SKILLS | |
| Python, JavaScript, React, Flask, PostgreSQL, Docker, AWS, Git | |
| """, | |
| "jd": """ | |
| Senior Software Engineer | |
| Requirements: | |
| - 5+ years Python experience | |
| - Experience with REST APIs and microservices | |
| - React or similar frontend framework | |
| - AWS cloud experience | |
| - Strong communication skills | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["5 years"], | |
| "companies": ["TechCorp Inc.", "StartupXYZ"], | |
| "metrics": ["10,000", "35%", "4 engineers", "40%", "85%"], | |
| "dates": ["Jan 2021", "Present", "Jun 2019", "Dec 2020", "2019"], | |
| "skills": ["Python", "JavaScript", "React", "Flask", "PostgreSQL", "Docker", "AWS", "Git"], | |
| "education": ["BS Computer Science", "Stanford University", "2019"], | |
| "certifications": [] | |
| } | |
| }, | |
| "Data Scientist": { | |
| "resume": """ | |
| Sarah Chen | sarah.chen@email.com | Boston, MA | |
| SUMMARY | |
| Data Scientist with 4 years of experience in machine learning and statistical analysis. | |
| EXPERIENCE | |
| Data Scientist | DataDriven Corp | Mar 2022 - Present | |
| - Built recommendation engine increasing sales by 23% | |
| - Developed NLP pipeline processing 50,000 documents daily | |
| - Created dashboards in Tableau for C-suite executives | |
| Junior Data Scientist | Analytics Inc. | Aug 2020 - Feb 2022 | |
| - Performed A/B testing on 15 marketing campaigns | |
| - Built predictive models with 92% accuracy | |
| - Reduced customer churn by 18% using ML models | |
| EDUCATION | |
| MS Data Science | MIT | 2020 | |
| BS Statistics | UCLA | 2018 | |
| SKILLS | |
| Python, R, TensorFlow, PyTorch, SQL, Tableau, Spark, Statistics | |
| """, | |
| "jd": """ | |
| Senior Data Scientist | |
| - 4+ years ML experience | |
| - NLP and recommendation systems | |
| - Statistical analysis | |
| - Python and SQL required | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["4 years"], | |
| "companies": ["DataDriven Corp", "Analytics Inc."], | |
| "metrics": ["23%", "50,000", "15", "92%", "18%"], | |
| "dates": ["Mar 2022", "Present", "Aug 2020", "Feb 2022", "2020", "2018"], | |
| "skills": ["Python", "R", "TensorFlow", "PyTorch", "SQL", "Tableau", "Spark"], | |
| "education": ["MS Data Science", "MIT", "BS Statistics", "UCLA"], | |
| "certifications": [] | |
| } | |
| }, | |
| "DevOps Engineer": { | |
| "resume": """ | |
| Mike Johnson | mike.j@email.com | Seattle, WA | |
| DevOps Engineer with 6 years of cloud infrastructure experience. | |
| EXPERIENCE | |
| Senior DevOps Engineer | CloudScale Systems | Apr 2021 - Present | |
| - Managed Kubernetes clusters with 200+ pods | |
| - Reduced deployment time from 2 hours to 15 minutes | |
| - Implemented Terraform for infrastructure as code | |
| - Achieved 99.9% uptime SLA | |
| DevOps Engineer | WebServices LLC | Sep 2018 - Mar 2021 | |
| - Built CI/CD pipelines in GitLab | |
| - Migrated 50 applications to AWS | |
| - Automated monitoring with Prometheus and Grafana | |
| EDUCATION | |
| BS Computer Engineering | University of Washington | 2018 | |
| CERTIFICATIONS | |
| - AWS Solutions Architect Associate (2022) | |
| - Kubernetes Administrator (CKA) (2021) | |
| SKILLS | |
| Kubernetes, Docker, Terraform, AWS, GCP, Jenkins, GitLab CI, Prometheus | |
| """, | |
| "jd": """ | |
| Senior DevOps Engineer | |
| - 5+ years DevOps experience | |
| - Kubernetes and container orchestration | |
| - AWS or GCP cloud platforms | |
| - Infrastructure as Code (Terraform) | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["6 years"], | |
| "companies": ["CloudScale Systems", "WebServices LLC"], | |
| "metrics": ["200+", "2 hours", "15 minutes", "99.9%", "50 applications"], | |
| "dates": ["Apr 2021", "Present", "Sep 2018", "Mar 2021", "2018", "2022", "2021"], | |
| "skills": ["Kubernetes", "Docker", "Terraform", "AWS", "GCP", "Jenkins", "GitLab CI", "Prometheus"], | |
| "education": ["BS Computer Engineering", "University of Washington"], | |
| "certifications": ["AWS Solutions Architect Associate", "Kubernetes Administrator", "CKA"] | |
| } | |
| }, | |
| # ==================== HEALTHCARE ==================== | |
| "Registered Nurse": { | |
| "resume": """ | |
| Emily Rodriguez | emily.r@email.com | Los Angeles, CA | |
| Registered Nurse with 8 years of critical care experience. | |
| EXPERIENCE | |
| ICU Nurse | Cedar-Sinai Medical Center | May 2019 - Present | |
| - Managed care for 4-6 critically ill patients per shift | |
| - Reduced medication errors by 40% through protocol improvements | |
| - Trained 12 new graduate nurses | |
| - Responded to 50+ code blue emergencies | |
| Staff Nurse | UCLA Health | Jun 2016 - Apr 2019 | |
| - Provided care in 20-bed medical-surgical unit | |
| - Administered 100+ IV medications daily | |
| - Maintained 98% patient satisfaction scores | |
| EDUCATION | |
| BSN Nursing | University of Southern California | 2016 | |
| LICENSES & CERTIFICATIONS | |
| - RN License #RN12345 (California) | |
| - BLS Certified | |
| - ACLS Certified | |
| - CCRN Certified (2020) | |
| SKILLS | |
| Patient Assessment, IV Therapy, Ventilator Management, Epic EMR | |
| """, | |
| "jd": """ | |
| ICU Registered Nurse | |
| - Active RN license required | |
| - 5+ years critical care experience | |
| - BLS and ACLS certification | |
| - Strong patient care skills | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["8 years"], | |
| "companies": ["Cedar-Sinai Medical Center", "UCLA Health"], | |
| "metrics": ["4-6", "40%", "12", "50+", "20-bed", "100+", "98%"], | |
| "dates": ["May 2019", "Present", "Jun 2016", "Apr 2019", "2016", "2020"], | |
| "skills": ["Patient Assessment", "IV Therapy", "Ventilator Management", "Epic EMR"], | |
| "education": ["BSN Nursing", "University of Southern California"], | |
| "certifications": ["RN License", "RN12345", "BLS", "ACLS", "CCRN"] | |
| } | |
| }, | |
| "Physical Therapist": { | |
| "resume": """ | |
| David Kim | david.kim@email.com | Chicago, IL | |
| Doctor of Physical Therapy with 5 years of outpatient orthopedic experience. | |
| EXPERIENCE | |
| Physical Therapist | Midwest Rehab Center | Aug 2021 - Present | |
| - Treat 12-15 patients daily with orthopedic conditions | |
| - Achieved 89% patient goal attainment rate | |
| - Specialized in post-surgical ACL and rotator cuff rehabilitation | |
| - Supervised 3 physical therapy assistants | |
| Physical Therapist | City Sports Medicine | Jul 2019 - Jul 2021 | |
| - Provided care for 40+ athletes per week | |
| - Developed return-to-sport protocols | |
| - Reduced average treatment duration by 2 weeks | |
| EDUCATION | |
| DPT Physical Therapy | Northwestern University | 2019 | |
| BS Kinesiology | University of Illinois | 2016 | |
| LICENSES | |
| - PT License #PT98765 (Illinois) | |
| - Certified Orthopedic Clinical Specialist (OCS) | |
| SKILLS | |
| Manual Therapy, Therapeutic Exercise, Dry Needling, Sports Rehabilitation | |
| """, | |
| "jd": """ | |
| Physical Therapist - Orthopedics | |
| - DPT or equivalent required | |
| - Active PT license | |
| - 3+ years orthopedic experience | |
| - Manual therapy skills preferred | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["5 years"], | |
| "companies": ["Midwest Rehab Center", "City Sports Medicine"], | |
| "metrics": ["12-15", "89%", "3", "40+", "2 weeks"], | |
| "dates": ["Aug 2021", "Present", "Jul 2019", "Jul 2021", "2019", "2016"], | |
| "skills": ["Manual Therapy", "Therapeutic Exercise", "Dry Needling", "Sports Rehabilitation"], | |
| "education": ["DPT Physical Therapy", "Northwestern University", "BS Kinesiology", "University of Illinois"], | |
| "certifications": ["PT License", "PT98765", "OCS", "Orthopedic Clinical Specialist"] | |
| } | |
| }, | |
| # ==================== FINANCE ==================== | |
| "Financial Analyst": { | |
| "resume": """ | |
| Jennifer Lee | jennifer.lee@email.com | New York, NY | |
| Financial Analyst with 6 years of FP&A and financial modeling experience. | |
| EXPERIENCE | |
| Senior Financial Analyst | Goldman Sachs | Feb 2021 - Present | |
| - Built financial models for $500M+ M&A transactions | |
| - Prepared quarterly earnings presentations for CFO | |
| - Reduced forecasting variance from 8% to 3% | |
| - Managed team of 2 junior analysts | |
| Financial Analyst | Morgan Stanley | Aug 2018 - Jan 2021 | |
| - Analyzed 25+ investment opportunities annually | |
| - Created DCF models for valuation analysis | |
| - Supported $2B fundraising activities | |
| EDUCATION | |
| MBA Finance | Columbia Business School | 2018 | |
| BA Economics | Yale University | 2015 | |
| CERTIFICATIONS | |
| - CFA Level III Candidate | |
| - Financial Modeling Certificate (2019) | |
| SKILLS | |
| Excel, PowerPoint, Bloomberg Terminal, Capital IQ, SQL, Python | |
| """, | |
| "jd": """ | |
| Senior Financial Analyst | |
| - 5+ years FP&A experience | |
| - Advanced financial modeling skills | |
| - MBA preferred | |
| - CFA progress preferred | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["6 years"], | |
| "companies": ["Goldman Sachs", "Morgan Stanley"], | |
| "metrics": ["$500M+", "8%", "3%", "2", "25+", "$2B"], | |
| "dates": ["Feb 2021", "Present", "Aug 2018", "Jan 2021", "2018", "2015", "2019"], | |
| "skills": ["Excel", "PowerPoint", "Bloomberg Terminal", "Capital IQ", "SQL", "Python"], | |
| "education": ["MBA Finance", "Columbia Business School", "BA Economics", "Yale University"], | |
| "certifications": ["CFA Level III", "Financial Modeling Certificate"] | |
| } | |
| }, | |
| "Accountant": { | |
| "resume": """ | |
| Robert Martinez | robert.m@email.com | Dallas, TX | |
| CPA with 7 years of public and corporate accounting experience. | |
| EXPERIENCE | |
| Senior Accountant | Deloitte | Jan 2020 - Present | |
| - Led audit engagements for 8 Fortune 500 clients | |
| - Managed $50M+ in client receivables | |
| - Trained 5 staff accountants on GAAP procedures | |
| - Identified $2.3M in cost savings for clients | |
| Staff Accountant | PricewaterhouseCoopers | Sep 2017 - Dec 2019 | |
| - Prepared financial statements for 15+ clients | |
| - Performed SOX compliance testing | |
| - Processed 500+ journal entries monthly | |
| EDUCATION | |
| MS Accounting | University of Texas at Austin | 2017 | |
| BS Accounting | Texas A&M University | 2015 | |
| CERTIFICATIONS | |
| - CPA License #CPA456789 (Texas) | |
| - Certified Internal Auditor (CIA) | |
| SKILLS | |
| GAAP, IFRS, SAP, QuickBooks, Excel, Audit, Tax Preparation | |
| """, | |
| "jd": """ | |
| Senior Accountant | |
| - CPA required | |
| - 5+ years public accounting | |
| - GAAP and audit experience | |
| - Fortune 500 client experience preferred | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["7 years"], | |
| "companies": ["Deloitte", "PricewaterhouseCoopers"], | |
| "metrics": ["8", "$50M+", "5", "$2.3M", "15+", "500+"], | |
| "dates": ["Jan 2020", "Present", "Sep 2017", "Dec 2019", "2017", "2015"], | |
| "skills": ["GAAP", "IFRS", "SAP", "QuickBooks", "Excel", "Audit", "Tax Preparation"], | |
| "education": ["MS Accounting", "University of Texas at Austin", "BS Accounting", "Texas A&M University"], | |
| "certifications": ["CPA", "CPA456789", "CIA", "Certified Internal Auditor"] | |
| } | |
| }, | |
| # ==================== LEGAL ==================== | |
| "Attorney": { | |
| "resume": """ | |
| Amanda Thompson | amanda.t@email.com | Washington, DC | |
| Corporate Attorney with 9 years of M&A and securities law experience. | |
| EXPERIENCE | |
| Senior Associate | Skadden Arps | Mar 2019 - Present | |
| - Closed 12 M&A transactions totaling $8B | |
| - Advised Fortune 100 companies on SEC compliance | |
| - Led due diligence teams of 6-8 attorneys | |
| - Negotiated complex acquisition agreements | |
| Associate | Davis Polk | Sep 2015 - Feb 2019 | |
| - Drafted 100+ securities filings | |
| - Supported 20 IPO transactions | |
| - Conducted antitrust analysis for mergers | |
| EDUCATION | |
| JD | Harvard Law School | 2015 | |
| BA Political Science | Princeton University | 2012 | |
| BAR ADMISSIONS | |
| - New York Bar (2015) | |
| - DC Bar (2016) | |
| SKILLS | |
| M&A, Securities Law, Due Diligence, Contract Negotiation, SEC Filings | |
| """, | |
| "jd": """ | |
| Senior Corporate Associate | |
| - JD from top law school | |
| - 8+ years M&A experience | |
| - Bar admission required | |
| - SEC and securities experience | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["9 years"], | |
| "companies": ["Skadden Arps", "Davis Polk"], | |
| "metrics": ["12", "$8B", "6-8", "100+", "20"], | |
| "dates": ["Mar 2019", "Present", "Sep 2015", "Feb 2019", "2015", "2012", "2016"], | |
| "skills": ["M&A", "Securities Law", "Due Diligence", "Contract Negotiation", "SEC Filings"], | |
| "education": ["JD", "Harvard Law School", "BA Political Science", "Princeton University"], | |
| "certifications": ["New York Bar", "DC Bar"] | |
| } | |
| }, | |
| # ==================== MARKETING ==================== | |
| "Marketing Manager": { | |
| "resume": """ | |
| Lisa Wang | lisa.wang@email.com | San Francisco, CA | |
| Marketing Manager with 7 years of B2B SaaS marketing experience. | |
| EXPERIENCE | |
| Senior Marketing Manager | Salesforce | Jun 2021 - Present | |
| - Managed $3M annual marketing budget | |
| - Increased MQLs by 45% through demand generation | |
| - Led team of 4 marketing specialists | |
| - Launched 8 product campaigns with 150% ROI | |
| Marketing Manager | HubSpot | Apr 2018 - May 2021 | |
| - Grew email list from 50,000 to 200,000 subscribers | |
| - Achieved 25% increase in website traffic | |
| - Created content strategy generating 500 leads monthly | |
| EDUCATION | |
| MBA Marketing | UC Berkeley | 2018 | |
| BA Communications | USC | 2015 | |
| CERTIFICATIONS | |
| - Google Analytics Certified | |
| - HubSpot Inbound Marketing Certified | |
| SKILLS | |
| Demand Generation, Content Marketing, SEO, Paid Ads, Marketo, Salesforce | |
| """, | |
| "jd": """ | |
| Senior Marketing Manager | |
| - 5+ years B2B marketing | |
| - Demand generation experience | |
| - Team leadership | |
| - Marketing automation tools | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["7 years"], | |
| "companies": ["Salesforce", "HubSpot"], | |
| "metrics": ["$3M", "45%", "4", "8", "150%", "50,000", "200,000", "25%", "500"], | |
| "dates": ["Jun 2021", "Present", "Apr 2018", "May 2021", "2018", "2015"], | |
| "skills": ["Demand Generation", "Content Marketing", "SEO", "Paid Ads", "Marketo", "Salesforce"], | |
| "education": ["MBA Marketing", "UC Berkeley", "BA Communications", "USC"], | |
| "certifications": ["Google Analytics", "HubSpot Inbound Marketing"] | |
| } | |
| }, | |
| # ==================== HUMAN RESOURCES ==================== | |
| "HR Manager": { | |
| "resume": """ | |
| Michael Brown | michael.b@email.com | Atlanta, GA | |
| HR Manager with 8 years of talent acquisition and employee relations experience. | |
| EXPERIENCE | |
| HR Manager | Delta Air Lines | Sep 2020 - Present | |
| - Oversee HR operations for 500+ employees | |
| - Reduced turnover by 22% through engagement initiatives | |
| - Implemented new HRIS system serving 5,000 users | |
| - Led diversity hiring increasing representation by 35% | |
| HR Generalist | Coca-Cola Company | Jun 2016 - Aug 2020 | |
| - Recruited 150+ employees annually | |
| - Processed 200+ employee relations cases | |
| - Administered benefits for 2,000 employees | |
| EDUCATION | |
| MS Human Resource Management | Georgia State University | 2016 | |
| BS Psychology | University of Georgia | 2014 | |
| CERTIFICATIONS | |
| - SHRM-SCP (2021) | |
| - PHR Certified | |
| SKILLS | |
| Talent Acquisition, Employee Relations, HRIS, Benefits Administration, Workday | |
| """, | |
| "jd": """ | |
| HR Manager | |
| - 7+ years HR experience | |
| - SHRM certification preferred | |
| - Employee relations expertise | |
| - HRIS implementation experience | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["8 years"], | |
| "companies": ["Delta Air Lines", "Coca-Cola Company"], | |
| "metrics": ["500+", "22%", "5,000", "35%", "150+", "200+", "2,000"], | |
| "dates": ["Sep 2020", "Present", "Jun 2016", "Aug 2020", "2016", "2014", "2021"], | |
| "skills": ["Talent Acquisition", "Employee Relations", "HRIS", "Benefits Administration", "Workday"], | |
| "education": ["MS Human Resource Management", "Georgia State University", "BS Psychology", "University of Georgia"], | |
| "certifications": ["SHRM-SCP", "PHR"] | |
| } | |
| }, | |
| # ==================== EDUCATION ==================== | |
| "Teacher": { | |
| "resume": """ | |
| Jessica Adams | jessica.a@email.com | Denver, CO | |
| High School Math Teacher with 10 years of classroom experience. | |
| EXPERIENCE | |
| Math Teacher | Denver East High School | Aug 2018 - Present | |
| - Teach Algebra II, Pre-Calculus, and AP Calculus to 150 students | |
| - Improved AP exam pass rate from 65% to 88% | |
| - Sponsor Math Club with 25 student members | |
| - Mentor 3 student teachers annually | |
| Math Teacher | Aurora Central High School | Aug 2014 - Jul 2018 | |
| - Taught 5 classes of 30 students each | |
| - Developed curriculum for honors geometry | |
| - Achieved 95% student pass rate | |
| EDUCATION | |
| MA Education | University of Colorado | 2014 | |
| BS Mathematics | Colorado State University | 2012 | |
| CERTIFICATIONS | |
| - Colorado Teaching License #T123456 | |
| - AP Calculus Certified Teacher | |
| SKILLS | |
| Curriculum Development, Classroom Management, Differentiated Instruction, Google Classroom | |
| """, | |
| "jd": """ | |
| High School Math Teacher | |
| - Teaching license required | |
| - 5+ years teaching experience | |
| - AP teaching experience preferred | |
| - Strong classroom management | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["10 years"], | |
| "companies": ["Denver East High School", "Aurora Central High School"], | |
| "metrics": ["150", "65%", "88%", "25", "3", "5", "30", "95%"], | |
| "dates": ["Aug 2018", "Present", "Aug 2014", "Jul 2018", "2014", "2012"], | |
| "skills": ["Curriculum Development", "Classroom Management", "Differentiated Instruction", "Google Classroom"], | |
| "education": ["MA Education", "University of Colorado", "BS Mathematics", "Colorado State University"], | |
| "certifications": ["Colorado Teaching License", "T123456", "AP Calculus Certified"] | |
| } | |
| }, | |
| # ==================== ENGINEERING ==================== | |
| "Mechanical Engineer": { | |
| "resume": """ | |
| Chris Taylor | chris.t@email.com | Detroit, MI | |
| Mechanical Engineer with 6 years of automotive design experience. | |
| EXPERIENCE | |
| Senior Mechanical Engineer | Ford Motor Company | Mar 2021 - Present | |
| - Designed powertrain components for 3 vehicle platforms | |
| - Reduced manufacturing costs by $1.2M annually | |
| - Led team of 5 engineers on EV battery enclosure project | |
| - Filed 2 patents for thermal management systems | |
| Mechanical Engineer | General Motors | Jul 2018 - Feb 2021 | |
| - Performed FEA analysis on 50+ component designs | |
| - Improved fuel efficiency by 8% through aerodynamic optimization | |
| - Managed $500K prototype budget | |
| EDUCATION | |
| MS Mechanical Engineering | University of Michigan | 2018 | |
| BS Mechanical Engineering | Purdue University | 2016 | |
| CERTIFICATIONS | |
| - Professional Engineer (PE) #PE789012 (Michigan) | |
| - Six Sigma Green Belt | |
| SKILLS | |
| SolidWorks, CATIA, ANSYS, FEA, CFD, GD&T, DFMEA | |
| """, | |
| "jd": """ | |
| Senior Mechanical Engineer - Automotive | |
| - 5+ years automotive experience | |
| - CAD proficiency (SolidWorks, CATIA) | |
| - FEA analysis experience | |
| - PE license preferred | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["6 years"], | |
| "companies": ["Ford Motor Company", "General Motors"], | |
| "metrics": ["3", "$1.2M", "5", "2 patents", "50+", "8%", "$500K"], | |
| "dates": ["Mar 2021", "Present", "Jul 2018", "Feb 2021", "2018", "2016"], | |
| "skills": ["SolidWorks", "CATIA", "ANSYS", "FEA", "CFD", "GD&T", "DFMEA"], | |
| "education": ["MS Mechanical Engineering", "University of Michigan", "BS Mechanical Engineering", "Purdue University"], | |
| "certifications": ["PE", "PE789012", "Professional Engineer", "Six Sigma Green Belt"] | |
| } | |
| }, | |
| # ==================== SALES ==================== | |
| "Sales Manager": { | |
| "resume": """ | |
| Brian Wilson | brian.w@email.com | Austin, TX | |
| Sales Manager with 9 years of enterprise software sales experience. | |
| EXPERIENCE | |
| Regional Sales Manager | Oracle | Jan 2020 - Present | |
| - Manage territory with $25M annual quota | |
| - Exceeded quota by 115% in 2023 | |
| - Lead team of 8 account executives | |
| - Closed 3 deals worth $5M+ each | |
| - Expanded customer base by 40% | |
| Account Executive | SAP | Jun 2015 - Dec 2019 | |
| - Achieved 120% of quota for 4 consecutive years | |
| - Managed 50+ enterprise accounts | |
| - Generated $15M in new business | |
| EDUCATION | |
| BS Business Administration | University of Texas at Austin | 2015 | |
| CERTIFICATIONS | |
| - Salesforce Certified Administrator | |
| - Oracle Cloud Certified | |
| SKILLS | |
| Enterprise Sales, Account Management, Salesforce, Contract Negotiation, Team Leadership | |
| """, | |
| "jd": """ | |
| Regional Sales Manager | |
| - 8+ years B2B sales experience | |
| - Enterprise software background | |
| - Quota achievement track record | |
| - Team management experience | |
| """, | |
| "facts_to_preserve": { | |
| "years_experience": ["9 years"], | |
| "companies": ["Oracle", "SAP"], | |
| "metrics": ["$25M", "115%", "8", "3 deals", "$5M+", "40%", "120%", "4 consecutive years", "50+", "$15M"], | |
| "dates": ["Jan 2020", "Present", "Jun 2015", "Dec 2019", "2015", "2023"], | |
| "skills": ["Enterprise Sales", "Account Management", "Salesforce", "Contract Negotiation", "Team Leadership"], | |
| "education": ["BS Business Administration", "University of Texas at Austin"], | |
| "certifications": ["Salesforce Certified Administrator", "Oracle Cloud Certified"] | |
| } | |
| }, | |
| } | |
| # ==================== HALLUCINATION DETECTION ==================== | |
def extract_numbers(text: str) -> Set[str]:
    """Extract all numbers, percentages, and monetary values from text.

    Each pattern is applied independently, so a single figure can yield
    several tokens: ``"10,000"`` also yields ``"10"`` and ``"000"``, and
    ``"35%"`` also yields ``"35"``.

    Args:
        text: Text to scan.

    Returns:
        The set of distinct matched substrings, exactly as they appear in
        ``text``.
    """
    patterns = [
        r'\$[\d,]+(?:\.\d+)?[MBK]?',  # Money
        r'\d+(?:\.\d+)?%',            # Percentages
        r'\d{1,3}(?:,\d{3})+',        # Large numbers with commas
        # Simple numbers. A trailing "+" is kept ("200+"). The former
        # r'\b\d+\+?\b' always dropped it, because there is no word boundary
        # between "+" and a following space or punctuation mark.
        r'\b\d+(?:\+|\b)',
    ]
    numbers: Set[str] = set()
    for pattern in patterns:
        numbers.update(re.findall(pattern, text))
    return numbers
def extract_dates(text: str) -> Set[str]:
    """Collect date-like tokens from ``text``, normalised to lowercase.

    Three shapes are recognised, all matched case-insensitively:
    a month name or abbreviation followed by a four-digit year
    (``"Jan 2021"``, ``"September 2018"``), any standalone four-digit
    number, and the word ``"Present"``.

    Args:
        text: Text to scan.

    Returns:
        The set of lowercase matches.
    """
    date_regexes = (
        r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s*\d{4}\b',
        r'\b\d{4}\b',
        r'\bPresent\b',
    )
    return {
        hit.lower()
        for regex in date_regexes
        for hit in re.findall(regex, text, re.IGNORECASE)
    }
def extract_proper_nouns(text: str) -> Set[str]:
    """Find multi-word capitalised phrases such as company or school names.

    A phrase is two or more whitespace-separated words, each starting with
    an uppercase ASCII letter followed by at least one more ASCII letter.
    Single capitalised words are not returned.

    Args:
        text: Text to scan.

    Returns:
        The set of distinct phrases found.
    """
    capitalised_phrase = r'[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)+'
    return set(re.findall(capitalised_phrase, text))
def _contains_term(term: str, text: str) -> bool:
    """Return True if ``term`` occurs in ``text`` as a whole token.

    A match is rejected when it is glued to an adjacent ASCII letter or
    digit, so ``"git"`` is not found inside ``"digital"`` and ``"8%"`` is not
    found inside ``"88%"``. Both arguments are expected to be lowercase.
    """
    # Only guard the sides of the term that are alphanumeric; a term such as
    # "$500m+" or "inc." has nothing to be "glued" to on its symbol side.
    left_guard = r'(?<![a-z0-9])' if term[:1].isalnum() else ''
    right_guard = r'(?![a-z0-9])' if term[-1:].isalnum() else ''
    return re.search(left_guard + re.escape(term) + right_guard, text) is not None


def check_fact_preservation(original: str, optimized: str, facts: Dict) -> Dict:
    """Check if key facts are preserved in optimized resume.

    Matching is case-insensitive and token-based: a fact counts only when it
    appears as a whole token or phrase, never as a fragment of a longer word
    or number. A multi-word fact that is not found verbatim is still
    accepted as "reworded" when at least 70% of its words appear.

    Args:
        original: The original resume text. Not consulted; kept so the
            signature stays compatible with existing callers.
        optimized: The optimized resume text to inspect.
        facts: Mapping of category name to the list of fact strings that
            must survive optimization.

    Returns:
        A dict with ``"preserved"`` (list of ``"category: item"`` strings,
        suffixed with ``" (reworded)"`` for fuzzy matches), ``"issues"``
        (list of ``"MISSING category: 'item'"`` strings) and
        ``"preservation_rate"`` (preserved / total, or 1.0 when there are no
        facts to check).
    """
    issues = []
    preserved = []
    optimized_lower = optimized.lower()

    for category, items in facts.items():
        for item in items:
            item_lower = item.lower()

            # Exact (whole-token) match.
            if _contains_term(item_lower, optimized_lower):
                preserved.append(f"{category}: {item}")
                continue

            # Fuzzy match for slight rewording: most words still present.
            words = item_lower.split()
            if len(words) > 1:
                matches = sum(1 for w in words if _contains_term(w, optimized_lower))
                if matches >= len(words) * 0.7:
                    preserved.append(f"{category}: {item} (reworded)")
                    continue

            issues.append(f"MISSING {category}: '{item}'")

    total = len(preserved) + len(issues)
    return {
        "preserved": preserved,
        "issues": issues,
        "preservation_rate": len(preserved) / total if total else 1.0
    }
def _mentions_term(term: str, text: str) -> bool:
    """Return True if ``term`` appears in ``text`` as a standalone token.

    The match must not touch an adjacent ASCII letter or digit, so
    ``"scala"`` is not found in ``"scalable"`` nor ``"rust"`` in ``"trust"``.
    Both arguments are expected to be lowercase.
    """
    pattern = r'(?<![a-z0-9])' + re.escape(term) + r'(?![a-z0-9])'
    return re.search(pattern, text) is not None


def check_for_hallucinations(original: str, optimized: str, domain: str) -> Dict:
    """Check if new facts were hallucinated.

    Four heuristics compare the optimized resume against the original:

    * numbers (money, percentages, ``N+`` counts) that have no counterpart
      in the original;
    * a fixed list of technical skills that appear only in the optimized
      text while the original mentions no related technology;
    * a fixed list of certifications, handled the same way;
    * PhD / MBA / MD / JD mentions that occur more often than in the
      original.

    Args:
        original: The original resume text.
        optimized: The optimized resume text.
        domain: Name of the domain under test. Not consulted; kept so the
            signature stays compatible with existing callers.

    Returns:
        A dict with ``"hallucinations"`` (list of description strings) and
        ``"hallucination_count"`` (its length).
    """
    hallucinations = []
    original_lower = original.lower()
    optimized_lower = optimized.lower()

    # Extract numbers from both - be more careful with monetary values
    def extract_numbers_carefully(text):
        numbers = set()
        # Money with suffixes. The suffix must not run into further letters:
        # matching is case-insensitive, so without the lookahead "$500 by"
        # would be read as "$500b" and "$25 more" as "$25m".
        money = r'\$[\d,]+(?:\.\d+)?(?:\s*(?:million|billion|M|B|K)(?![a-z]))?'
        for m in re.findall(money, text, re.IGNORECASE):
            numbers.add(m.lower().replace(' ', ''))
        # Percentages
        numbers.update(re.findall(r'\d+(?:\.\d+)?%', text))
        # Large numbers with + suffix
        numbers.update(re.findall(r'\d+\+', text))
        return numbers

    def strip_formatting(num):
        return num.replace(',', '').replace('$', '').replace('%', '')

    # Empty cores (e.g. from a stray "$,") are dropped: an empty string is a
    # substring of everything and would mask every new number.
    original_cores = [
        core
        for core in map(strip_formatting, extract_numbers_carefully(original))
        if core
    ]

    # Find new numbers that weren't in original (but ignore small numbers and
    # reformatting). Sorted so the report order is stable between runs.
    # NOTE: containment in either direction counts as "same number"; this
    # tolerates "$15M" vs "$15 million" but also lets "5%" hide behind "35%".
    for num in sorted(extract_numbers_carefully(optimized)):
        num_clean = strip_formatting(num)
        found = any(
            num_clean in orig_clean or orig_clean in num_clean
            for orig_clean in original_cores
        )
        if not found and len(num) > 2:  # Ignore very short numbers
            hallucinations.append(f"NEW NUMBER: {num}")

    # Check for hallucinated skills - only flag clearly unrelated skills
    # Skills that should ONLY appear if original resume mentions related tech
    skill_requirements = {
        'kubernetes': ['docker', 'container', 'devops', 'cloud', 'aws', 'gcp', 'azure'],
        'terraform': ['infrastructure', 'cloud', 'aws', 'devops', 'iac'],
        'pytorch': ['machine learning', 'ml', 'deep learning', 'ai', 'neural', 'tensorflow'],
        'tensorflow': ['machine learning', 'ml', 'deep learning', 'ai', 'neural', 'pytorch'],
        'scala': ['java', 'spark', 'big data', 'jvm', 'functional'],
        'golang': ['backend', 'microservices', 'distributed', 'cloud'],
        'rust': ['systems', 'performance', 'low-level', 'c++'],
        'blockchain': ['crypto', 'web3', 'smart contract', 'ethereum'],
    }
    for skill, prereqs in skill_requirements.items():
        # The skill itself must be a whole token; prerequisites stay plain
        # substring checks because they only ever suppress a finding.
        if _mentions_term(skill, optimized_lower) and not _mentions_term(skill, original_lower):
            has_prereq = any(prereq in original_lower for prereq in prereqs)
            if not has_prereq:
                hallucinations.append(f"HALLUCINATED SKILL: {skill}")

    # Check for hallucinated certifications - these are serious
    common_certs = [
        ('aws certified', ['aws', 'amazon web services', 'cloud']),
        ('azure certified', ['azure', 'microsoft cloud']),
        ('gcp certified', ['gcp', 'google cloud']),
        ('pmp', ['project manage', 'pm ']),
        ('cissp', ['security', 'infosec', 'cybersecurity']),
        ('cfa', ['finance', 'investment', 'portfolio']),
        ('cpa', ['accounting', 'audit', 'tax']),
    ]
    for cert, prereqs in common_certs:
        if _mentions_term(cert, optimized_lower) and not _mentions_term(cert, original_lower):
            has_prereq = any(prereq in original_lower for prereq in prereqs)
            if not has_prereq:
                hallucinations.append(f"HALLUCINATED CERT: {cert}")

    # Check for hallucinated degrees - very serious
    degree_patterns = [
        (r'\bphd\b|\bph\.?d\.?\b', 'PhD'),
        (r'\bmba\b', 'MBA'),
        (r'\bmd\b', 'MD'),
        (r'\bjd\b', 'JD'),
    ]
    for pattern, name in degree_patterns:
        orig_count = len(re.findall(pattern, original_lower))
        opt_count = len(re.findall(pattern, optimized_lower))
        # More mentions than before means a degree was added.
        if opt_count > orig_count:
            hallucinations.append(f"HALLUCINATED DEGREE: {name}")

    return {
        "hallucinations": hallucinations,
        "hallucination_count": len(hallucinations)
    }
def run_domain_test(domain: str, test_case: Dict) -> Dict:
    """Run a single domain test.

    Scores the original resume, asks the LLM optimizer for a rewrite, scores
    the rewrite, and then checks the rewrite for lost facts and for
    hallucinated content. Progress is printed as the test runs.

    Args:
        domain: Label for the test case; used in output and in the result.
        test_case: Dict with the keys ``"resume"``, ``"jd"`` and
            ``"facts_to_preserve"``.

    Returns:
        A result dict whose ``"status"`` is one of:

        * ``"PASS"`` / ``"FAIL"`` - full result with scores, preservation
          rate, missing facts, hallucinations and a ``"passed"`` flag;
        * ``"SKIPPED"`` - the optimizer output started with ``"❌ ERROR"``;
        * ``"ERROR"`` - an exception was raised during optimization or
          checking; its message is stored under ``"error"``.
    """
    resume_text = test_case["resume"]
    job_description = test_case["jd"]
    expected_facts = test_case["facts_to_preserve"]

    print(f"\n{'='*60}")
    print(f"Testing: {domain}")
    print(f"{'='*60}")

    # Baseline score. This runs outside the try block, so a failure here
    # propagates to the caller instead of becoming an "ERROR" result.
    analyzer = ATSCompatibilityAnalyzer()
    original_score = analyzer.analyze(resume_text, job_description)['total_score']
    print(f"Original ATS Score: {original_score}%")

    try:
        # The optimizer may return either the text or a tuple led by the text.
        llm_output = optimize_with_llm(resume_text, job_description)
        if isinstance(llm_output, tuple):
            optimized = llm_output[0]
        else:
            optimized = llm_output

        if optimized.startswith("❌ ERROR"):
            return {
                "domain": domain,
                "status": "SKIPPED",
                "reason": "Optimization blocked (expected for short resumes)",
                "original_score": original_score
            }

        # Score the rewrite against the same job description.
        optimized_score = analyzer.analyze(optimized, job_description)['total_score']
        print(f"Optimized ATS Score: {optimized_score}%")
        print(f"Score Change: {optimized_score - original_score:+d}%")

        # Were the expected facts kept?
        preservation = check_fact_preservation(resume_text, optimized, expected_facts)
        missing_facts = preservation['issues']
        print(f"\nFact Preservation Rate: {preservation['preservation_rate']*100:.1f}%")
        if missing_facts:
            print(f"⚠️ Missing Facts ({len(missing_facts)}):")
            for issue in missing_facts[:5]:  # Show first 5
                print(f" - {issue}")

        # Was anything invented?
        hallucination_check = check_for_hallucinations(resume_text, optimized, domain)
        invented = hallucination_check['hallucinations']
        if invented:
            print(f"🚨 Hallucinations Detected ({hallucination_check['hallucination_count']}):")
            for h in invented[:5]:
                print(f" - {h}")
        else:
            print("✅ No hallucinations detected")

        # Verdict: at least 80% of facts preserved, at most 2 hallucination
        # findings (minor issues tolerated), and no drop in ATS score.
        passed = (
            preservation['preservation_rate'] >= 0.8
            and hallucination_check['hallucination_count'] <= 2
            and optimized_score >= original_score
        )
        status = "PASS" if passed else "FAIL"

        return {
            "domain": domain,
            "status": status,
            "original_score": original_score,
            "optimized_score": optimized_score,
            "score_change": optimized_score - original_score,
            "preservation_rate": preservation['preservation_rate'],
            "missing_facts": missing_facts,
            "hallucinations": invented,
            "passed": passed
        }
    except Exception as e:
        # Any failure is reported as an ERROR result, not raised.
        print(f"❌ Error: {str(e)}")
        return {
            "domain": domain,
            "status": "ERROR",
            "error": str(e),
            "original_score": original_score
        }
def main():
    """Run the whole hallucination suite and report the outcome.

    Every entry of ``DOMAIN_TEST_CASES`` is handed to ``run_domain_test``.
    The returned statuses are tallied, a summary (with details for failed
    domains and aggregate metrics) is printed, and all results are dumped to
    a timestamped ``hallucination_results_*.json`` file in the current
    working directory.

    Returns:
        0 when no test failed or errored, otherwise 1.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70
    total = len(DOMAIN_TEST_CASES)

    print(heavy_rule)
    print("COMPREHENSIVE HALLUCINATION TESTING SUITE")
    print(heavy_rule)
    print(f"Testing {total} domains for fact preservation and hallucination detection")
    print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    results = []
    # Any status other than PASS / FAIL / SKIPPED is counted as an error.
    tally = {"PASS": 0, "FAIL": 0, "SKIPPED": 0, "ERROR": 0}
    for domain, test_case in DOMAIN_TEST_CASES.items():
        outcome = run_domain_test(domain, test_case)
        results.append(outcome)
        status = outcome['status']
        bucket = status if status in ("PASS", "FAIL", "SKIPPED") else "ERROR"
        tally[bucket] += 1

    passed, failed = tally["PASS"], tally["FAIL"]
    skipped, errors = tally["SKIPPED"], tally["ERROR"]

    # Print summary
    print(f"\n{heavy_rule}")
    print("SUMMARY")
    print(heavy_rule)
    print(f"✅ Passed: {passed}/{total}")
    print(f"❌ Failed: {failed}/{total}")
    print(f"⏭️ Skipped: {skipped}/{total}")
    print(f"⚠️ Errors: {errors}/{total}")

    # Show failed tests
    failures = [entry for entry in results if entry['status'] == 'FAIL']
    if failures:
        print(f"\n{light_rule}")
        print("FAILED TESTS:")
        print(light_rule)
        for entry in failures:
            print(f"\n{entry['domain']}:")
            print(f" Preservation Rate: {entry['preservation_rate']*100:.1f}%")
            if entry['missing_facts']:
                print(f" Missing Facts: {len(entry['missing_facts'])}")
            if entry['hallucinations']:
                print(f" Hallucinations: {entry['hallucinations']}")

    # Overall metrics. Skipped and errored results carry no preservation rate
    # and no hallucination list, hence the filter and the .get() default.
    preservation_rates = [
        entry['preservation_rate'] for entry in results if 'preservation_rate' in entry
    ]
    hallucination_total = sum(len(entry.get('hallucinations', [])) for entry in results)
    if preservation_rates:
        average_rate = sum(preservation_rates) / len(preservation_rates)
        print(f"\nAverage Fact Preservation Rate: {average_rate*100:.1f}%")
    # NOTE(review): printed even when no test produced a preservation rate;
    # confirm this placement matches the intended behavior.
    print(f"Total Hallucinations Detected: {hallucination_total}")

    # Save results
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results_file = f"hallucination_results_{stamp}.json"
    report = {
        "summary": {
            "passed": passed,
            "failed": failed,
            "skipped": skipped,
            "errors": errors,
            "total": total
        },
        "results": results
    }
    with open(results_file, 'w') as f:
        json.dump(report, f, indent=2)
    print(f"\nResults saved to: {results_file}")

    # Exit code: non-zero as soon as anything failed or errored.
    return 1 if (failed or errors) else 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status. sys.exit is
    # used rather than the bare exit() helper: exit() is installed by the
    # `site` module for interactive sessions and is not available when the
    # interpreter runs without it (e.g. `python -S`).
    sys.exit(main())