#!/usr/bin/env python3
"""Debug: why does the Chef resume get 60% keyword_match for an ML role?

Walks through a hand-written copy of the keyword-matching steps for one
resume / job-description pair, printing every intermediate result, and then
prints the score reported by
``ATSCompatibilityAnalyzer._calculate_tfidf_score`` for comparison.

NOTE(review): the matching tiers below are a manual replica of the
calculation; confirm they still mirror the analyzer's implementation in
``app`` before drawing conclusions from the per-word output.
"""

import math
import re
from collections import Counter

from app import ATSCompatibilityAnalyzer

# Alphabetic tokens of two or more letters.
WORD_PATTERN = re.compile(r'\b[a-zA-Z]{2,}\b')
BANNER = "=" * 60

analyzer = ATSCompatibilityAnalyzer()

# NOTE(review): whitespace inside this literal is kept exactly as found.
chef_resume = """ Chef John Smith Executive Chef | Le Restaurant | 2015-2023 • Created award-winning French cuisine menus • Managed kitchen staff of 20 • Sourced local organic ingredients Skills: French cuisine, pastry, wine pairing Education: Culinary Institute of America """

ml_jd = "Machine Learning Engineer with PhD, PyTorch, TensorFlow experience"


def stemmed_keywords(lowered_text):
    """Return the stems of all non-stop-word tokens found in ``lowered_text``.

    Tokens are filtered against ``analyzer.stop_words`` before being passed
    to ``analyzer._stem_word``.
    """
    return [
        analyzer._stem_word(token)
        for token in WORD_PATTERN.findall(lowered_text)
        if token not in analyzer.stop_words
    ]


def explain_match(term, term_weight, text, stems, raw_tokens):
    """Classify how a job-description stem matches the resume.

    The tiers are tried in order and the first one that applies wins.

    Args:
        term: Stemmed job-description keyword.
        term_weight: Weight assigned to ``term``.
        text: Lower-cased resume text (substring check).
        stems: Set of stemmed resume keywords.
        raw_tokens: Set of whitespace-separated lower-cased resume tokens.

    Returns:
        A ``(label, credit, notes)`` tuple: the tier name, the credited
        value, and the detail lines to print before the summary line.
    """
    if term in stems:
        return "DIRECT", term_weight, []

    if term in text:
        return "IN TEXT", term_weight, []

    # Either string contains the other; both must be longer than 3 chars.
    contained = [
        stem
        for stem in stems
        if len(term) > 3 and len(stem) > 3 and (term in stem or stem in term)
    ]
    if contained:
        notes = [
            f" → Containment match: '{term}' with '{stem}'"
            for stem in contained
        ]
        return "CONTAINMENT", term_weight * 0.95, notes

    # First four characters of the term appear anywhere in a raw token.
    prefix4 = [
        token
        for token in raw_tokens
        if len(term) >= 4 and len(token) >= 4 and term[:4] in token
    ]
    if prefix4:
        notes = [
            f" → Prefix match: '{term}' -> '{term[:4]}' in '{token}'"
            for token in prefix4
        ]
        return "4-CHAR PREFIX", term_weight * 0.75, notes

    # Term and resume stem share their first three characters.
    prefix3 = [
        stem
        for stem in stems
        if len(term) >= 3 and len(stem) >= 3 and term[:3] == stem[:3]
    ]
    if prefix3:
        notes = [
            f" → 3-char prefix match: '{term}' -> '{term[:3]}' == '{stem[:3]}' ('{stem}')"
            for stem in prefix3
        ]
        return "3-CHAR PREFIX", term_weight * 0.5, notes

    return "NO MATCH", 0, []


print(BANNER)
print("DEBUG: Keyword Match Score Calculation")
print(BANNER)

# Replicate the calculation on lower-cased copies of both texts.
resume_lower = chef_resume.lower()
jd_lower = ml_jd.lower()

jd_words = stemmed_keywords(jd_lower)
print(f"\nJD Keywords (stemmed): {jd_words}")

resume_words = stemmed_keywords(resume_lower)
print(f"\nResume Keywords (stemmed): {resume_words}")

# Weight each JD stem by 1 + log(max_count / count), so the most frequent
# stem gets weight 1 and rarer stems get more.
jd_tf = Counter(jd_words)
max_count = max(jd_tf.values(), default=1)
jd_weights = {}
for stem, occurrences in jd_tf.items():
    jd_weights[stem] = 1 + math.log(max_count / occurrences)
print(f"\nJD Weights: {jd_weights}")

resume_stems = set(resume_words)
resume_raw = set(resume_lower.split())

print(f"\nResume stems: {resume_stems}")
print(f"\nChecking matches...")

for word, weight in jd_weights.items():
    match_type, match_value, detail_lines = explain_match(
        word, weight, resume_lower, resume_stems, resume_raw
    )
    # Detail lines come first, then the one-line verdict for the word.
    for line in detail_lines:
        print(line)
    status = "❌" if match_type == "NO MATCH" else "✅"
    print(f"{status} '{word}' (weight={weight:.2f}) -> {match_type} ({match_value:.2f})")

# Score reported by the analyzer itself, for comparison with the trace above.
score = analyzer._calculate_tfidf_score(chef_resume, ml_jd)
print(f"\n📊 FINAL SCORE: {score}%")