# Spaces: Sleeping  (page-status text captured along with the script; not part of the code)
| #!/usr/bin/env python3 | |
| """Debug: Why does Chef resume get 60% keyword_match for ML role?""" | |
| from app import ATSCompatibilityAnalyzer | |
| import re | |
| from collections import Counter | |
| import math | |
| analyzer = ATSCompatibilityAnalyzer() | |
| chef_resume = """ | |
| Chef John Smith | |
| Executive Chef | Le Restaurant | 2015-2023 | |
| β’ Created award-winning French cuisine menus | |
| β’ Managed kitchen staff of 20 | |
| β’ Sourced local organic ingredients | |
| Skills: French cuisine, pastry, wine pairing | |
| Education: Culinary Institute of America | |
| """ | |
| ml_jd = "Machine Learning Engineer with PhD, PyTorch, TensorFlow experience" | |
| print("=" * 60) | |
| print("DEBUG: Keyword Match Score Calculation") | |
| print("=" * 60) | |
| # Replicate the calculation | |
| resume_lower = chef_resume.lower() | |
| jd_lower = ml_jd.lower() | |
| # Get JD words | |
| jd_words = re.findall(r'\b[a-zA-Z]{2,}\b', jd_lower) | |
| jd_words = [analyzer._stem_word(w) for w in jd_words if w not in analyzer.stop_words] | |
| print(f"\nJD Keywords (stemmed): {jd_words}") | |
| # Get resume words | |
| resume_words = re.findall(r'\b[a-zA-Z]{2,}\b', resume_lower) | |
| resume_words = [analyzer._stem_word(w) for w in resume_words if w not in analyzer.stop_words] | |
| print(f"\nResume Keywords (stemmed): {resume_words}") | |
| # Calculate what matches | |
| jd_tf = Counter(jd_words) | |
| max_count = max(jd_tf.values()) if jd_tf else 1 | |
| jd_weights = {word: 1 + math.log(max_count / count) for word, count in jd_tf.items()} | |
| print(f"\nJD Weights: {jd_weights}") | |
| resume_stems = set(resume_words) | |
| resume_raw = set(resume_lower.split()) | |
| print(f"\nResume stems: {resume_stems}") | |
| print(f"\nChecking matches...") | |
| for word, weight in jd_weights.items(): | |
| matched = False | |
| match_type = "NO MATCH" | |
| match_value = 0 | |
| # Check direct match | |
| if word in resume_stems: | |
| matched = True | |
| match_type = "DIRECT" | |
| match_value = weight | |
| # Check raw word in resume text | |
| elif word in resume_lower: | |
| matched = True | |
| match_type = "IN TEXT" | |
| match_value = weight | |
| # Check containment | |
| elif any(word in rw or rw in word for rw in resume_stems if len(word) > 3 and len(rw) > 3): | |
| matched = True | |
| match_type = "CONTAINMENT" | |
| match_value = weight * 0.95 | |
| # Show which word matched | |
| for rw in resume_stems: | |
| if len(word) > 3 and len(rw) > 3 and (word in rw or rw in word): | |
| print(f" β Containment match: '{word}' with '{rw}'") | |
| # Check 4-char prefix | |
| elif any(word[:4] in rw for rw in resume_raw if len(word) >= 4 and len(rw) >= 4): | |
| matched = True | |
| match_type = "4-CHAR PREFIX" | |
| match_value = weight * 0.75 | |
| for rw in resume_raw: | |
| if len(word) >= 4 and len(rw) >= 4 and word[:4] in rw: | |
| print(f" β Prefix match: '{word}' -> '{word[:4]}' in '{rw}'") | |
| # Check 3-char prefix | |
| elif any(word[:3] == rw[:3] for rw in resume_stems if len(word) >= 3 and len(rw) >= 3): | |
| matched = True | |
| match_type = "3-CHAR PREFIX" | |
| match_value = weight * 0.5 | |
| for rw in resume_stems: | |
| if len(word) >= 3 and len(rw) >= 3 and word[:3] == rw[:3]: | |
| print(f" β 3-char prefix match: '{word}' -> '{word[:3]}' == '{rw[:3]}' ('{rw}')") | |
| status = "β " if matched else "β" | |
| print(f"{status} '{word}' (weight={weight:.2f}) -> {match_type} ({match_value:.2f})") | |
| # Get actual score | |
| score = analyzer._calculate_tfidf_score(chef_resume, ml_jd) | |
| print(f"\nπ FINAL SCORE: {score}%") | |