Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""Debug: Find where the 60% comes from - check taxonomy expansion.

Scores a deliberately mismatched pair (a chef resume against a machine
learning job description) one JD keyword at a time, printing which matching
rule gives each keyword credit. The hand-computed score is then printed next
to the value returned by ``ATSCompatibilityAnalyzer._calculate_tfidf_score``
so the two can be compared.

NOTE(review): the rules below are presumably a copy of the analyzer's own
matching cascade -- confirm against ``app.py`` before trusting a mismatch.
"""
import math
import re
from collections import Counter

from app import ATSCompatibilityAnalyzer

# Alphabetic tokens of at least two letters; shared by JD and resume parsing.
WORD_PATTERN = re.compile(r'\b[a-zA-Z]{2,}\b')
# Similarity threshold passed to analyzer._fuzzy_match.
FUZZY_THRESHOLD = 0.65

analyzer = ATSCompatibilityAnalyzer()

# NOTE(review): the 'β’' bullets here and the 'β' / 'π' markers in the print
# calls further down look like mis-decoded UTF-8 (probably bullets, check
# marks and emoji). They are kept verbatim -- confirm against the original file.
chef_resume = """
Chef John Smith
Executive Chef | Le Restaurant | 2015-2023
β’ Created award-winning French cuisine menus
β’ Managed kitchen staff of 20
β’ Sourced local organic ingredients
Skills: French cuisine, pastry, wine pairing
Education: Culinary Institute of America
"""
ml_jd = "Machine Learning Engineer with PhD, PyTorch, TensorFlow experience"

resume_lower = chef_resume.lower()
jd_lower = ml_jd.lower()


def stemmed_keywords(text):
    """Return the stemmed, stop-word-filtered alphabetic tokens of *text*."""
    return [
        analyzer._stem_word(token)
        for token in WORD_PATTERN.findall(text)
        if token not in analyzer.stop_words
    ]


def explain_match(word, weight):
    """Return ``(reason, credit)`` for the first rule matching *word*.

    Rules are tried from strongest to weakest; each later rule awards a
    smaller fraction of *weight*. Returns ``None`` when no rule fires.
    """
    # Exact hit among the taxonomy-expanded resume stems.
    if word in resume_stems:
        return "DIRECT in stems", weight

    # Substring hit anywhere in the lower-cased resume text.
    if word in resume_lower:
        return "IN resume text", weight

    # Containment in either direction; both sides must be longer than 3 chars.
    if len(word) > 3:
        hit = next(
            (rw for rw in resume_stems
             if len(rw) > 3 and (word in rw or rw in word)),
            None,
        )
        if hit is not None:
            return f"CONTAINMENT: '{word}' in/contains '{hit}'", weight * 0.95

    # Fuzzy similarity as judged by the analyzer.
    hit = next(
        (rw for rw in resume_stems
         if analyzer._fuzzy_match(word, rw, FUZZY_THRESHOLD)),
        None,
    )
    if hit is not None:
        return f"FUZZY: '{word}' ~ '{hit}'", weight * 0.85

    # First four letters of the keyword appear inside a raw resume token.
    if len(word) >= 4:
        prefix = word[:4]
        hit = next(
            (rw for rw in resume_raw if len(rw) >= 4 and prefix in rw),
            None,
        )
        if hit is not None:
            return f"4-CHAR PREFIX: '{prefix}' in '{hit}'", weight * 0.75

    # Keyword and an expanded stem share their first three letters.
    if len(word) >= 3:
        hit = next(
            (rw for rw in resume_stems
             if len(rw) >= 3 and word[:3] == rw[:3]),
            None,
        )
        if hit is not None:
            return f"3-CHAR PREFIX: '{word}' -> '{hit}'", weight * 0.5

    return None


# Get JD words
jd_words = stemmed_keywords(jd_lower)
print(f"JD Keywords (stemmed): {jd_words}")

# Get resume words
resume_words_filtered = stemmed_keywords(resume_lower)

# EXPAND with taxonomy
resume_expanded = analyzer._expand_with_taxonomy(resume_words_filtered)
resume_stems = {analyzer._stem_word(w) for w in resume_expanded}
resume_raw = set(resume_lower.split())

print(f"\nResume words (before expansion): {len(resume_words_filtered)}")
print(f"Resume words (after expansion): {len(resume_expanded)}")
print(f"\nExpanded stems: {resume_stems}")

# Weight each JD keyword: the most frequent one gets 1, rarer ones get more.
jd_tf = Counter(jd_words)
max_count = max(jd_tf.values()) if jd_tf else 1
jd_weights = {
    word: 1 + math.log(max_count / count) for word, count in jd_tf.items()
}

# Check each JD word
print(f"\n{'='*60}")
print("CHECKING EACH JD KEYWORD:")
print('='*60)

weighted_matches = 0
total_weight = 0
for word, weight in jd_weights.items():
    total_weight += weight
    outcome = explain_match(word, weight)
    if outcome is None:
        print(f"β '{word}' -> NO MATCH")
        continue
    match_reason, match_value = outcome
    weighted_matches += match_value
    print(f"β '{word}' -> {match_reason} = {match_value:.2f}")

# Guard the division: total_weight is 0 when every JD token was a stop word.
raw_score = (weighted_matches / total_weight) * 100 if total_weight else 0.0

print(f"\n{'='*60}")
print(f"Total weight: {total_weight}")
print(f"Weighted matches: {weighted_matches}")
print(f"Raw score: {raw_score:.1f}%")

# Check the floor logic
resume_words_count = len(resume_words_filtered)
if raw_score < 10 and resume_words_count > 20:
    adjusted = 10 + (raw_score * 0.5)
    print(f"\nFloor logic applied (resume has {resume_words_count} words):")
    print(f" raw_score ({raw_score:.1f}) < 10 AND resume has > 20 words")
    print(f" Adjusted: 10 + ({raw_score:.1f} * 0.5) = {adjusted:.1f}%")
    raw_score = adjusted

print(f"\nπ EXPECTED FINAL SCORE: {raw_score:.1f}%")
print(f"π ACTUAL SCORE FROM FUNCTION: {analyzer._calculate_tfidf_score(chef_resume, ml_jd)}%")