Spaces:

sammy786
/

ats-resume-optimizer

Sleeping

ats-resume-optimizer / debug_taxonomy.py

Salim Shaikh

Add comprehensive fraud/gaming detection - 25/25 edge cases pass

3038a81 about 1 month ago

4.54 kB

	#!/usr/bin/env python3
	"""Debug: Find where the 60% comes from - check taxonomy expansion"""

	from app import ATSCompatibilityAnalyzer
	import re
	from collections import Counter
	import math

	analyzer = ATSCompatibilityAnalyzer()

	# Fixture pair: a chef's resume scored against a machine-learning job
	# description. The module docstring says the goal is to find where an
	# unexpected 60% score comes from.
	# NOTE: the separators are plain `|`; a backslash before the pipe would be an
	# invalid escape sequence in a non-raw string (SyntaxWarning on Python 3.12+)
	# and would put a literal backslash into the resume text.
	chef_resume = """
	Chef John Smith
	Executive Chef | Le Restaurant | 2015-2023
	• Created award-winning French cuisine menus
	• Managed kitchen staff of 20
	• Sourced local organic ingredients
	Skills: French cuisine, pastry, wine pairing
	Education: Culinary Institute of America
	"""

	ml_jd = "Machine Learning Engineer with PhD, PyTorch, TensorFlow experience"

	resume_lower = chef_resume.lower()
	jd_lower = ml_jd.lower()

	# Alphabetic tokens of two or more letters; the same pattern is used for both texts.
	word_pattern = re.compile(r'\b[a-zA-Z]{2,}\b')

	# Get JD words: stop words are filtered on the raw token, then the survivors are stemmed.
	jd_words = word_pattern.findall(jd_lower)
	jd_words = [analyzer._stem_word(w) for w in jd_words if w not in analyzer.stop_words]
	print(f"JD Keywords (stemmed): {jd_words}")

	# Get resume words (same filter-then-stem pipeline as the JD)
	resume_words = word_pattern.findall(resume_lower)
	resume_words_filtered = [analyzer._stem_word(w) for w in resume_words if w not in analyzer.stop_words]

	# EXPAND with taxonomy, then stem the expanded terms into a set for membership tests.
	resume_expanded = analyzer._expand_with_taxonomy(resume_words_filtered)
	resume_stems = {analyzer._stem_word(w) for w in resume_expanded}
	# Whitespace-delimited raw tokens; punctuation stays attached to them.
	resume_raw = set(resume_lower.split())

	print(f"\nResume words (before expansion): {len(resume_words_filtered)}")
	print(f"Resume words (after expansion): {len(resume_expanded)}")
	print(f"\nExpanded stems: {resume_stems}")

	# Check each JD word
	jd_tf = Counter(jd_words)
	max_count = max(jd_tf.values()) if jd_tf else 1
	# The most frequent JD term gets weight 1; rarer terms get 1 + ln(max_count / count).
	jd_weights = {word: 1 + math.log(max_count / count) for word, count in jd_tf.items()}

	print(f"\n{'='*60}")
	print("CHECKING EACH JD KEYWORD:")
	print('='*60)


	def _explain_match(word, weight):
	    """Return ``(reason, value)`` for the first matching tier, or ``(None, 0)``.

	    Tiers are tried from strictest to loosest and each one discounts the
	    keyword's weight: direct / raw-text (x1.0), containment (x0.95),
	    fuzzy (x0.85), 4-char prefix (x0.75), 3-char prefix (x0.5).
	    NOTE(review): presumably mirrors the tiers inside
	    ``analyzer._calculate_tfidf_score`` -- confirm against app.py.

	    Args:
	        word: Stemmed JD keyword.
	        weight: Weight of that keyword taken from ``jd_weights``.
	    """
	    # Check direct match in expanded stems
	    if word in resume_stems:
	        return "DIRECT in stems", weight

	    # Check raw word in resume text. This is a substring test on the whole
	    # lower-cased resume, so a stem can also hit inside a longer word.
	    if word in resume_lower:
	        return "IN resume text", weight

	    # Check containment (either string inside the other, both longer than 3 chars).
	    if len(word) > 3:
	        hit = next(
	            (rw for rw in resume_stems if len(rw) > 3 and (word in rw or rw in word)),
	            None,
	        )
	        if hit is not None:
	            return f"CONTAINMENT: '{word}' in/contains '{hit}'", weight * 0.95

	    # Check fuzzy: a single scan yields both the verdict and the matching stem.
	    hit = next(
	        (rw for rw in resume_stems if analyzer._fuzzy_match(word, rw, 0.65)),
	        None,
	    )
	    if hit is not None:
	        return f"FUZZY: '{word}' ~ '{hit}'", weight * 0.85

	    # Check 4-char prefix: the keyword's first four characters may occur
	    # anywhere inside a raw whitespace-delimited resume token.
	    if len(word) >= 4 and any(word[:4] in rw for rw in resume_raw if len(rw) >= 4):
	        return "4-CHAR PREFIX", weight * 0.75

	    # Check 3-char prefix
	    if len(word) >= 3:
	        hit = next(
	            (rw for rw in resume_stems if len(rw) >= 3 and word[:3] == rw[:3]),
	            None,
	        )
	        if hit is not None:
	            return f"3-CHAR PREFIX: '{word}' -> '{hit}'", weight * 0.5

	    return None, 0


	weighted_matches = 0
	total_weight = 0

	for word, weight in jd_weights.items():
	    total_weight += weight
	    match_reason, match_value = _explain_match(word, weight)

	    if match_reason is None:
	        print(f"❌ '{word}' -> NO MATCH")
	        continue

	    weighted_matches += match_value
	    print(f"✅ '{word}' -> {match_reason} = {match_value:.2f}")

	print(f"\n{'='*60}")
	print(f"Total weight: {total_weight}")
	print(f"Weighted matches: {weighted_matches}")

	# A JD that yields no keywords leaves total_weight at 0; report 0% instead of
	# raising ZeroDivisionError. The score is computed once and reused below.
	# NOTE(review): what the analyzer itself returns for an empty JD is not
	# visible here -- confirm against app.py.
	raw_score = (weighted_matches / total_weight) * 100 if total_weight else 0.0
	print(f"Raw score: {raw_score:.1f}%")

	# Check the floor logic: a resume with more than 20 filtered words whose raw
	# score is below 10 is lifted to 10 plus half of its raw score.
	resume_words_count = len(resume_words_filtered)
	if raw_score < 10 and resume_words_count > 20:
	    adjusted = 10 + (raw_score * 0.5)
	    print(f"\nFloor logic applied (resume has {resume_words_count} words):")
	    print(f" raw_score ({raw_score:.1f}) < 10 AND resume has > 20 words")
	    print(f" Adjusted: 10 + ({raw_score:.1f} * 0.5) = {adjusted:.1f}%")
	    raw_score = adjusted

	# Print the hand-computed score next to the analyzer's own result for comparison.
	print(f"\n📊 EXPECTED FINAL SCORE: {raw_score:.1f}%")
	print(f"📊 ACTUAL SCORE FROM FUNCTION: {analyzer._calculate_tfidf_score(chef_resume, ml_jd)}%")