Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

Mangesh223 committed on Mar 27, 2025

Commit

9c66cce

verified ·

1 Parent(s): c9c405f

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -66

app.py CHANGED Viewed

@@ -14,9 +14,23 @@ login(token=os.getenv("HF_TOKEN"))
 # Precompiled regex patterns
 YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
-ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
 TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
-SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements)\s*:?', re.I | re.M)
 def extract_text_from_pdf(pdf_file):
     """Extract text from PDF with detailed error handling"""
@@ -40,7 +54,7 @@ def extract_text_from_pdf(pdf_file):
         if not text.strip():
             raise ValueError("No text extracted from PDF (possibly image-based or empty)")
-        return text[:10000]  # Limit to first 10,000 characters
     except PyPDF2.errors.PdfReadError as e:
         raise Exception(f"PDF read error: {str(e)}")
     except Exception as e:
@@ -48,53 +62,74 @@ def extract_text_from_pdf(pdf_file):
     finally:
         gc.collect()
-def extract_keywords(job_desc):
-    """Extract key skills, tools, and qualifications from job description"""
     if not job_desc:
-        return set()
     job_lower = job_desc.lower()
-    # Common skills/tools pattern (customize based on your domain)
-    skill_pattern = re.compile(r'\b(python|sql|excel|java|project management|communication|teamwork|aws|docker|[a-z]{2,}\d*)\b', re.I)
     keywords = set(skill_pattern.findall(job_lower))
-    # Boost priority for repeated terms
-    for word in set(re.findall(r'\w+', job_lower)):
-        if job_lower.count(word) > 2 and len(word) > 3:  # Frequent, non-trivial words
-            keywords.add(word)
-    return keywords
-def calculate_scores(resume_text, job_desc=None):
-    """Smart scoring tailored to job description"""
     resume_lower = resume_text.lower()
     scores = {
-        "relevance_to_job": 0,
-        "experience_quality": 0,
-        "skills_match": 0,
-        "education": 0,
-        "achievements": 0,
-        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
-        "customization": 0
     }
-    job_keywords = extract_keywords(job_desc) if job_desc else set()
     resume_words = set(re.findall(r'\w+', resume_lower))
-    # Relevance: Exact matches with job keywords
     if job_keywords:
-        matches = job_keywords & resume_words
         scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
-        scores["skills_match"] = min(20, sum(2 for word in matches if len(word) > 3) + sum(1 for word in matches))
     else:
-        # Fallback: Infer skills from resume if no job desc
-        inferred_skills = set(re.findall(r'\b(python|sql|excel|java|management|teamwork|analysis)\b', resume_lower, re.I))
-        scores["skills_match"] = min(10, len(inferred_skills) * 2)
-        scores["relevance_to_job"] = min(10, len(inferred_skills))
-    # Experience: Years + context
     years = len(YEAR_PATTERN.findall(resume_text))
-    scores["experience_quality"] = min(10, years * 2)
-    if "experience" in resume_lower:
-        scores["experience_quality"] += min(5, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
     # Education
     if 'phd' in resume_lower or 'doctorate' in resume_lower:
@@ -103,57 +138,80 @@ def calculate_scores(resume_text, job_desc=None):
         scores["education"] = 6
     elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
         scores["education"] = 4
-    elif 'high school' in resume_lower:
-        scores["education"] = 2
-    # Achievements
-    scores["achievements"] = min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 3)
-    # Customization: Check if resume mirrors job desc structure
-    if job_desc and job_keywords:
         scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
-    return scores, min(100, sum(scores.values())), job_keywords
-def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
-    """Analyze resume with smart, job-specific feedback"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
-        return (
-            f"Extraction failed: {str(e)}",
-            {"error": str(e)}
-        )
-    scores, total_score, job_keywords = calculate_scores(resume_text, job_desc)
     resume_words = set(re.findall(r'\w+', resume_text.lower()))
     # Basic analysis
     basic_analysis = {
         "strengths": [
-            f"Clear formatting (score: {scores['clarity']})" if scores["clarity"] > 7 else "",
-            f"Strong experience (score: {scores['experience_quality']})" if scores["experience_quality"] > 5 else ""
         ],
         "improvements": [
-            "Add specific achievements (e.g., 'Increased sales by 20%')" if scores["achievements"] < 5 else "",
-            f"Include more job-specific keywords (e.g., {list(job_keywords)[:2]})" if scores["relevance_to_job"] < 10 and job_keywords else "",
-            "Correct typos for better ATS parsing" if scores["clarity"] < 8 else ""
         ],
-        "missing_skills": list(job_keywords - resume_words)[:3] if job_keywords else ["e.g., Python", "e.g., SQL"]
     }
-    # Filter out empty strings
     basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
     basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
-    # Enhanced analysis with inference (if available)
     if inference_fn:
-        prompt = f"""[Return valid JSON]: Analyze this resume against the job description: {job_desc or "None"}.
-        Based on scores: {scores}, resume sample: {resume_text[:200]}, and job keywords: {list(job_keywords)[:5]},
-        provide:
-        - "strengths": 2 specific strengths (e.g., 'Lists 3+ years of Python experience'),
-        - "improvements": 3 actionable improvements (e.g., 'Add "AWS" to skills section'),
-        - "missing_skills": 3 skills missing from resume but in job desc (or inferred if no job desc).
         Return valid JSON only."""
         try:
@@ -183,13 +241,14 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
 # --- Gradio Interface --- #
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Sidebar():
-        gr.Markdown("# Resume Analyzer")
-        gr.Markdown("Upload your resume in PDF format and optionally provide a job description.")
     with gr.Row():
         with gr.Column(scale=1):
             pdf_input = gr.File(label="PDF Resume", type="binary")
             job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
             submit_btn = gr.Button("Analyze")
         with gr.Column(scale=2):
@@ -198,7 +257,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     submit_btn.click(
         fn=analyze_resume,
-        inputs=[pdf_input, job_desc_input],
         outputs=[extracted_text, analysis_output]
     )

 # Precompiled regex patterns
 YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
+ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved|optimized)\s+.*?(?:\s+by\s+)?(\d+%|\$\d+|\d+\s*[a-z]+)', re.I)
 TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
+SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements|github)\s*:?', re.I | re.M)
+DENSITY_PATTERN = re.compile(r'\b(\w+)\b.*\b\1\b', re.I)  # Detect repeated keywords
+LEADERSHIP_PATTERN = re.compile(r'(mentor|led|managed|team lead|open source|contributor|tech talk)', re.I)
+# Skill equivalence and inference
+SKILL_EQUIVALENTS = {
+    "node.js": {"nodejs"}, "react": {"preact"}, "mongodb": {"dynamodb"},
+    "javascript": {"js"}, "sql": {"mysql", "postgresql"}
+}
+SKILL_INFERENCES = {
+    "mern stack": {"mongodb", "express.js", "react", "node.js"},
+    "mean stack": {"mongodb", "express.js", "angular", "node.js"}
+}
+RECENT_TECH = {"next.js", "react 18", "node 20", "python 3.11"}
+OUTDATED_TECH = {"jquery", "angularjs", "php 5"}
 def extract_text_from_pdf(pdf_file):
     """Extract text from PDF with detailed error handling"""
         if not text.strip():
             raise ValueError("No text extracted from PDF (possibly image-based or empty)")
+        return text[:10000]
     except PyPDF2.errors.PdfReadError as e:
         raise Exception(f"PDF read error: {str(e)}")
     except Exception as e:
     finally:
         gc.collect()
# Known skills first, then a catch-all for any word of two or more letters
# with optional trailing digits. Compiled once instead of on every call.
_SKILL_PATTERN = re.compile(
    r'\b(python|sql|excel|java|react|node\.?js|mongodb|aws|docker|api|ui|ux|devops|[a-z]{2,}\d*)\b',
    re.I,
)
_FRONTEND_TERMS = frozenset(
    {"react", "vue", "angular", "ui", "ux", "css", "html", "javascript"}
)
# "nodejs" is listed next to "node.js" because the skill pattern accepts both
# spellings; without it the dot-less form would never be treated as critical.
_BACKEND_TERMS = frozenset(
    {"node.js", "nodejs", "python", "sql", "mongodb", "api", "django", "flask", "devops"}
)


def extract_keywords(job_desc, role_type="general"):
    """Extract job-specific keywords with role-based weighting.

    Args:
        job_desc: Free-text job description; may be ``None`` or empty.
        role_type: Role label. If it contains "frontend" or "backend"
            (case-insensitive), the critical keywords are narrowed to that
            role's term list. ``None`` is treated as "general".

    Returns:
        A 3-tuple of sets ``(keywords, critical_keywords, job_words)``:
        every term matched by the skill pattern, the role-relevant subset
        (all keywords for a general role), and every ``\\w+`` token of the
        lower-cased description. Three empty sets if ``job_desc`` is falsy.
    """
    if not job_desc:
        return set(), set(), set()

    job_lower = job_desc.lower()
    keywords = set(_SKILL_PATTERN.findall(job_lower))

    # The role may arrive as None (e.g. a cleared UI selection); fall back to
    # the general behaviour instead of raising AttributeError.
    role = (role_type or "general").lower()

    # Role-specific weighting
    if "frontend" in role:
        critical_keywords = keywords & _FRONTEND_TERMS
    elif "backend" in role:
        critical_keywords = keywords & _BACKEND_TERMS
    else:
        critical_keywords = keywords

    return keywords, critical_keywords, set(re.findall(r'\w+', job_lower))
+def calculate_scores(resume_text, job_desc=None, role_type="general"):
+    """Advanced scoring with semantic matching, seniority, and recency"""
     resume_lower = resume_text.lower()
     scores = {
+        "relevance_to_job": 0, "experience_quality": 0, "skills_match": 0,
+        "education": 0, "achievements": 0, "clarity": 10, "customization": 0,
+        "seniority": 0, "fresher_potential": 0
     }
+    job_keywords, critical_keywords, job_words = extract_keywords(job_desc, role_type)
     resume_words = set(re.findall(r'\w+', resume_lower))
+    # Semantic Skill Matching & Inference
+    effective_skills = set()
+    for skill in resume_words:
+        effective_skills.add(skill)
+        for base_skill, equivalents in SKILL_EQUIVALENTS.items():
+            if skill in equivalents:
+                effective_skills.add(base_skill)
+        for stack, inferred in SKILL_INFERENCES.items():
+            if stack in resume_lower:
+                effective_skills.update(inferred)
+    # Skills Match & Transfer
     if job_keywords:
+        matches = job_keywords & effective_skills
+        critical_matches = critical_keywords & effective_skills
+        scores["skills_match"] = min(20, len(matches) * 2 + len(critical_matches) * 3)
         scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
     else:
+        scores["skills_match"] = min(10, len(effective_skills) * 2)
+        scores["relevance_to_job"] = min(10, len(effective_skills))
+    # Experience: Projects = Work
     years = len(YEAR_PATTERN.findall(resume_text))
+    project_count = len(re.findall(r'(project|github|freelance)', resume_lower, re.I))
+    scores["experience_quality"] = min(15, years * 2 + project_count * 1)
+    # Seniority & Leadership
+    leadership_signals = len(LEADERSHIP_PATTERN.findall(resume_text))
+    scores["seniority"] = min(10, years + leadership_signals) if years > 3 else 0
+    # Fresher Potential
+    if years < 2:
+        learning_signals = len(re.findall(r'(learned|bootcamp|course|upskill)', resume_lower, re.I))
+        scores["fresher_potential"] = min(10, learning_signals * 2)
     # Education
     if 'phd' in resume_lower or 'doctorate' in resume_lower:
         scores["education"] = 6
     elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
         scores["education"] = 4
+    # Achievements (Mandatory for Mid/Senior)
+    achievements = len(ACHIEVEMENT_PATTERN.findall(resume_text))
+    scores["achievements"] = min(10, achievements * 3)
+    if years > 3 and achievements == 0:
+        scores["achievements"] -= 5  # Penalty for missing metrics
+    # Recency Weighting
+    recent_bonus = sum(2 for tech in RECENT_TECH if tech in resume_lower)
+    outdated_penalty = sum(-1 for tech in OUTDATED_TECH if tech in resume_lower)
+    scores["skills_match"] = max(0, scores["skills_match"] + recent_bonus + outdated_penalty)
+    # Clarity & ATS Compliance
+    scores["clarity"] -= min(8, len(TYPO_PATTERN.findall(resume_text)))
+    if "column" in resume_lower or not resume_text.strip():  # Basic ATS formatting check
+        scores["clarity"] -= 5
+    # Keyword Density & Anti-Gaming
+    density_count = len(DENSITY_PATTERN.findall(resume_text))
+    if density_count > 10:  # Excessive repetition
+        scores["customization"] -= 5
+    elif job_keywords:
         scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
+    return scores, min(100, sum(scores.values())), job_keywords, critical_keywords
+def analyze_resume(pdf_file, job_desc=None, role_type="general", inference_fn=None):
+    """Smart ATS analysis with detailed feedback"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
+        return f"Extraction failed: {str(e)}", {"error": str(e)}
+    scores, total_score, job_keywords, critical_keywords = calculate_scores(resume_text, job_desc, role_type)
     resume_words = set(re.findall(r'\w+', resume_text.lower()))
     # Basic analysis
+    ats_score = scores["relevance_to_job"] + scores["skills_match"] + scores["clarity"]
+    human_potential = scores["seniority"] + scores["fresher_potential"] + scores["achievements"]
+    flag = "High human potential but low ATS score" if human_potential > 15 and ats_score < 20 else ""
     basic_analysis = {
         "strengths": [
+            f"Strong {role_type} skills (score: {scores['skills_match']})" if scores["skills_match"] > 10 else "",
+            f"Clear seniority signals (score: {scores['seniority']})" if scores["seniority"] > 5 else "",
+            f"High fresher potential (score: {scores['fresher_potential']})" if scores["fresher_potential"] > 5 else ""
         ],
         "improvements": [
+            f"Add critical {role_type} keywords (e.g., {list(critical_keywords)[:2]})" if scores["relevance_to_job"] < 10 else "",
+            "Include measurable achievements (e.g., 'Reduced latency by 30%')" if scores["achievements"] < 5 else "",
+            "Use recent tech (e.g., Next.js) over outdated (e.g., jQuery)" if any(t in resume_text.lower() for t in OUTDATED_TECH) else ""
         ],
+        "missing_skills": list(critical_keywords - resume_words)[:3] if critical_keywords else ["e.g., Python", "e.g., SQL"],
+        "flags": [flag] if flag else []
     }
     basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
     basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
+    # Enhanced analysis with inference
     if inference_fn:
+        prompt = f"""[Return valid JSON]: Analyze this resume against job description: {job_desc or "None"} (role: {role_type}).
+        Resume sample: {resume_text[:200]}, scores: {scores}, job keywords: {list(job_keywords)[:5]}, critical keywords: {list(critical_keywords)[:5]}.
+        Provide:
+        - "strengths": 2 specific strengths (e.g., 'Uses Next.js for modern frontend'),
+        - "improvements": 3 actionable improvements (e.g., 'Add MongoDB to skills'),
+        - "missing_skills": 3 skills missing from resume but in job desc,
+        - "flags": 1-2 flags (e.g., 'High potential but low ATS score', 'Possible keyword stuffing').
+        Account for:
+        - Semantic skill matches (e.g., Node.js = NodeJS),
+        - Contextual inference (e.g., MERN → Express.js),
+        - Seniority (require achievements for >3 years exp),
+        - Recency (favor Next.js over jQuery),
+        - Role-specific focus (e.g., frontend: UI, backend: APIs).
         Return valid JSON only."""
         try:
 # --- Gradio Interface --- #
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Sidebar():
+        gr.Markdown("# Smart ATS Resume Analyzer")
+        gr.Markdown("Upload a PDF resume and optionally provide a job description and role type.")
     with gr.Row():
         with gr.Column(scale=1):
             pdf_input = gr.File(label="PDF Resume", type="binary")
             job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
+            role_type_input = gr.Dropdown(label="Role Type", choices=["General", "Frontend", "Backend"], value="General")
             submit_btn = gr.Button("Analyze")
         with gr.Column(scale=2):
     submit_btn.click(
         fn=analyze_resume,
+        inputs=[pdf_input, job_desc_input, role_type_input],
         outputs=[extracted_text, analysis_output]
     )