Spaces:

loki2910
/

Resume-Analyser

Sleeping

App Files Files Community

loki2910 committed on Dec 11, 2025

Commit

fc2dad3

verified ·

1 Parent(s): 0a5c82c

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -244

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import re
 import tempfile
 import traceback
-from typing import Tuple, Dict, List
 import fitz  # PyMuPDF
 import docx  # python-docx
@@ -46,27 +46,18 @@ EN_STOPWORDS = {
 }
 # --------------------------
-# Job Suggestions Database - Updated for better accuracy
 # --------------------------
 JOB_SUGGESTIONS_DB = {
-    "Data Scientist": {"python", "sql", "machine", "learning", "tensorflow", "pytorch", "analysis", "pandas", "numpy"},
-    "Data Analyst": {"sql", "python", "excel", "tableau", "analysis", "statistics", "visualization"},
-    "Backend Developer": {"python", "java", "sql", "docker", "aws", "api", "git", "rest", "microservices"},
-    "Frontend Developer": {"react", "javascript", "html", "css", "git", "ui", "ux", "typescript"},
-    "Full-Stack Developer": {"python", "javascript", "react", "sql", "docker", "git", "nodejs"},
-    "Machine Learning Engineer": {"python", "tensorflow", "pytorch", "machine", "learning", "docker", "cloud", "aws", "gcp"},
-    "Project Manager": {"agile", "scrum", "project", "management", "jira", "confluence", "planning"}
 }
-# --------------------------
-# Enhanced keyword sets for specific job roles
-# --------------------------
-ML_ENGINEERING_KEYWORDS = {
-    "technical_skills": {"python", "machine", "learning", "tensorflow", "pytorch", "docker", "aws", "cloud", "sql", "git", "unix", "command", "line"},
-    "systems": {"ml", "systems", "data", "storage", "database", "api", "integration"},
-    "methodologies": {"agile", "scrum", "entrepreneurial", "distributed", "team"},
-    "soft_skills": {"collaboration", "communication", "problem", "solving", "initiative"}
-}
 # --------------------------
 # Utilities: text extraction
@@ -109,7 +100,7 @@ def extract_text_from_fileobj(file_obj) -> Tuple[str, str]:
 # --------------------------
-# Text preprocessing - Enhanced with better cleaning
 # --------------------------
 def preprocess_text(text: str, remove_stopwords: bool = True) -> str:
     if not text:
@@ -123,85 +114,6 @@ def preprocess_text(text: str, remove_stopwords: bool = True) -> str:
     return " ".join(words)
-# --------------------------
-# Enhanced section extraction
-# --------------------------
-def extract_resume_sections(resume_text: str) -> Dict:
-    sections = {
-        "summary": "",
-        "skills": "",
-        "experience": "",
-        "projects": "",
-        "education": "",
-        "certifications": ""
-    }
-    lines = resume_text.split('\n')
-    current_section = None
-    for line in lines:
-        line_lower = line.strip().lower()
-        # Identify section headers
-        if any(keyword in line_lower for keyword in ["summary", "objective"]):
-            current_section = "summary"
-            continue
-        elif any(keyword in line_lower for keyword in ["skills", "technical skills", "programming languages"]):
-            current_section = "skills"
-            continue
-        elif any(keyword in line_lower for keyword in ["experience", "work experience", "employment"]):
-            current_section = "experience"
-            continue
-        elif any(keyword in line_lower for keyword in ["projects", "personal projects", "academic projects"]):
-            current_section = "projects"
-            continue
-        elif any(keyword in line_lower for keyword in ["education", "academic background"]):
-            current_section = "education"
-            continue
-        elif any(keyword in line_lower for keyword in ["certifications", "certification", "licenses"]):
-            current_section = "certifications"
-            continue
-        # Add line to current section
-        if current_section and line.strip():
-            sections[current_section] += line + "\n"
-    return sections
-def extract_job_requirements(job_text: str) -> Dict:
-    requirements = {
-        "technical": "",
-        "experience": "",
-        "education": "",
-        "qualifications": ""
-    }
-    lines = job_text.split('\n')
-    current_section = None
-    for line in lines:
-        line_lower = line.strip().lower()
-        if any(keyword in line_lower for keyword in ["requirements", "qualifications", "what we're looking for"]):
-            current_section = "qualifications"
-            continue
-        elif any(keyword in line_lower for keyword in ["technical skills", "skills required", "requirements"]):
-            current_section = "technical"
-            continue
-        elif any(keyword in line_lower for keyword in ["experience", "years of experience"]):
-            current_section = "experience"
-            continue
-        elif any(keyword in line_lower for keyword in ["education", "degree", "qualification"]):
-            current_section = "education"
-            continue
-        if current_section and line.strip():
-            requirements[current_section] += line + "\n"
-    return requirements
 # --------------------------
 # Embedding helpers
 # --------------------------
@@ -226,107 +138,51 @@ def calculate_similarity(resume_text: str, job_text: str, mode: str = "sbert") -
 # --------------------------
-# Enhanced keyword analysis with weighted scoring
 # --------------------------
-def calculate_technical_match(resume_skills: str, job_requirements: str, weight: float = 0.4) -> float:
-    if not resume_skills or not job_requirements:
-        return 0.0
-    resume_clean = preprocess_text(resume_skills)
-    job_clean = preprocess_text(job_requirements)
-    resume_words = set(resume_clean.split())
-    job_words = set(job_clean.split())
-    # Use ML_ENGINEERING_KEYWORDS for specific role matching
-    ml_keywords = ML_ENGINEERING_KEYWORDS["technical_skills"]
-    matched_keywords = resume_words.intersection(ml_keywords)
-    total_keywords = len(ml_keywords)
-    if total_keywords == 0:
-        return 0.0
-    match_score = (len(matched_keywords) / total_keywords) * 100
-    return match_score * weight
-def calculate_experience_match(resume_exp: str, job_exp: str, weight: float = 0.3) -> float:
-    if not resume_exp or not job_exp:
-        return 0.0
-    sim = calculate_similarity(resume_exp, job_exp)
-    return sim * weight
-def calculate_education_match(resume_edu: str, job_edu: str, weight: float = 0.15) -> float:
-    if not resume_edu or not job_edu:
-        return 0.0
-    sim = calculate_similarity(resume_edu, job_edu)
-    return sim * weight
-def calculate_project_match(resume_projects: str, job_projects: str, weight: float = 0.15) -> float:
-    if not resume_projects or not job_projects:
-        return 0.0
-    sim = calculate_similarity(resume_projects, job_projects)
-    return sim * weight
-def analyze_resume_with_context(resume_text: str, job_description: str) -> Dict:
-    # Extract sections
-    resume_sections = extract_resume_sections(resume_text)
-    job_requirements = extract_job_requirements(job_description)
-    # Calculate weighted scores
-    technical_score = calculate_technical_match(
-        resume_sections["skills"],
-        job_requirements["technical"]
-    )
-    experience_score = calculate_experience_match(
-        resume_sections["experience"],
-        job_requirements["experience"]
-    )
-    education_score = calculate_education_match(
-        resume_sections["education"],
-        job_requirements["education"]
-    )
-    project_score = calculate_project_match(
-        resume_sections["projects"],
-        job_requirements.get("qualifications", "")
-    )
-    # Calculate overall score
-    overall_score = technical_score + experience_score + education_score + project_score
-    # Generate insights
-    insights = []
-    if technical_score < 30:
-        insights.append("⚠️ Consider adding more technical skills mentioned in the job description")
-    if experience_score < 20:
-        insights.append("⚠️ Highlight relevant experience that matches the job requirements")
-    if project_score < 15:
-        insights.append("⚠️ Showcase projects that demonstrate required skills")
-    if not insights:
-        insights.append("✅ Your resume shows good alignment with the job requirements")
-    return {
-        "overall_score": overall_score,
-        "technical_score": technical_score,
-        "experience_score": experience_score,
-        "education_score": education_score,
-        "project_score": project_score,
-        "insights": "\n".join(insights)
     }
 # --------------------------
-# Project Section Analysis - Enhanced
 # --------------------------
 def extract_projects_section(resume_text: str) -> str:
     project_headings = ["projects", "personal projects", "academic projects", "portfolio"]
@@ -337,22 +193,18 @@ def extract_projects_section(resume_text: str) -> str:
     lines = resume_text.split('\n')
     start_index = -1
     end_index = len(lines)
     for i, line in enumerate(lines):
         cleaned_line = line.strip().lower()
-        if any(heading in cleaned_line for heading in project_headings):
             start_index = i
             break
     if start_index == -1:
         return "Could not automatically identify a 'Projects' section in this resume."
     for i in range(start_index + 1, len(lines)):
-        cleaned_line = lines[i].strip().lower()
-        if len(cleaned_line.split()) < 4 and any(heading in cleaned_line for heading in end_headings):
             end_index = i
             break
     project_section_lines = lines[start_index:end_index]
     return "\n".join(project_section_lines)
@@ -423,7 +275,7 @@ def extract_top_keywords(text: str, top_n: int = 15) -> str:
 # --------------------------
-# Main Gradio app logic - Enhanced with context analysis
 # --------------------------
 def analyze_resume(file, job_description: str, mode: str):
     if file is None or not job_description.strip():
@@ -434,55 +286,34 @@ def analyze_resume(file, job_description: str, mode: str):
         if resume_text.strip().startswith("[Error"):
             raise RuntimeError(resume_text)
-        # Enhanced analysis with context
-        analysis_results = analyze_resume_with_context(resume_text, job_description)
-        overall_score = analysis_results["overall_score"]
-        # Generate verdict based on overall score
-        if overall_score >= 80:
-            verdict = f"<h3 style='color:green;'>✅ Excellent Match ({overall_score:.2f}%)</h3>"
-        elif overall_score >= 60:
-            verdict = f"<h3 style='color:limegreen;'>👍 Good Match ({overall_score:.2f}%)</h3>"
-        elif overall_score >= 40:
-            verdict = f"<h3 style='color:orange;'>⚠️ Fair Match ({overall_score:.2f}%)</h3>"
         else:
-            verdict = f"<h3 style='color:red;'>❌ Low Match ({overall_score:.2f}%)</h3>"
-        # Extract sections for display
-        resume_sections = extract_resume_sections(resume_text)
-        job_requirements = extract_job_requirements(job_description)
-        # Generate suggestions
-        suggestions = []
-        if analysis_results["technical_score"] < 30:
-            suggestions.append("Add more technical skills mentioned in the job description")
-        if analysis_results["experience_score"] < 20:
-            suggestions.append("Highlight relevant experience that matches the job requirements")
-        if analysis_results["project_score"] < 15:
-            suggestions.append("Showcase projects that demonstrate required skills")
-        suggestions_text = "\n".join(f"- {s}" for s in suggestions) if suggestions else "Great job! Your resume shows good alignment with the job requirements."
-        # Job suggestions
         job_suggestions = suggest_jobs(resume_text)
-        # Project analysis
         projects_section = extract_projects_section(resume_text)
         project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
-        # Keyword extraction
-        resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
-        jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
         return (
-            float(overall_score), verdict,
-            f"### 📊 Detailed Breakdown\n- Technical Skills: {analysis_results['technical_score']:.2f}%\n- Experience: {analysis_results['experience_score']:.2f}%\n- Education: {analysis_results['education_score']:.2f}%\n- Projects: {analysis_results['project_score']:.2f}%",
-            suggestions_text,
-            job_suggestions,
-            projects_section,
-            project_fit_verdict,
-            resume_keywords_text,
-            jd_keywords_text
         )
     except Exception as e:
@@ -561,4 +392,4 @@ def build_ui():
 if __name__ == "__main__":
     demo = build_ui()
     demo.launch()
-    #demo.launch(server_name="0.0.0.0")

 import re
 import tempfile
 import traceback
+from typing import Tuple, Dict
 import fitz  # PyMuPDF
 import docx  # python-docx
 }
 # --------------------------
+# Job Suggestions Database
 # --------------------------
 JOB_SUGGESTIONS_DB = {
+    "Data Scientist": {"python", "sql", "machine", "learning", "tensorflow", "pytorch", "analysis"},
+    "Data Analyst": {"sql", "python", "excel", "tableau", "analysis", "statistics"},
+    "Backend Developer": {"python", "java", "sql", "docker", "aws", "api", "git"},
+    "Frontend Developer": {"react", "javascript", "html", "css", "git", "ui", "ux"},
+    "Full-Stack Developer": {"python", "javascript", "react", "sql", "docker", "git"},
+    "Machine Learning Engineer": {"python", "tensorflow", "pytorch", "machine", "learning", "docker", "cloud"},
+    "Project Manager": {"agile", "scrum", "project", "management", "jira"}
 }
 # --------------------------
 # Utilities: text extraction
 # --------------------------
+# Text preprocessing
 # --------------------------
 def preprocess_text(text: str, remove_stopwords: bool = True) -> str:
     if not text:
     return " ".join(words)
 # --------------------------
 # Embedding helpers
 # --------------------------
 # --------------------------
+# Keyword analysis
 # --------------------------
+# Keyword vocabulary grouped by category. The category name selects both the
+# bucket a missing keyword is reported under and the wording of the suggestion
+# generated for it in analyze_resume_keywords().
+# NOTE: "nlp" is listed under both "skills" and "concepts", so a missing "nlp"
+# produces one suggestion per category.
+DEFAULT_KEYWORDS = {
+    "skills": {"python", "nlp", "java", "sql", "tensorflow", "pytorch", "docker", "git", "react", "cloud", "aws",
+               "azure"},
+    "concepts": {"machine", "learning", "data", "analysis", "nlp", "vision", "agile", "scrum"},
+    "roles": {"software", "engineer", "developer", "manager", "scientist", "analyst", "architect"},
+}
+def analyze_resume_keywords(resume_text: str, job_description: str):
+    """Report DEFAULT_KEYWORDS that the job description uses but the resume lacks.
+
+    Both texts are passed through preprocess_text() and split on whitespace
+    into word sets; a keyword counts as missing when it is in the job word
+    set and not in the resume word set.
+
+    Returns:
+        A 2-tuple ``(missing, suggestions_text)`` where ``missing`` maps a
+        category name to a sorted list of missing keywords (categories with
+        nothing missing are omitted) and ``suggestions_text`` is a
+        newline-joined list of "- " bullet lines.
+    """
+    clean_resume = preprocess_text(resume_text)
+    clean_job = preprocess_text(job_description)
+    resume_words = set(clean_resume.split())
+    job_words = set(clean_job.split())
+    missing = {}
+    for cat, kws in DEFAULT_KEYWORDS.items():
+        missing_from_cat = [kw for kw in kws if kw in job_words and kw not in resume_words]
+        if missing_from_cat:
+            # Sort so the output order does not depend on set iteration order.
+            missing[cat] = sorted(missing_from_cat)
+    # Section detection is a plain substring test on the lower-cased raw resume.
+    low_resume = (resume_text or "").lower()
+    # Only the "skills" flag is read below; "experience" and "summary" are
+    # computed but not used in this function.
+    sections_present = {
+        "skills": "skills" in low_resume,
+        "experience": "experience" in low_resume or "employment" in low_resume,
+        "summary": "summary" in low_resume or "objective" in low_resume,
     }
+    suggestions = []
+    # `missing` only ever holds non-empty lists, so this is true whenever any
+    # keyword is missing.
+    if any(missing.values()):
+        for cat, kws in missing.items():
+            for kw in kws:
+                if cat == "skills":
+                    suggestions.append(f"Add keyword '{kw}' to your Skills section." if sections_present[
+                        "skills"] else f"Consider creating a Skills section to include '{kw}'.")
+                elif cat == "concepts":
+                    suggestions.append(
+                        f"Try to demonstrate your knowledge of '{kw}' in your Experience or Projects section.")
+                elif cat == "roles":
+                    suggestions.append(f"Align your Summary/Objective to mention the title '{kw}'.")
+    else:
+        suggestions.append("Great job! Your resume contains many of the keywords found in the job description.")
+    return missing, "\n".join(f"- {s}" for s in suggestions)
 # --------------------------
+# Project Section Analysis
 # --------------------------
 def extract_projects_section(resume_text: str) -> str:
     project_headings = ["projects", "personal projects", "academic projects", "portfolio"]
     lines = resume_text.split('\n')
     start_index = -1
     end_index = len(lines)
     for i, line in enumerate(lines):
         cleaned_line = line.strip().lower()
+        if cleaned_line in project_headings:
             start_index = i
             break
     if start_index == -1:
         return "Could not automatically identify a 'Projects' section in this resume."
     for i in range(start_index + 1, len(lines)):
+        cleaned_line = line.strip().lower()
+        if len(cleaned_line.split()) < 4 and cleaned_line in end_headings:
             end_index = i
             break
     project_section_lines = lines[start_index:end_index]
     return "\n".join(project_section_lines)
 # --------------------------
+# Main Gradio app logic
 # --------------------------
 def analyze_resume(file, job_description: str, mode: str):
     if file is None or not job_description.strip():
         if resume_text.strip().startswith("[Error"):
             raise RuntimeError(resume_text)
+        cleaned_resume = preprocess_text(resume_text)
+        cleaned_job = preprocess_text(job_description)
+        sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
+        if sim_pct >= 80:
+            verdict = f"<h3 style='color:green;'>✅ Excellent Match ({sim_pct:.2f}%)</h3>"
+        elif sim_pct >= 60:
+            verdict = f"<h3 style='color:limegreen;'>👍 Good Match ({sim_pct:.2f}%)</h3>"
+        elif sim_pct >= 40:
+            verdict = f"<h3 style='color:orange;'>⚠️ Fair Match ({sim_pct:.2f}%)</h3>"
         else:
+            verdict = f"<h3 style='color:red;'>❌ Low Match ({sim_pct:.2f}%)</h3>"
+        missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
+        missing_formatted = format_missing_keywords(missing_dict)
         job_suggestions = suggest_jobs(resume_text)
         projects_section = extract_projects_section(resume_text)
         project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
+        resume_keywords_text = extract_top_keywords(cleaned_resume)
+        jd_keywords_text = extract_top_keywords(cleaned_job)
         return (
+            float(sim_pct), verdict, missing_formatted, suggestions_text,
+            job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text
         )
     except Exception as e:
 if __name__ == "__main__":
     demo = build_ui()
     demo.launch()
+    #demo.launch(server_name="0.0.0.0")