Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

Mangesh223 committed on Mar 31, 2025

Commit

82c5020

verified ·

1 Parent(s): fb52249

Update another_approch_of_resume_analysis.txt

Browse files

Files changed (1) hide show

another_approch_of_resume_analysis.txt +76 -239

another_approch_of_resume_analysis.txt CHANGED Viewed

@@ -1,264 +1,101 @@
 import gradio as gr
 import PyPDF2
-import io
-import re
 import json
-import os
-import gc
-from huggingface_hub import login
-from dotenv import load_dotenv
-# --- Configuration --- #
-load_dotenv()
-login(token=os.getenv("HF_TOKEN"))
-# Precompiled regex patterns
-YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
-ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved|optimized)\s+.*?(?:\s+by\s+)?(\d+%|\$\d+|\d+\s*[a-z]+)', re.I)
-TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
-SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements|github)\s*:?', re.I | re.M)
-DENSITY_PATTERN = re.compile(r'\b(\w+)\b.*\b\1\b', re.I)  # Detect repeated keywords
-LEADERSHIP_PATTERN = re.compile(r'(mentor|led|managed|team lead|open source|contributor|tech talk)', re.I)
-# Skill equivalence and inference
-SKILL_EQUIVALENTS = {
-    "node.js": {"nodejs"}, "react": {"preact"}, "mongodb": {"dynamodb"},
-    "javascript": {"js"}, "sql": {"mysql", "postgresql"}
-}
-SKILL_INFERENCES = {
-    "mern stack": {"mongodb", "express.js", "react", "node.js"},
-    "mean stack": {"mongodb", "express.js", "angular", "node.js"}
-}
-RECENT_TECH = {"next.js", "react 18", "node 20", "python 3.11"}
-OUTDATED_TECH = {"jquery", "angularjs", "php 5"}
-def extract_text_from_pdf(pdf_file):
-    """Extract text from PDF with detailed error handling"""
-    if pdf_file is None:
-        raise ValueError("No PDF file uploaded")
-    if isinstance(pdf_file, str):
-        with open(pdf_file, 'rb') as f:
-            file_bytes = f.read()
-    elif isinstance(pdf_file, bytes):
-        file_bytes = pdf_file
     else:
-        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")
-    try:
-        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
-        if len(pdf_reader.pages) == 0:
-            raise ValueError("PDF has no pages")
-        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
-        if not text.strip():
-            raise ValueError("No text extracted from PDF (possibly image-based or empty)")
-        return text[:10000]
-    except PyPDF2.errors.PdfReadError as e:
-        raise Exception(f"PDF read error: {str(e)}")
-    except Exception as e:
-        raise Exception(f"Extraction error: {str(e)}")
-    finally:
-        gc.collect()
-def extract_keywords(job_desc, role_type="general"):
-    """Extract job-specific keywords with role-based weighting"""
-    if not job_desc:
-        return set(), set(), set()
-    job_lower = job_desc.lower()
-    skill_pattern = re.compile(r'\b(python|sql|excel|java|react|node\.?js|mongodb|aws|docker|api|ui|ux|devops|[a-z]{2,}\d*)\b', re.I)
-    keywords = set(skill_pattern.findall(job_lower))
-    frontend_terms = {"react", "vue", "angular", "ui", "ux", "css", "html", "javascript"}
-    backend_terms = {"node.js", "python", "sql", "mongodb", "api", "django", "flask", "devops"}
-    # Role-specific weighting
-    critical_keywords = set()
-    if "frontend" in role_type.lower():
-        critical_keywords = keywords & frontend_terms
-    elif "backend" in role_type.lower():
-        critical_keywords = keywords & backend_terms
-    else:
-        critical_keywords = keywords
-    return keywords, critical_keywords, set(re.findall(r'\w+', job_lower))
-def calculate_scores(resume_text, job_desc=None, role_type="general"):
-    """Advanced scoring with semantic matching, seniority, and recency"""
-    resume_lower = resume_text.lower()
-    scores = {
-        "relevance_to_job": 0, "experience_quality": 0, "skills_match": 0,
-        "education": 0, "achievements": 0, "clarity": 10, "customization": 0,
-        "seniority": 0, "fresher_potential": 0
     }
-    job_keywords, critical_keywords, job_words = extract_keywords(job_desc, role_type)
-    resume_words = set(re.findall(r'\w+', resume_lower))
-    # Semantic Skill Matching & Inference
-    effective_skills = set()
-    for skill in resume_words:
-        effective_skills.add(skill)
-        for base_skill, equivalents in SKILL_EQUIVALENTS.items():
-            if skill in equivalents:
-                effective_skills.add(base_skill)
-        for stack, inferred in SKILL_INFERENCES.items():
-            if stack in resume_lower:
-                effective_skills.update(inferred)
-    # Skills Match & Transfer
-    if job_keywords:
-        matches = job_keywords & effective_skills
-        critical_matches = critical_keywords & effective_skills
-        scores["skills_match"] = min(20, len(matches) * 2 + len(critical_matches) * 3)
-        scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
     else:
-        scores["skills_match"] = min(10, len(effective_skills) * 2)
-        scores["relevance_to_job"] = min(10, len(effective_skills))
-    # Experience: Projects = Work
-    years = len(YEAR_PATTERN.findall(resume_text))
-    project_count = len(re.findall(r'(project|github|freelance)', resume_lower, re.I))
-    scores["experience_quality"] = min(15, years * 2 + project_count * 1)
-    # Seniority & Leadership
-    leadership_signals = len(LEADERSHIP_PATTERN.findall(resume_text))
-    scores["seniority"] = min(10, years + leadership_signals) if years > 3 else 0
-    # Fresher Potential
-    if years < 2:
-        learning_signals = len(re.findall(r'(learned|bootcamp|course|upskill)', resume_lower, re.I))
-        scores["fresher_potential"] = min(10, learning_signals * 2)
-    # Education
-    if 'phd' in resume_lower or 'doctorate' in resume_lower:
-        scores["education"] = 8
-    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
-        scores["education"] = 6
-    elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
-        scores["education"] = 4
-    # Achievements (Mandatory for Mid/Senior)
-    achievements = len(ACHIEVEMENT_PATTERN.findall(resume_text))
-    scores["achievements"] = min(10, achievements * 3)
-    if years > 3 and achievements == 0:
-        scores["achievements"] -= 5  # Penalty for missing metrics
-    # Recency Weighting
-    recent_bonus = sum(2 for tech in RECENT_TECH if tech in resume_lower)
-    outdated_penalty = sum(-1 for tech in OUTDATED_TECH if tech in resume_lower)
-    scores["skills_match"] = max(0, scores["skills_match"] + recent_bonus + outdated_penalty)
-    # Clarity & ATS Compliance
-    scores["clarity"] -= min(8, len(TYPO_PATTERN.findall(resume_text)))
-    if "column" in resume_lower or not resume_text.strip():  # Basic ATS formatting check
-        scores["clarity"] -= 5
-    # Keyword Density & Anti-Gaming
-    density_count = len(DENSITY_PATTERN.findall(resume_text))
-    if density_count > 10:  # Excessive repetition
-        scores["customization"] -= 5
-    elif job_keywords:
-        scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
-    return scores, min(100, sum(scores.values())), job_keywords, critical_keywords
-def analyze_resume(pdf_file, job_desc=None, role_type="general", inference_fn=None):
-    """Smart ATS analysis with detailed feedback"""
     try:
-        resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
-        return f"Extraction failed: {str(e)}", {"error": str(e)}
-    scores, total_score, job_keywords, critical_keywords = calculate_scores(resume_text, job_desc, role_type)
-    resume_words = set(re.findall(r'\w+', resume_text.lower()))
-    # Basic analysis
-    ats_score = scores["relevance_to_job"] + scores["skills_match"] + scores["clarity"]
-    human_potential = scores["seniority"] + scores["fresher_potential"] + scores["achievements"]
-    flag = "High human potential but low ATS score" if human_potential > 15 and ats_score < 20 else ""
-    basic_analysis = {
-        "strengths": [
-            f"Strong {role_type} skills (score: {scores['skills_match']})" if scores["skills_match"] > 10 else "",
-            f"Clear seniority signals (score: {scores['seniority']})" if scores["seniority"] > 5 else "",
-            f"High fresher potential (score: {scores['fresher_potential']})" if scores["fresher_potential"] > 5 else ""
-        ],
-        "improvements": [
-            f"Add critical {role_type} keywords (e.g., {list(critical_keywords)[:2]})" if scores["relevance_to_job"] < 10 else "",
-            "Include measurable achievements (e.g., 'Reduced latency by 30%')" if scores["achievements"] < 5 else "",
-            "Use recent tech (e.g., Next.js) over outdated (e.g., jQuery)" if any(t in resume_text.lower() for t in OUTDATED_TECH) else ""
-        ],
-        "missing_skills": list(critical_keywords - resume_words)[:3] if critical_keywords else ["e.g., Python", "e.g., SQL"],
-        "flags": [flag] if flag else []
-    }
-    basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
-    basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
-    # Enhanced analysis with inference
-    if inference_fn:
-        prompt = f"""[Return valid JSON]: Analyze this resume against job description: {job_desc or "None"} (role: {role_type}).
-        Resume sample: {resume_text[:200]}, scores: {scores}, job keywords: {list(job_keywords)[:5]}, critical keywords: {list(critical_keywords)[:5]}.
-        Provide:
-        - "strengths": 2 specific strengths (e.g., 'Uses Next.js for modern frontend'),
-        - "improvements": 3 actionable improvements (e.g., 'Add MongoDB to skills'),
-        - "missing_skills": 3 skills missing from resume but in job desc,
-        - "flags": 1-2 flags (e.g., 'High potential but low ATS score', 'Possible keyword stuffing').
-        Account for:
-        - Semantic skill matches (e.g., Node.js = NodeJS),
-        - Contextual inference (e.g., MERN → Express.js),
-        - Seniority (require achievements for >3 years exp),
-        - Recency (favor Next.js over jQuery),
-        - Role-specific focus (e.g., frontend: UI, backend: APIs).
-        Return valid JSON only."""
-        try:
-            result = inference_fn(prompt)
-            if result and result.strip():
-                enhanced_analysis = json.loads(result)
-                return (
-                    resume_text[:5000],
-                    {
-                        "score": {"total": total_score, "breakdown": scores},
-                        "analysis": enhanced_analysis,
-                        "raw_text_sample": resume_text[:200]
-                    }
-                )
-        except Exception as e:
-            print(f"Inference error: {str(e)}")
-    return (
-        resume_text[:5000],
-        {
-            "score": {"total": total_score, "breakdown": scores},
-            "analysis": basic_analysis,
-            "raw_text_sample": resume_text[:200]
-        }
-    )
-# --- Gradio Interface --- #
-with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("# Smart ATS Resume Analyzer")
-        gr.Markdown("Upload a PDF resume and optionally provide a job description and role type.")
     with gr.Row():
         with gr.Column(scale=1):
-            pdf_input = gr.File(label="PDF Resume", type="binary")
-            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
-            role_type_input = gr.Dropdown(label="Role Type", choices=["General", "Frontend", "Backend"], value="General")
-            submit_btn = gr.Button("Analyze")
         with gr.Column(scale=2):
-            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
-            analysis_output = gr.JSON(label="Analysis Results")
     submit_btn.click(
         fn=analyze_resume,
-        inputs=[pdf_input, job_desc_input, role_type_input],
-        outputs=[extracted_text, analysis_output]
     )
-demo.launch(share=True)

+import os
 import gradio as gr
 import PyPDF2
+import docx
+import requests
 import json
+# Function to extract text from PDF
+def extract_text_from_pdf(file):
+    """Extract the text of every page of a PDF.
+
+    Args:
+        file: Passed unchanged to ``PyPDF2.PdfReader``.
+
+    Returns:
+        The text of all pages joined with newlines. A page that yields no
+        text contributes an empty string.
+    """
+    pdf_reader = PyPDF2.PdfReader(file)
+    # Fall back to "" so a page that yields None cannot raise TypeError.
+    # NOTE(review): confirm whether the installed PyPDF2 can return None here.
+    # Joining with "\n" keeps the last word of one page from fusing with the
+    # first word of the next, and avoids repeated string concatenation.
+    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
+# Function to extract text from Word document
+def extract_text_from_docx(file):
+    """Return the text of every paragraph in a Word document, one per line.
+
+    Args:
+        file: Passed unchanged to ``docx.Document``.
+
+    Returns:
+        The paragraph texts joined with newline characters.
+    """
+    document = docx.Document(file)
+    return "\n".join(paragraph.text for paragraph in document.paragraphs)
+# Function to process uploaded file based on type
+def process_uploaded_file(file):
+    """Dispatch an uploaded resume to the extractor matching its extension.
+
+    Args:
+        file: The uploaded file, either an object exposing ``.name`` or a
+            plain path string.
+
+    Returns:
+        The extracted resume text.
+
+    Raises:
+        ValueError: If nothing was uploaded or the extension is neither
+            ``.pdf`` nor ``.docx``.
+    """
+    if file is None:
+        # Without this guard a missing upload surfaces as a cryptic
+        # "'NoneType' object has no attribute 'name'".
+        raise ValueError("No file uploaded. Please upload a PDF or Word document.")
+    # NOTE(review): depending on the Gradio version the value may be a plain
+    # path string or an object with a .name attribute - confirm; both work here.
+    filename = str(getattr(file, "name", file)).lower()
+    # Compare case-insensitively so "Resume.PDF" / "CV.DOCX" are accepted.
+    if filename.endswith(".pdf"):
+        return extract_text_from_pdf(file)
+    elif filename.endswith(".docx"):
+        return extract_text_from_docx(file)
     else:
+        raise ValueError("Unsupported file format. Please upload a PDF or Word document.")
+# Function to call Together API for Mistral inference
+def analyze_with_mistral(resume_text, job_description):
+    """Ask the Together chat-completions API to score a resume against a job.
+
+    Args:
+        resume_text: Plain text of the resume.
+        job_description: Plain text of the job description.
+
+    Returns:
+        The model's reply text on success, or a JSON string of the form
+        ``{"error": ...}`` when the API key is missing or the API answers
+        with a non-200 status.
+
+    Raises:
+        requests.exceptions.RequestException: On network failure or timeout.
+    """
+    # The key is read from HUGGINGFACE_API_KEY as before; TOGETHER_API_KEY is
+    # accepted as a fallback because the request goes to Together.
+    TOGETHER_API_KEY = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("TOGETHER_API_KEY")
+    if not TOGETHER_API_KEY:
+        # Fail fast instead of sending "Bearer None" and getting an opaque 401.
+        return json.dumps({"error": "API key not configured: set HUGGINGFACE_API_KEY (or TOGETHER_API_KEY)."}, indent=2)
+    url = "https://api.together.xyz/v1/chat/completions"
+    # Constructing the message format
+    messages = [
+        {"role": "system", "content": "You are an AI expert in ATS resume analysis."},
+        {"role": "user", "content": f"""
+        Analyze the following resume against the job description for ATS compatibility.
+        Provide a detailed breakdown of ATS parameters (keywords, formatting, skills match,
+        experience relevance, education) and assign a score out of 100 for each, along with an overall score.
+        Return the result in JSON format.
+        Resume:
+        {resume_text}
+        Job Description:
+        {job_description}
+        """}
+    ]
+    payload = {
+        "model": "mistralai/Mistral-7B-Instruct-v0.3",
+        "messages": messages,
+        "max_tokens": 1000,
+        "temperature": 0.7,
+        "top_p": 0.9,
     }
+    headers = {
+        "Authorization": f"Bearer {TOGETHER_API_KEY}",
+        "Content-Type": "application/json",
+    }
+    # requests has no default timeout; without one a stalled connection
+    # would block the UI callback indefinitely.
+    response = requests.post(url, json=payload, headers=headers, timeout=60)
+    if response.status_code == 200:
+        result = response.json()
+        # "choices" may be present but empty; fall back to a stub entry so
+        # indexing [0] cannot raise IndexError.
+        choices = result.get("choices") or [{}]
+        return choices[0].get("message", {}).get("content", "No response from API")
     else:
+        return json.dumps({"error": f"API request failed with status {response.status_code}: {response.text}"}, indent=2)
+# Main function to analyze resume
+def analyze_resume(file, job_description):
+    """UI callback: extract the resume text and return the ATS analysis as JSON.
+
+    Args:
+        file: The uploaded resume, forwarded to ``process_uploaded_file``.
+        job_description: The job description text entered by the user.
+
+    Returns:
+        A JSON string: the model's reply when it is valid JSON,
+        ``{"raw_response": ...}`` when the reply is not valid JSON, or
+        ``{"error": ...}`` when extraction or the API call fails.
+    """
     try:
+        resume_text = process_uploaded_file(file)
+        result = analyze_with_mistral(resume_text, job_description)
     except Exception as e:
+        # Top-level UI boundary: report any failure to the user as JSON
+        # instead of letting the callback crash.
+        return json.dumps({"error": str(e)}, indent=2)
+    # The result is shown in a JSON component, but the model may wrap its
+    # JSON in prose or code fences; wrap such replies so the output is
+    # always valid JSON.
+    try:
+        json.loads(result)
+    except (TypeError, ValueError):
+        return json.dumps({"raw_response": result}, indent=2)
+    return result
+# Gradio interface
+with gr.Blocks(fill_height=True, title="Smart ATS Resume Analyzer") as demo:
+    # Sidebar: app title and a short usage hint.
     with gr.Sidebar():
         gr.Markdown("# Smart ATS Resume Analyzer")
+        gr.Markdown("Upload your resume (PDF/Word) and enter a job description to get an ATS compatibility score.")
     with gr.Row():
+        # Left column: the two inputs and the button that starts the analysis.
         with gr.Column(scale=1):
+            resume_upload = gr.File(label="Upload Resume (PDF or Word)", file_types=[".pdf", ".docx"])
+            job_desc = gr.Textbox(label="Job Description", lines=10, placeholder="Paste the job description here...")
+            submit_btn = gr.Button("Analyze Resume")
+        # Right column: JSON component that displays the analysis result.
         with gr.Column(scale=2):
+            output = gr.JSON(label="ATS Analysis Result")
+    # Clicking the button calls analyze_resume(resume_upload, job_desc) and
+    # sends its return value to the JSON output component.
     submit_btn.click(
         fn=analyze_resume,
+        inputs=[resume_upload, job_desc],
+        outputs=output
     )
+# Start the Gradio app as soon as this script is executed.
+demo.launch()