Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

Mangesh223 committed on Mar 27, 2025

Commit

c9c405f

verified ·

1 Parent(s): 9b62178

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -44

app.py CHANGED Viewed

@@ -12,23 +12,17 @@ from dotenv import load_dotenv
 load_dotenv()
 login(token=os.getenv("HF_TOKEN"))
-# Skills set for faster lookups
-GENERAL_SKILLS = {
-    'communication', 'problem solving', 'project management',
-    'python', 'sql', 'excel', 'teamwork'
-}
 # Precompiled regex patterns
 YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
 ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
 TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
 def extract_text_from_pdf(pdf_file):
     """Extract text from PDF with detailed error handling"""
     if pdf_file is None:
         raise ValueError("No PDF file uploaded")
-    # Handle both file path and bytes input
     if isinstance(pdf_file, str):
         with open(pdf_file, 'rb') as f:
             file_bytes = f.read()
@@ -42,8 +36,8 @@ def extract_text_from_pdf(pdf_file):
         if len(pdf_reader.pages) == 0:
             raise ValueError("PDF has no pages")
-        text = "\n".join(page.extract_text() for page in pdf_reader.pages)
-        if text is None or text.strip() == "":
             raise ValueError("No text extracted from PDF (possibly image-based or empty)")
         return text[:10000]  # Limit to first 10,000 characters
@@ -54,8 +48,23 @@ def extract_text_from_pdf(pdf_file):
     finally:
         gc.collect()
 def calculate_scores(resume_text, job_desc=None):
-    """Optimized scoring function"""
     resume_lower = resume_text.lower()
     scores = {
         "relevance_to_job": 0,
@@ -67,71 +76,92 @@ def calculate_scores(resume_text, job_desc=None):
         "customization": 0
     }
-    if job_desc:
-        job_words = set(re.findall(r'\w+', job_desc.lower()))
-        resume_words = set(re.findall(r'\w+', resume_lower))
-        scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words)))
     else:
-        scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
-    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
-    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
     if 'phd' in resume_lower or 'doctorate' in resume_lower:
         scores["education"] = 8
     elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
         scores["education"] = 6
-    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
         scores["education"] = 4
     elif 'high school' in resume_lower:
         scores["education"] = 2
-    return scores, min(100, sum(scores.values()))
 def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
-    """Analyze resume and return extracted text and analysis as separate outputs"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
         return (
-            f"Extraction failed: {str(e)}",  # First output for textbox
-            {"error": str(e)}  # Second output for JSON
         )
-    scores, total_score = calculate_scores(resume_text, job_desc)
-    # Basic analysis if inference fails
     basic_analysis = {
-        "score": {
-            "total": total_score,
-            "breakdown": scores
-        },
         "strengths": [
-            "Good clarity score" if scores["clarity"] > 7 else None,
-            "Relevant skills" if scores["relevance_to_job"] > 5 else None
         ],
         "improvements": [
-            "Add more measurable achievements" if scores["achievements"] < 3 else None,
-            "Include more relevant keywords" if scores["relevance_to_job"] < 5 else None,
-            "Check for typos" if scores["clarity"] < 9 else None
         ],
-        "missing_skills": list(GENERAL_SKILLS - set(re.findall(r'\w+', resume_text.lower())))[:2]
     }
-    # Try to get enhanced analysis if inference function is available
     if inference_fn:
-        prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
-        - "strengths": 2 key strengths,
-        - "improvements": 3 specific improvements,
-        - "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
-        Output a valid JSON string only, no extra text."""
         try:
             result = inference_fn(prompt)
             if result and result.strip():
                 enhanced_analysis = json.loads(result)
                 return (
-                    resume_text[:5000],  # First output for textbox (limited to 5000 chars)
                     {
                         "score": {"total": total_score, "breakdown": scores},
                         "analysis": enhanced_analysis,
@@ -140,10 +170,9 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
                 )
         except Exception as e:
             print(f"Inference error: {str(e)}")
-            # Fall through to basic analysis
     return (
-        resume_text[:5000],  # First output for textbox
         {
             "score": {"total": total_score, "breakdown": scores},
             "analysis": basic_analysis,
@@ -155,7 +184,7 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("# Resume Analyzer")
-        gr.Markdown("Upload your resume in PDF format for analysis")
     with gr.Row():
         with gr.Column(scale=1):

 load_dotenv()
 login(token=os.getenv("HF_TOKEN"))
 # Precompiled regex patterns
 YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
 ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
 TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
+SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements)\s*:?', re.I | re.M)
 def extract_text_from_pdf(pdf_file):
     """Extract text from PDF with detailed error handling"""
     if pdf_file is None:
         raise ValueError("No PDF file uploaded")
     if isinstance(pdf_file, str):
         with open(pdf_file, 'rb') as f:
             file_bytes = f.read()
         if len(pdf_reader.pages) == 0:
             raise ValueError("PDF has no pages")
+        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
+        if not text.strip():
             raise ValueError("No text extracted from PDF (possibly image-based or empty)")
         return text[:10000]  # Limit to first 10,000 characters
     finally:
         gc.collect()
# Curated skill/tool names plus a generic "letters with optional trailing
# digits" alternative (customize the curated names for your domain).
# NOTE: the generic alternative matches every run of two or more ASCII
# letters, so ordinary words such as "and" are reported as keywords too.
_SKILL_KEYWORD_PATTERN = re.compile(
    r'\b(python|sql|excel|java|project management|communication|teamwork|aws|docker|[a-z]{2,}\d*)\b',
    re.I,
)


def extract_keywords(job_desc):
    """Extract candidate keywords from a job description.

    The description is lower-cased, then two sources are merged:

    1. Every match of ``_SKILL_KEYWORD_PATTERN`` (curated skill names,
       including the two-word phrase "project management", plus any word
       made of two or more letters with optional trailing digits).
    2. Every ``\\w+`` token longer than three characters that occurs more
       than twice as a whole token. This picks up frequent terms the
       pattern cannot match, such as tokens that start with a digit.

    Args:
        job_desc: Job description text; ``None`` or an empty string is allowed.

    Returns:
        A set of lower-cased keyword strings; empty when ``job_desc`` is falsy.
    """
    from collections import Counter

    if not job_desc:
        return set()

    job_lower = job_desc.lower()
    keywords = set(_SKILL_KEYWORD_PATTERN.findall(job_lower))

    # Count whole tokens rather than substrings: str.count() would treat
    # "2024" as repeated when it merely appears inside "20241", and would
    # rescan the full text once per unique word.
    token_counts = Counter(re.findall(r'\w+', job_lower))
    keywords.update(
        token
        for token, occurrences in token_counts.items()
        if occurrences > 2 and len(token) > 3
    )
    return keywords
 def calculate_scores(resume_text, job_desc=None):
+    """Smart scoring tailored to job description"""
     resume_lower = resume_text.lower()
     scores = {
         "relevance_to_job": 0,
         "customization": 0
     }
+    job_keywords = extract_keywords(job_desc) if job_desc else set()
+    resume_words = set(re.findall(r'\w+', resume_lower))
+    # Relevance: Exact matches with job keywords
+    if job_keywords:
+        matches = job_keywords & resume_words
+        scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
+        scores["skills_match"] = min(20, sum(2 for word in matches if len(word) > 3) + sum(1 for word in matches))
     else:
+        # Fallback: Infer skills from resume if no job desc
+        inferred_skills = set(re.findall(r'\b(python|sql|excel|java|management|teamwork|analysis)\b', resume_lower, re.I))
+        scores["skills_match"] = min(10, len(inferred_skills) * 2)
+        scores["relevance_to_job"] = min(10, len(inferred_skills))
+    # Experience: Years + context
+    years = len(YEAR_PATTERN.findall(resume_text))
+    scores["experience_quality"] = min(10, years * 2)
+    if "experience" in resume_lower:
+        scores["experience_quality"] += min(5, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
+    # Education
     if 'phd' in resume_lower or 'doctorate' in resume_lower:
         scores["education"] = 8
     elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
         scores["education"] = 6
+    elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
         scores["education"] = 4
     elif 'high school' in resume_lower:
         scores["education"] = 2
+    # Achievements
+    scores["achievements"] = min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 3)
+    # Customization: Check if resume mirrors job desc structure
+    if job_desc and job_keywords:
+        scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
+    return scores, min(100, sum(scores.values())), job_keywords
 def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
+    """Analyze resume with smart, job-specific feedback"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
         return (
+            f"Extraction failed: {str(e)}",
+            {"error": str(e)}
         )
+    scores, total_score, job_keywords = calculate_scores(resume_text, job_desc)
+    resume_words = set(re.findall(r'\w+', resume_text.lower()))
+    # Basic analysis
     basic_analysis = {
         "strengths": [
+            f"Clear formatting (score: {scores['clarity']})" if scores["clarity"] > 7 else "",
+            f"Strong experience (score: {scores['experience_quality']})" if scores["experience_quality"] > 5 else ""
         ],
         "improvements": [
+            "Add specific achievements (e.g., 'Increased sales by 20%')" if scores["achievements"] < 5 else "",
+            f"Include more job-specific keywords (e.g., {list(job_keywords)[:2]})" if scores["relevance_to_job"] < 10 and job_keywords else "",
+            "Correct typos for better ATS parsing" if scores["clarity"] < 8 else ""
         ],
+        "missing_skills": list(job_keywords - resume_words)[:3] if job_keywords else ["e.g., Python", "e.g., SQL"]
     }
+    # Filter out empty strings
+    basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
+    basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
+    # Enhanced analysis with inference (if available)
     if inference_fn:
+        prompt = f"""[Return valid JSON]: Analyze this resume against the job description: {job_desc or "None"}.
+        Based on scores: {scores}, resume sample: {resume_text[:200]}, and job keywords: {list(job_keywords)[:5]},
+        provide:
+        - "strengths": 2 specific strengths (e.g., 'Lists 3+ years of Python experience'),
+        - "improvements": 3 actionable improvements (e.g., 'Add "AWS" to skills section'),
+        - "missing_skills": 3 skills missing from resume but in job desc (or inferred if no job desc).
+        Return valid JSON only."""
         try:
             result = inference_fn(prompt)
             if result and result.strip():
                 enhanced_analysis = json.loads(result)
                 return (
+                    resume_text[:5000],
                     {
                         "score": {"total": total_score, "breakdown": scores},
                         "analysis": enhanced_analysis,
                 )
         except Exception as e:
             print(f"Inference error: {str(e)}")
     return (
+        resume_text[:5000],
         {
             "score": {"total": total_score, "breakdown": scores},
             "analysis": basic_analysis,
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("# Resume Analyzer")
+        gr.Markdown("Upload your resume in PDF format and optionally provide a job description.")
     with gr.Row():
         with gr.Column(scale=1):