Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

Mangesh223 committed on Mar 27, 2025

Commit

d742c72

verified ·

1 Parent(s): dfa143b

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -135

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import io
 import re
 import json
 import os
-import gc
 from huggingface_hub import login
 from dotenv import load_dotenv
@@ -12,165 +11,119 @@ from dotenv import load_dotenv
 load_dotenv()
 login(token=os.getenv("HF_TOKEN"))
-# Skills set for faster lookups
-GENERAL_SKILLS = {
-    'communication', 'problem solving', 'project management',
-    'python', 'sql', 'excel', 'teamwork'
-}
-# Precompiled regex patterns
-YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
-ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
-TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
 def extract_text_from_pdf(pdf_file):
-    """Extract text from PDF with detailed error handling"""
-    if pdf_file is None:
-        raise ValueError("No PDF file uploaded")
-    # Handle both file path and bytes input
-    if isinstance(pdf_file, str):
-        with open(pdf_file, 'rb') as f:
-            file_bytes = f.read()
-    elif isinstance(pdf_file, bytes):
-        file_bytes = pdf_file
-    else:
-        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")
     try:
         pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
-        if len(pdf_reader.pages) == 0:
-            raise ValueError("PDF has no pages")
-        text = "\n".join(page.extract_text() for page in pdf_reader.pages)
-        if text is None or text.strip() == "":
-            raise ValueError("No text extracted from PDF (possibly image-based or empty)")
-        return text[:10000]  # Limit to first 10,000 characters
-    except PyPDF2.errors.PdfReadError as e:
-        raise Exception(f"PDF read error: {str(e)}")
     except Exception as e:
-        raise Exception(f"Extraction error: {str(e)}")
-    finally:
-        gc.collect()
-def calculate_scores(resume_text, job_desc=None):
-    """Optimized scoring function"""
-    resume_lower = resume_text.lower()
-    scores = {
-        "relevance_to_job": 0,
-        "experience_quality": 0,
-        "skills_match": 0,
-        "education": 0,
-        "achievements": 0,
-        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
-        "customization": 0
-    }
-    if job_desc:
-        job_words = set(re.findall(r'\w+', job_desc.lower()))
-        resume_words = set(re.findall(r'\w+', resume_lower))
-        scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words)))
-    else:
-        scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
-    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
-    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
-    if 'phd' in resume_lower or 'doctorate' in resume_lower:
-        scores["education"] = 8
-    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
-        scores["education"] = 6
-    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
-        scores["education"] = 4
-    elif 'high school' in resume_lower:
-        scores["education"] = 2
-    return scores, min(100, sum(scores.values()))
 def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
-    """Analyze resume and return extracted text and analysis as separate outputs"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
         return (
-            f"Extraction failed: {str(e)}",  # First output for textbox
-            {"error": str(e)}  # Second output for JSON
         )
-    scores, total_score = calculate_scores(resume_text, job_desc)
-    # Basic analysis if inference fails
-    basic_analysis = {
-        "score": {
-            "total": total_score,
-            "breakdown": scores
-        },
-        "strengths": [
-            "Good clarity score" if scores["clarity"] > 7 else None,
-            "Relevant skills" if scores["relevance_to_job"] > 5 else None
-        ],
-        "improvements": [
-            "Add more measurable achievements" if scores["achievements"] < 3 else None,
-            "Include more relevant keywords" if scores["relevance_to_job"] < 5 else None,
-            "Check for typos" if scores["clarity"] < 9 else None
-        ],
-        "missing_skills": list(GENERAL_SKILLS - set(re.findall(r'\w+', resume_text.lower())))[:2]
-    }
-    # Try to get enhanced analysis if inference function is available
-    if inference_fn:
-        prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
-        - "strengths": 2 key strengths,
-        - "improvements": 3 specific improvements,
-        - "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
-        Output a valid JSON string only, no extra text."""
-        try:
             result = inference_fn(prompt)
-            if result and result.strip():
-                enhanced_analysis = json.loads(result)
-                return (
-                    resume_text[:5000],  # First output for textbox (limited to 5000 chars)
-                    {
-                        "score": {"total": total_score, "breakdown": scores},
-                        "analysis": enhanced_analysis,
-                        "raw_text_sample": resume_text[:200]
-                    }
-                )
-        except Exception as e:
-            print(f"Inference error: {str(e)}")
-            # Fall through to basic analysis
     return (
-        resume_text[:5000],  # First output for textbox
         {
-            "score": {"total": total_score, "breakdown": scores},
-            "analysis": basic_analysis,
-            "raw_text_sample": resume_text[:200]
         }
     )
-# --- Gradio Interface --- #
-with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
-    with gr.Sidebar():
-        gr.Markdown("# Resume Analyzer")
-        gr.Markdown("Upload your resume in PDF format for analysis")
     with gr.Row():
-        with gr.Column(scale=1):
-            pdf_input = gr.File(label="PDF Resume", type="binary")
-            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
-            submit_btn = gr.Button("Analyze")
-        with gr.Column(scale=2):
-            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
-            analysis_output = gr.JSON(label="Analysis Results")
-    submit_btn.click(
         fn=analyze_resume,
         inputs=[pdf_input, job_desc_input],
-        outputs=[extracted_text, analysis_output]
     )
-demo.launch(share=True)

 import re
 import json
 import os
 from huggingface_hub import login
 from dotenv import load_dotenv
 load_dotenv()
 login(token=os.getenv("HF_TOKEN"))
 def extract_text_from_pdf(pdf_file):
+    """Improved PDF text extraction with error handling"""
     try:
+        if isinstance(pdf_file, bytes):
+            file_bytes = pdf_file
+        else:
+            raise ValueError("Invalid file format")
         pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
+        text = "\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
+        return text[:15000]  # Increased character limit
     except Exception as e:
+        raise Exception(f"PDF processing error: {str(e)}")
+def generate_ai_prompt(resume_text, job_desc=None):
+    """Generates smart analysis prompt for AI"""
+    return f"""
+    Analyze this resume comprehensively:
+    {resume_text[:10000]}
+    {f"Compare against this job description: {job_desc[:2000]}" if job_desc else ""}
+    Return JSON with:
+    {{
+      "score": 0-100 (overall quality),
+      "score_breakdown": {{
+        "skills": 0-25 (variety and relevance),
+        "experience": 0-20 (duration and roles),
+        "achievements": 0-20 (quantifiable impact),
+        "education": 0-15,
+        "clarity": 0-10 (readability and structure),
+        "customization": 0-10 (job fit if JD provided)
+      }},
+      "detected_skills": ["list", "of", "skills", "with", "variants"],
+      "strengths": ["list", "of", "2-3", "key", "strengths"],
+      "improvements": ["3-5", "specific", "actionable", "suggestions"],
+      {'"missing_keywords": ["important", "missing", "terms"]' if job_desc else ""}
+    }}
+    """
 def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
+    """Main analysis function with AI integration"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
         return (
+            f"Error: {str(e)}",
+            {"error": str(e)}
         )
+    # Generate AI-powered analysis
+    prompt = generate_ai_prompt(resume_text, job_desc)
+    try:
+        if inference_fn:
             result = inference_fn(prompt)
+            analysis = json.loads(result)
+            # Ensure score calculation
+            if "score" not in analysis:
+                analysis["score"] = min(100, sum(analysis["score_breakdown"].values()))
+            return (
+                resume_text[:5000],
+                {
+                    "analysis": analysis,
+                    "raw_prompt": prompt[:1000] if len(prompt) > 1000 else prompt
+                }
+            )
+    except Exception as e:
+        print(f"AI analysis error: {str(e)}")
+    # Fallback basic analysis
     return (
+        resume_text[:5000],
         {
+            "error": "AI analysis unavailable",
+            "raw_text": resume_text[:1000]
         }
     )
+# --- Modern Gradio Interface --- #
+with gr.Blocks(theme=gr.themes.Soft(), title="AI Resume Analyzer") as demo:
     with gr.Row():
+        with gr.Column():
+            gr.Markdown("## 🚀 Smart Resume Analysis")
+            with gr.Tab("Upload"):
+                pdf_input = gr.File(label="Resume (PDF)", type="binary")
+                job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=5)
+                analyze_btn = gr.Button("Analyze", variant="primary")
+            with gr.Tab("Example"):
+                gr.Examples(
+                    examples=["sample_resume.pdf"],
+                    inputs=pdf_input,
+                    label="Try with sample resume"
+                )
+        with gr.Column():
+            with gr.Tab("Results"):
+                score_gauge = gr.Gauge(label="Overall Score", minimum=0, maximum=100)
+                gr.Markdown("### 🔍 Analysis Breakdown")
+                analysis_output = gr.JSON(label="Details")
+            with gr.Tab("Text Preview"):
+                extracted_text = gr.Textbox(label="Extracted Content", lines=15)
+    analyze_btn.click(
         fn=analyze_resume,
         inputs=[pdf_input, job_desc_input],
+        outputs=[extracted_text, analysis_output],
+        api_name="analyze"
     )
+if __name__ == "__main__":
+    demo.launch(server_port=7860, share=True)