Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

Mangesh223 committed on Mar 27, 2025

Commit

aac2ac6

verified ·

1 Parent(s): 06b3165

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -135

app.py CHANGED Viewed

@@ -1,168 +1,149 @@
 import gradio as gr
 import PyPDF2
 import io
-import json
 import os
-import gc
-from huggingface_hub import login
 from dotenv import load_dotenv
-# --- Configuration --- #
-load_dotenv()
-login(token=os.getenv("HF_TOKEN"))
 def extract_text_from_pdf(pdf_file):
-    """Extract raw text from PDF"""
     if pdf_file is None:
         raise ValueError("No PDF file uploaded")
-    if isinstance(pdf_file, str):
-        with open(pdf_file, 'rb') as f:
-            file_bytes = f.read()
-    elif isinstance(pdf_file, bytes):
-        file_bytes = pdf_file
-    else:
-        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")
     try:
         pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
-        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
-        if not text.strip():
-            raise ValueError("No text extracted")
-        return text[:10000]  # Limit to avoid overwhelming AI
     except Exception as e:
-        raise Exception(f"Extraction error: {str(e)}")
-    finally:
-        gc.collect()
-# Placeholder inference function to match your example output
-def dummy_inference_fn(prompt):
-    """Temporary inference function - replace with your actual AI model"""
-    # Simulate response based on your resume and job description in prompt
-    resume_text = prompt.split("Analyze this resume: '")[1].split("' against job description")[0]
-    job_desc = prompt.split("against job description: '")[1].split("'")[0]
-    # Default response mimicking your example
-    response = {
-        "score": {
-            "total": 85,
-            "breakdown": {
-                "competency": 25,
-                "experience": 15,
-                "impact": 20,
-                "potential": 5,
-                "leadership": 0,
-                "adaptability": 20
-            }
-        },
-        "analysis": {
-            "strengths": [
-                "Strong frontend skills (React.js, JavaScript, UI components)",
-                "Proven performance impact (e.g., 30% code redundancy reduction, 20% efficiency boost)",
-                "Matches experience requirement (3+ years with relevant projects)"
-            ],
-            "improvements": [
-                "Emphasize UI/UX contributions in projects",
-                "Add leadership or teamwork examples for well-roundedness"
-            ],
-            "missing_skills": [],
-            "flags": []
-        }
-    }
-    # Adjust slightly if no job description
-    if job_desc == "None":
-        response["score"]["adaptability"] = 10
-        response["score"]["total"] = 75
-        response["analysis"]["strengths"] = [
-            "Strong technical skills (MERN stack, blockchain)",
-            "Proven project impact (e.g., 25% session time increase)",
-            "Solid experience (3+ years)"
-        ]
-        response["analysis"]["improvements"] = [
-            "Add leadership or teamwork examples",
-            "Highlight learning initiatives"
-        ]
-    return json.dumps(response)  # Return as JSON string
-# Real inference function example (uncomment and configure if you have a model)
-"""
-from transformers import pipeline
-def inference_fn(prompt):
-    model = pipeline("text-generation", model="gpt2", token=os.getenv("HF_TOKEN"))  # Replace with your model
-    response = model(prompt, max_length=2000, num_return_sequences=1)[0]["generated_text"]
-    start = response.find("[Return valid JSON]:") + len("[Return valid JSON]:")
-    return response[start:].strip()
-"""
-def analyze_resume(pdf_file, job_desc=None, inference_fn=dummy_inference_fn):
-    """Smart ATS relying fully on AI for analysis"""
     try:
         resume_text = extract_text_from_pdf(pdf_file)
-    except Exception as e:
-        return f"Extraction failed: {str(e)}", {"error": str(e)}
-    # Fallback if no inference function
-    if not inference_fn:
-        print("No inference function provided - using fallback")
-        basic_analysis = {
-            "score": {"total": 10, "breakdown": {"competency": 10}},
-            "analysis": {
-                "strengths": ["Resume text extracted"],
-                "improvements": ["Provide a job description for detailed analysis" if not job_desc else "Add more details"],
-                "missing_skills": [],
-                "flags": []
-            },
-            "raw_text_sample": resume_text[:200]
         }
-        return resume_text[:5000], basic_analysis
-    # AI-driven analysis
-    prompt = f"""[Return valid JSON]: You are a smart ATS designed to evaluate resumes without rejecting worthy candidates. Analyze this resume: '{resume_text[:2000]}' against job description: '{job_desc or "None"}'.
-    Provide:
-    - "score": {{total: X (0-100), breakdown: {{competency: X (technical/non-technical skills), experience: X (duration and depth), impact: X (achievements), potential: X (learning ability), leadership: X (influence), adaptability: X (fit to role or general)}}}}
-    - "analysis": {{"strengths": [2-3 items, e.g., "Strong React skills"], "improvements": [2-3 items, e.g., "Add teamwork examples"], "missing_skills": [0-3 items, only if job_desc provided], "flags": [0-2 items, e.g., "High potential candidate"]}}
-    Rules:
-    - Detect skills, experience, achievements, learning signals, and leadership dynamically from the resume text.
-    - If no job description, assess general potential across technical and non-technical domains.
-    - If job description exists, prioritize role-relevant traits but don’t penalize unrelated strengths.
-    - Infer skills (e.g., 'MERN' → 'MongoDB'), normalize variations (e.g., 'React.js' = 'React'), and weigh recent tech (e.g., 'Next.js') over outdated (e.g., 'jQuery').
-    - Focus on potential: Highlight capability even if formatting or keywords don’t perfectly match.
-    - Avoid rejection: Low scores should still come with positive feedback or flags for human review.
-    Return valid JSON only."""
-    try:
-        print("Calling inference_fn with prompt:", prompt[:200])  # Debug
-        result = inference_fn(prompt)
-        print("Inference result:", result)  # Debug
-        if result and result.strip():
-            analysis = json.loads(result)
-            analysis["raw_text_sample"] = resume_text[:200]
-            return resume_text[:5000], analysis
-        else:
-            raise ValueError("Empty AI response")
     except Exception as e:
-        print(f"AI analysis error: {str(e)}")
-        return resume_text[:5000], {
-            "score": {"total": 10, "breakdown": {"competency": 10}},
-            "analysis": {"strengths": ["Text processed"], "improvements": [f"Analysis failed: {str(e)}"], "missing_skills": [], "flags": []},
-            "raw_text_sample": resume_text[:200]
         }
-# --- Gradio Interface --- #
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=1):
-            pdf_input = gr.File(label="PDF Resume", type="binary")
             job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
-            submit_btn = gr.Button("Analyze")
         with gr.Column(scale=2):
-            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
-            analysis_output = gr.JSON(label="Analysis Results")
-    submit_btn.click(
-        fn=analyze_resume,
         inputs=[pdf_input, job_desc_input],
         outputs=[extracted_text, analysis_output]
     )

 import gradio as gr
 import PyPDF2
 import io
 import os
 from dotenv import load_dotenv
 def extract_text_from_pdf(pdf_file):
+    """
+    Robust PDF text extraction with comprehensive error handling
+    Args:
+        pdf_file (str/bytes): PDF file path or bytes
+    Returns:
+        str: Extracted text from PDF
+    """
     if pdf_file is None:
         raise ValueError("No PDF file uploaded")
     try:
+        # Handle different input types
+        if isinstance(pdf_file, str):
+            with open(pdf_file, 'rb') as f:
+                file_bytes = f.read()
+        elif isinstance(pdf_file, bytes):
+            file_bytes = pdf_file
+        else:
+            raise TypeError(f"Unsupported file type: {type(pdf_file)}")
+        # Advanced PDF text extraction
         pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
+        # Extract text from all pages, handle potential encoding issues
+        pages_text = []
+        for page in pdf_reader.pages:
+            try:
+                page_text = page.extract_text() or ""
+                pages_text.append(page_text.strip())
+            except Exception as page_error:
+                print(f"Error extracting page text: {page_error}")
+        # Join pages, handle empty extraction
+        full_text = "\n".join(pages_text)
+        if not full_text.strip():
+            raise ValueError("No text could be extracted from the PDF")
+        # Limit text to prevent overwhelming AI
+        return full_text[:15000]  # Increased limit for more comprehensive analysis
     except Exception as e:
+        raise ValueError(f"PDF Extraction Error: {str(e)}")
def prepare_resume_prompt(resume_text, job_description=None, max_resume_chars=10000):
    """
    Build the structured analysis prompt sent to the AI model.
    Args:
        resume_text (str): Extracted resume text; ``None`` is treated as empty.
        job_description (str, optional): Job description for context. A
            missing, empty, or whitespace-only value selects the
            "no job description" wording.
        max_resume_chars (int, optional): Maximum number of resume characters
            embedded in the prompt. Defaults to 10000.
    Returns:
        str: Prompt containing the resume excerpt, the job-description line,
        the analysis instructions, and the expected JSON output schema.
    """
    resume_excerpt = (resume_text or "")[:max_resume_chars]
    # Strip first so a whitespace-only description does not yield an empty
    # "Job Description:" line.
    job_text = (job_description or "").strip()
    if job_text:
        job_section = 'Job Description: ' + job_text
    else:
        job_section = 'No specific job description provided'
    prompt = f"""Professional Resume Analysis:
Resume Content:
{resume_excerpt}
{job_section}
Instructions for Analysis:
1. Perform a comprehensive assessment of the resume
2. Evaluate professional skills, experience, and potential
3. Provide a structured JSON response with:
   - Overall Score (0-100)
   - Skill Match Percentage
   - Key Strengths
   - Areas for Improvement
   - Potential Red Flags
   - Recommended Next Steps
Output Format (JSON):
{{
  "total_score": int,
  "skill_match_percentage": int,
  "strengths": [str],
  "improvements": [str],
  "red_flags": [str],
  "recommended_actions": [str]
}}"""
    return prompt
def analyze_resume(pdf_file, job_description=None):
    """
    Main resume analysis function
    Args:
        pdf_file (bytes): Uploaded PDF file
        job_description (str, optional): Job description for context
    Returns:
        tuple: ``(text, analysis)``. On success ``text`` is the extracted
        resume text and ``analysis`` is the result dict. On failure ``text``
        is the error message and ``analysis`` carries the same message under
        ``"error"``, zeroed scores, and empty lists for the remaining keys,
        so both outcomes share one schema.
    """
    try:
        # Extract text from PDF
        resume_text = extract_text_from_pdf(pdf_file)
        # Prepare prompt for AI
        ai_prompt = prepare_resume_prompt(resume_text, job_description)
        # Note: Replace this with actual Mistral-7B inference
        # This is a placeholder - ai_prompt is what the real model call
        # should receive once it is integrated.
        print("AI Prompt Prepared. Replace this with actual model inference.")
    except Exception as e:
        # UI boundary: report the failure in the outputs instead of raising.
        message = str(e)
        return message, {
            "error": message,
            "total_score": 0,
            "skill_match_percentage": 0,
            "strengths": [],
            "improvements": [],
            "red_flags": [],
            "recommended_actions": []
        }
    return resume_text, {
        "total_score": 75,
        "skill_match_percentage": 80,
        "strengths": ["Robust text extraction", "Structured prompt generation"],
        "improvements": ["Integrate actual AI model inference"],
        "red_flags": [],
        "recommended_actions": ["Connect Mistral-7B model"]
    }
# Gradio Interface
# Two-column layout: inputs (resume upload, optional job description, trigger
# button) on the left, outputs (extracted text, analysis JSON) on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # type="binary" is requested here; analyze_resume documents its
            # pdf_file argument as bytes.
            pdf_input = gr.File(label="Upload Resume PDF", type="binary")
            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
            analyze_btn = gr.Button("Analyze Resume")
        with gr.Column(scale=2):
            extracted_text = gr.Textbox(label="Extracted Text", lines=10)
            analysis_output = gr.JSON(label="AI Analysis")
    # The two inputs map positionally to analyze_resume(pdf_file,
    # job_description); its returned 2-tuple fills the two outputs in order.
    analyze_btn.click(
        fn=analyze_resume,
        inputs=[pdf_input, job_desc_input],
        outputs=[extracted_text, analysis_output]
    )