# Hugging Face Space: Mangesh223/DefendModel (status: Sleeping)
# File size: 7,676 bytes

import os
import gradio as gr
import PyPDF2
import docx
import requests
import json
import re
from typing import Union, Dict, Any

# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Return the text of every page of a PDF, joined by single spaces.

    Pages for which the reader yields no text contribute an empty string.

    Raises:
        ValueError: if anything goes wrong while opening or reading the PDF;
            the message carries the underlying error text.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        page_texts = []
        for page in reader.pages:
            # extract_text() may return None/empty; normalise to "".
            page_texts.append(page.extract_text() or "")
        return " ".join(page_texts)
    except Exception as err:
        raise ValueError(f"PDF extraction failed: {err}")

def extract_text_from_docx(file) -> str:
    """Return the non-empty paragraphs of a Word document, one per line.

    Raises:
        ValueError: if anything goes wrong while opening or reading the
            document; the message carries the underlying error text.
    """
    try:
        document = docx.Document(file)
        # Skip paragraphs whose text is empty so no blank lines are emitted.
        non_empty = [paragraph.text for paragraph in document.paragraphs if paragraph.text]
        return "\n".join(non_empty)
    except Exception as err:
        raise ValueError(f"DOCX extraction failed: {err}")

def process_uploaded_file(file) -> str:
    """Extract text from an uploaded resume, dispatching on its extension.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or a
            file-like object exposing a ``name`` attribute. The UI in this
            file declares its upload widget with ``type="filepath"``, so a
            plain path string must be accepted as well as objects.

    Returns:
        The extracted text of the PDF or DOCX document.

    Raises:
        ValueError: if no file was supplied, the filename is missing, the
            extension is not ``.pdf``/``.docx``, or extraction itself fails.
    """
    if file is None:
        raise ValueError("No file provided")

    # A path has no ``.name``; a file-like object carries its path there.
    if isinstance(file, (str, os.PathLike)):
        name = os.fsdecode(file)
    else:
        name = getattr(file, "name", None)

    if not name:
        raise ValueError("No filename provided")

    # Case-insensitive extension match (e.g. "CV.PDF").
    filename = str(name).lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")

def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Return the first JSON object found in ``text``, or ``None``.

    The whole text is tried first. If that is not a JSON object, the text is
    scanned left to right and a decode is attempted at every ``{``; the first
    position that yields a complete, valid object wins. Unlike a greedy
    ``{ ... }`` regex, this tolerates trailing prose, stray closing braces and
    multiple objects in the same response.

    Args:
        text: Raw model output that may contain JSON surrounded by other text.

    Returns:
        The decoded object as a ``dict``, or ``None`` when ``text`` is not a
        string or contains no decodable JSON object. Non-object JSON values
        (numbers, strings, arrays) are never returned.
    """
    if not isinstance(text, str):
        return None

    try:
        whole = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(whole, dict):
            return whole

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # whatever follows the object is simply ignored.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find("{", start + 1)
    return None

def _build_analysis_prompt(resume_text: str, job_description: str) -> str:
    """Assemble the instruction prompt sent to the model.

    The schema section contains literal JSON braces, so the prompt is built
    by concatenation instead of ``str.format``: ``format`` would treat each
    ``{`` as the start of a replacement field and raise before any request
    is made.
    """
    instructions = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
  "analysis": {
    "keywords": {"score": 0-100, "recommendations": []},
    "formatting": {"score": 0-100, "recommendations": []},
    "skills": {"score": 0-100, "recommendations": []},
    "experience": {"score": 0-100, "recommendations": []},
    "education": {"score": 0-100, "recommendations": []}
  },
  "overall_score": 0-100,
  "summary": "Brief overall assessment"
}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>
"""
    # Inputs are truncated to keep the prompt bounded.
    return (
        f"{instructions}\n"
        f"Resume: {resume_text[:3000]}\n"
        f"Job Description: {job_description[:1000]}\n"
        "[/INST]"
    )


def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Analyze a resume against a job description via the Hugging Face API.

    Args:
        file: The uploaded resume, in whatever form ``process_uploaded_file``
            accepts.
        job_description: Free-text job description; ``None`` is treated as
            an empty string.

    Returns:
        A dict in one of these shapes (errors are reported, never raised):

        * ``{"structured_result": dict, "raw_response": str}`` when the model
          output contains a JSON object with ``analysis`` and
          ``overall_score``.
        * ``{"raw_response": str, "error": str}`` when the model answered
          but no suitable JSON object could be extracted.
        * ``{"error": str, "api_response": str}`` when the API returned a
          non-200 status or an unexpected payload.
        * ``{"error": str}`` for configuration, input, network or other
          failures.
    """
    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
        if not HF_API_KEY:
            return {"error": "Hugging Face API key not configured"}

        prompt = _build_analysis_prompt(resume_text, job_description or "")

        # Make API request
        response = requests.post(
            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,
                    "do_sample": False,
                    # Ask for the completion only. If the prompt is echoed
                    # back, its schema template (not valid JSON) precedes
                    # the model's answer and confuses JSON extraction.
                    "return_full_text": False
                }
            },
            timeout=45
        )

        # Handle API errors. The error body is not guaranteed to be JSON
        # (or a JSON object), so decode it defensively.
        if response.status_code != 200:
            try:
                payload = response.json()
            except ValueError:
                payload = None
            if isinstance(payload, dict):
                error_msg = payload.get("error", "Unknown API error")
            else:
                error_msg = "Unknown API error"
            return {
                "error": f"API request failed: {error_msg}",
                "api_response": response.text
            }

        # Process response: expected shape is [{"generated_text": "..."}].
        try:
            raw_output = response.json()[0]["generated_text"]
        except (ValueError, LookupError, TypeError):
            return {
                "error": "Unexpected API response format",
                "api_response": response.text
            }

        # Try to extract JSON and validate its top-level structure.
        result = extract_json_from_text(raw_output)
        required_keys = {"analysis", "overall_score"}
        if isinstance(result, dict) and required_keys <= result.keys():
            return {
                "structured_result": result,
                "raw_response": raw_output
            }

        # If JSON extraction failed, return raw output
        return {
            "raw_response": raw_output,
            "error": "Could not extract valid JSON"
        }

    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}
    except Exception as e:
        # Top-level boundary: the UI expects an error dict, not an exception.
        return {"error": f"Analysis failed: {str(e)}"}

# Enhanced Gradio Interface
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    # Page heading.
    gr.Markdown("""# ATS Resume Analyzer""")
    
    with gr.Row():
        # First column: user inputs (resume upload, job description, trigger).
        with gr.Column():
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath"
            )
            jd_input = gr.Textbox(
                label="Job Description", 
                lines=8,
                placeholder="Paste the job description here..."
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        
        # Second column: results, split across three tabs plus a status line.
        with gr.Column():
            output_tabs = gr.Tabs()
            with output_tabs:
                with gr.Tab("Analysis Results"):
                    json_output = gr.JSON(label="Structured Analysis")
                    summary_output = gr.Textbox(label="Summary", interactive=False)
                with gr.Tab("API Response"):
                    raw_output = gr.Textbox(label="Raw API Response", lines=10)
                with gr.Tab("Debug Info"):
                    status_output = gr.Textbox(label="Status Info", lines=5)
            # Created outside the tabs context, but inside the second column.
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its result onto the output components.

        Calls ``analyze_with_huggingface(file, job_description)`` and returns
        a dict keyed by the output component objects (``json_output``,
        ``summary_output``, ``raw_output``, ``status_output``, ``status``),
        i.e. the same components listed in ``outputs`` of the click handler.

        Three result shapes are handled:

        * the result contains ``"error"``: the status shows the error, the
          debug tab shows the whole result dict, and the raw tab shows
          ``api_response`` or ``raw_response`` if either is present;
        * the result contains ``"structured_result"``: the ``analysis``
          section, the summary and the raw response are displayed;
        * anything else: only the raw response (or a placeholder) is shown,
          with a "non-JSON" status.
        """
        result = analyze_with_huggingface(file, job_description)
        
        # Defaults: every output is cleared unless a branch below fills it.
        output = {
            json_output: None,
            summary_output: None,
            raw_output: None,
            status_output: None,
            status: ""
        }
        
        # The error check comes first, so a result carrying both "error" and
        # "raw_response" is reported as an error with the raw text attached.
        if "error" in result:
            output.update({
                status: f"Error: {result['error']}",
                status_output: str(result),
                raw_output: result.get("api_response") or result.get("raw_response") or ""
            })
        elif "structured_result" in result:
            structured = result["structured_result"]
            output.update({
                # Only the per-category "analysis" section goes to the JSON view.
                json_output: structured["analysis"],
                summary_output: structured.get("summary", ""),
                raw_output: result["raw_response"],
                status: "Analysis complete!",
                status_output: "Successfully parsed JSON response"
            })
        else:
            # Fallback for a result with neither "error" nor "structured_result".
            output.update({
                raw_output: result.get("raw_response", "No response received"),
                status: "Received non-JSON response",
                status_output: "The API returned a response but it couldn't be parsed as JSON"
            })
        
        return output

    # Wire the button: inputs are passed positionally to display_results.
    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, summary_output, raw_output, status_output, status]
    )

if __name__ == "__main__":
    # Enable request queuing first, then start serving the app.
    # NOTE(review): allowed_paths is given the literal entry "*"; confirm this
    # has the intended effect, or drop it if no extra paths need serving.
    queued_app = demo.queue()
    queued_app.launch(allowed_paths=["*"])