# DefendModel / app.py — ATS resume analyzer (Gradio + Hugging Face Inference API)
import os
import gradio as gr
import PyPDF2
import docx
import requests
import json
import re
from typing import Union, Dict, Any
# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Extract and concatenate the text of every page in a PDF upload.

    Raises ValueError (wrapping the underlying error) if the PDF cannot
    be read or parsed.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # Pages with no extractable text yield None; substitute "".
        page_texts = []
        for page in reader.pages:
            page_texts.append(page.extract_text() or "")
        return " ".join(page_texts)
    except Exception as e:
        raise ValueError(f"PDF extraction failed: {str(e)}")
def extract_text_from_docx(file) -> str:
    """Extract the non-empty paragraph text from a Word (.docx) document.

    Raises ValueError (wrapping the underlying error) if the document
    cannot be opened or parsed.
    """
    try:
        document = docx.Document(file)
        lines = [paragraph.text for paragraph in document.paragraphs if paragraph.text]
        return "\n".join(lines)
    except Exception as e:
        raise ValueError(f"DOCX extraction failed: {str(e)}")
def process_uploaded_file(file) -> str:
    """Dispatch text extraction based on the uploaded file's extension.

    Accepts either a filesystem path string -- which is what Gradio passes
    when the File component is configured with type="filepath" (see the UI
    below) -- or a file-like object exposing a .name attribute. The original
    code accessed file.name unconditionally, which raises AttributeError for
    the path-string case and for a missing upload.

    Raises:
        ValueError: if no file was provided, the name is empty, or the
            extension is not .pdf/.docx.
    """
    if file is None:
        raise ValueError("No filename provided")
    # Path strings carry the name directly; file objects expose it via .name.
    name = file if isinstance(file, str) else getattr(file, "name", None)
    if not name:
        raise ValueError("No filename provided")
    lowered = name.lower()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if lowered.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Best-effort JSON extraction from a model's text response.

    First attempts to parse the entire string; if that fails, parses the
    first-to-last brace-delimited span found in the text. Returns the parsed
    value, or None when neither attempt yields valid JSON.
    """
    candidates = [text]
    # Greedy match: from the first "{" to the last "}" in the response.
    embedded = re.search(r'\{[\s\S]*\}', text)
    if embedded:
        candidates.append(embedded.group())
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Analyze a resume against a job description via the HF Inference API.

    Args:
        file: Uploaded resume (path string or file-like object).
        job_description: Target job description text.

    Returns:
        The parsed analysis dict on success, or a dict containing an
        "error" key (plus an optional debugging payload) on any failure.
        This function never raises; all errors surface in the return value.
    """
    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
        if not HF_API_KEY:
            return {"error": "Hugging Face API key not configured"}

        # Prepare the prompt with strict JSON instructions.
        # BUG FIX: the literal JSON braces in the schema must be doubled
        # ({{ }}); str.format() treats single braces as replacement fields,
        # so the original template raised ValueError ("Single '{' encountered")
        # before the API was ever called.
        prompt = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{{
"analysis": {{
"keywords": {{"score": 0-100, "recommendations": []}},
"formatting": {{"score": 0-100, "recommendations": []}},
"skills": {{"score": 0-100, "recommendations": []}},
"experience": {{"score": 0-100, "recommendations": []}},
"education": {{"score": 0-100, "recommendations": []}}
}},
"overall_score": 0-100,
"summary": "Brief overall assessment"
}}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>
Resume: {resume}
Job Description: {jd}
[/INST]""".format(
            resume=resume_text[:3000],  # Truncate to prevent token limit issues
            jd=job_description[:1000],
        )

        # Make API request
        response = requests.post(
            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,  # Lower for more consistent JSON
                    "do_sample": False,  # Disable randomness
                },
            },
            timeout=45,  # Increased timeout
        )

        # Handle API errors. The error body is not guaranteed to be JSON
        # (e.g. HTML gateway pages), so guard the decode instead of letting
        # it raise out of the error path.
        if response.status_code != 200:
            try:
                error_msg = response.json().get("error", "Unknown API error")
            except ValueError:
                error_msg = response.text[:200] or "Unknown API error"
            return {"error": f"API request failed: {error_msg}"}

        # Guard against unexpected success payload shapes (empty list,
        # missing 'generated_text', non-list body).
        try:
            raw_output = response.json()[0]['generated_text']
        except (ValueError, KeyError, IndexError, TypeError):
            return {
                "error": "Unexpected API response format",
                "raw_response": response.text[:1000],
            }

        result = extract_json_from_text(raw_output)
        if not result:
            return {
                "error": "Could not extract valid JSON",
                "raw_response": raw_output,  # Include raw response for debugging
            }

        # Validate JSON structure before handing it to the UI.
        required_keys = {"analysis", "overall_score"}
        if not all(key in result for key in required_keys):
            return {
                "error": "Incomplete analysis in response",
                "partial_response": result,
            }
        return result
    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}
    except Exception as e:
        return {"error": f"Analysis failed: {str(e)}"}
# Enhanced Gradio Interface
# Layout: left column collects the inputs; right column shows results in two
# tabs (parsed JSON and the raw API text) plus a status line.
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# ATS Resume Analyzer
Upload your resume and job description to get ATS compatibility analysis""")
    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio pass a path string (not a file
            # object) to the click handler.
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath"
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here..."
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            output_tabs = gr.Tabs()
            with output_tabs:
                with gr.Tab("Structured Analysis", id="json"):
                    json_output = gr.JSON(label="Analysis Results")
                with gr.Tab("Raw Response", id="raw"):
                    raw_output = gr.Textbox(label="API Response", interactive=False)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Handle results display with proper error handling"""
        # Returning a dict keyed by output components updates all three
        # outputs from a single handler.
        result = analyze_with_huggingface(file, job_description)
        if "error" in result:
            return {
                json_output: None,
                # Surface the raw model output when available, to aid debugging.
                raw_output: result.get("raw_response", str(result)),
                status: f"Error: {result['error']}"
            }
        return {
            json_output: result,
            raw_output: json.dumps(result, indent=2),
            status: "Analysis complete!"
        }

    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, raw_output, status]
    )
# ... (all your existing code remains the same until the launch part) ...
if __name__ == "__main__":
    # Configure queue (choose one option):
    # Option 1: Simple queue
    demo.queue()
    # Then launch
    demo.launch(
        # NOTE(review): allowed_paths controls which host paths Gradio may
        # serve; "*" is unusually permissive — confirm this is intended
        # before deploying publicly.
        allowed_paths=["*"],
    )