# DefendModel / app.py
# Mangesh223's picture
# Update app.py
# 6ff26ad verified
import os
import gradio as gr
import PyPDF2
import docx
import requests
import json
import re
from typing import Union, Dict, Any
# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Return the text of every page in a PDF, joined by single spaces.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        The concatenated page texts; a page for which ``extract_text()``
        yields nothing contributes an empty string.

    Raises:
        ValueError: If reading or extracting fails for any reason.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        page_texts = []
        for page in reader.pages:
            # extract_text() may give a falsy value; normalise it to "".
            page_texts.append(page.extract_text() or "")
        return " ".join(page_texts)
    except Exception as err:
        raise ValueError(f"PDF extraction failed: {err!s}")
def extract_text_from_docx(file) -> str:
    """Return the non-empty paragraphs of a Word document, one per line.

    Args:
        file: Whatever ``docx.Document`` accepts as its source.

    Returns:
        The paragraph texts joined with newlines; paragraphs whose text is
        empty are skipped.

    Raises:
        ValueError: If opening or reading the document fails for any reason.
    """
    try:
        document = docx.Document(file)
        non_empty = [
            paragraph.text
            for paragraph in document.paragraphs
            if paragraph.text
        ]
        return "\n".join(non_empty)
    except Exception as err:
        raise ValueError(f"DOCX extraction failed: {err!s}")
def process_uploaded_file(file) -> str:
    """Extract text from an uploaded resume, dispatching on its extension.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing a ``name`` attribute. The upload
            widget in this file is declared with ``type="filepath"``, so
            the path form is the one the UI callback is expected to pass.

    Returns:
        The text produced by the PDF or DOCX extractor.

    Raises:
        ValueError: If no file or filename is provided, if the extension is
            neither ``.pdf`` nor ``.docx``, or if extraction itself fails.
    """
    if file is None:
        raise ValueError("No file provided")

    if isinstance(file, (str, os.PathLike)):
        # A plain path has no ``.name``; the path itself is the filename.
        # Hand the extractors a ``str`` so both libraries can open it.
        source = name = os.fsdecode(file)
    else:
        source = file
        name = getattr(file, "name", None)

    if not name:
        raise ValueError("No filename provided")

    filename = str(name).lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(source)
    if filename.endswith(".docx"):
        return extract_text_from_docx(source)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Find and decode the first JSON object contained in *text*.

    The whole text is tried first. If that does not yield a JSON object,
    the text is scanned for ``{`` characters and decoding is attempted at
    each one, so an object surrounded by extra prose (including prose that
    itself contains stray braces) is still found.

    Args:
        text: Text that is, or contains, a JSON object.

    Returns:
        The decoded object as a ``dict``, or ``None`` when no JSON object
        can be decoded. Non-object JSON (lists, numbers, strings) is not
        returned, matching the declared return type.
    """
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, TypeError):
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    if not isinstance(text, str):
        return None

    # raw_decode stops at the end of the first complete value, so trailing
    # text after the object does not break parsing the way a greedy
    # "first { to last }" match does.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find("{", start + 1)
    return None
def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Score a resume against a job description via the Hugging Face API.

    The resume text is extracted with ``process_uploaded_file``, embedded in
    an instruction prompt that asks for a fixed JSON structure, and sent to
    the hosted Mistral-7B-Instruct model. The model output is then parsed
    with ``extract_json_from_text``.

    Args:
        file: The uploaded resume, as accepted by ``process_uploaded_file``.
        job_description: Job description text; only the first 1000
            characters are sent. ``None`` is treated as empty.

    Returns:
        A dict in one of these shapes:

        * ``{"structured_result": dict, "raw_response": str}`` when the
          model output parsed and contains ``analysis`` and
          ``overall_score``.
        * ``{"raw_response": str, "error": str}`` when the model answered
          but no valid JSON could be extracted.
        * ``{"error": str}`` (optionally with ``"api_response"``) for input,
          configuration, network, or API failures.

        This function does not raise; every failure is reported through the
        ``"error"`` key.
    """
    if file is None:
        return {"error": "No resume file provided"}

    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        hf_api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not hf_api_key:
            return {"error": "Hugging Face API key not configured"}

        # The instructions contain literal JSON braces, so the prompt is
        # assembled by concatenation: str.format() would interpret those
        # braces as replacement fields and raise before any request is made.
        instructions = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
"analysis": {
"keywords": {"score": 0-100, "recommendations": []},
"formatting": {"score": 0-100, "recommendations": []},
"skills": {"score": 0-100, "recommendations": []},
"experience": {"score": 0-100, "recommendations": []},
"education": {"score": 0-100, "recommendations": []}
},
"overall_score": 0-100,
"summary": "Brief overall assessment"
}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>
"""
        prompt = (
            instructions
            + "Resume: " + resume_text[:3000] + "\n"
            + "Job Description: " + (job_description or "")[:1000] + "\n"
            + "[/INST]"
        )

        # Make API request
        response = requests.post(
            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
            headers={"Authorization": f"Bearer {hf_api_key}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,
                    "do_sample": False,
                    # Ask for the completion only; otherwise the echoed
                    # prompt (which contains the JSON template) is part of
                    # the text that has to be parsed.
                    "return_full_text": False,
                },
            },
            timeout=45,
        )

        # Handle API errors
        if response.status_code != 200:
            try:
                error_msg = response.json().get("error", "Unknown API error")
            except (ValueError, AttributeError):
                # Error body was not a JSON object (e.g. an HTML error page).
                error_msg = f"HTTP {response.status_code}"
            return {
                "error": f"API request failed: {error_msg}",
                "api_response": response.text,
            }

        # Process response
        payload = response.json()
        try:
            raw_output = payload[0]["generated_text"]
        except (IndexError, KeyError, TypeError):
            return {
                "error": "Unexpected API response format",
                "api_response": response.text,
            }

        # Try to extract JSON and validate its top-level structure
        result = extract_json_from_text(raw_output)
        required_keys = {"analysis", "overall_score"}
        if isinstance(result, dict) and required_keys <= result.keys():
            return {
                "structured_result": result,
                "raw_response": raw_output,
            }

        # If JSON extraction failed, return raw output
        return {
            "raw_response": raw_output,
            "error": "Could not extract valid JSON",
        }
    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        return {"error": f"Analysis failed: {str(e)}"}
# Enhanced Gradio Interface
# Build the UI: an input column (resume file, job description, button) and
# an output column with tabbed results plus a status line.
# NOTE(review): the nesting below is reconstructed from statement order;
# confirm the placement of `status` and of `display_results` against the
# deployed layout.
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# ATS Resume Analyzer""")
    with gr.Row():
        # Input column
        with gr.Column():
            # type="filepath" means the callback receives the upload as a
            # path rather than a file object.
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath"
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here..."
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        # Output column
        with gr.Column():
            output_tabs = gr.Tabs()
            with output_tabs:
                with gr.Tab("Analysis Results"):
                    json_output = gr.JSON(label="Structured Analysis")
                    summary_output = gr.Textbox(label="Summary", interactive=False)
                with gr.Tab("API Response"):
                    raw_output = gr.Textbox(label="Raw API Response", lines=10)
                with gr.Tab("Debug Info"):
                    status_output = gr.Textbox(label="Status Info", lines=5)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its result onto the output components.

        Returns a dict keyed by output component. Every component starts
        cleared, then one of three branches fills in values depending on
        whether the analysis result carries an error, a structured result,
        or neither.
        """
        result = analyze_with_huggingface(file, job_description)
        # Defaults: clear every output so stale values from a previous run
        # are not left on screen.
        output = {
            json_output: None,
            summary_output: None,
            raw_output: None,
            status_output: None,
            status: ""
        }
        if "error" in result:
            # Failure: show the message, the full result dict for debugging,
            # and whichever raw payload is available.
            output.update({
                status: f"Error: {result['error']}",
                status_output: str(result),
                raw_output: result.get("api_response") or result.get("raw_response") or ""
            })
        elif "structured_result" in result:
            # Success: only the "analysis" sub-dict goes to the JSON view;
            # the summary has its own textbox.
            structured = result["structured_result"]
            output.update({
                json_output: structured["analysis"],
                summary_output: structured.get("summary", ""),
                raw_output: result["raw_response"],
                status: "Analysis complete!",
                status_output: "Successfully parsed JSON response"
            })
        else:
            # Neither an error nor a structured result was reported.
            output.update({
                raw_output: result.get("raw_response", "No response received"),
                status: "Received non-JSON response",
                status_output: "The API returned a response but it couldn't be parsed as JSON"
            })
        return output

    # Wire the button to the callback; the outputs list names every
    # component that display_results may set.
    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, summary_output, raw_output, status_output, status]
    )
if __name__ == "__main__":
    # Enable request queuing, then start the server.
    # NOTE(review): "*" looks intended as an allow-everything wildcard;
    # confirm how Gradio interprets allowed_paths entries before relying on it.
    queued_app = demo.queue()
    queued_app.launch(allowed_paths=["*"])