import os
import gradio as gr
import PyPDF2
import docx
import requests
import json
import re
from typing import Union, Dict, Any
# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Extract the text of every page of a PDF, joined by single spaces.

    Args:
        file: Passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        The page texts joined with ``" "``. A page whose ``extract_text()``
        result is falsy contributes an empty string.

    Raises:
        ValueError: If opening the document or extracting text fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        return " ".join(page.extract_text() or "" for page in reader.pages)
    except Exception as exc:
        # Chain the cause so the real failure stays visible in tracebacks.
        raise ValueError(f"PDF extraction failed: {exc}") from exc
def extract_text_from_docx(file) -> str:
    """Extract the non-empty paragraphs of a Word document, one per line.

    Args:
        file: Passed unchanged to ``docx.Document``.

    Returns:
        The text of every paragraph whose text is non-empty, joined with
        newlines. Only ``doc.paragraphs`` is read.

    Raises:
        ValueError: If opening the document or reading it fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    try:
        document = docx.Document(file)
        return "\n".join(
            paragraph.text for paragraph in document.paragraphs if paragraph.text
        )
    except Exception as exc:
        # Chain the cause so the real failure stays visible in tracebacks.
        raise ValueError(f"DOCX extraction failed: {exc}") from exc
def process_uploaded_file(file) -> str:
    """Dispatch an uploaded resume to the extractor matching its extension.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path or filename as ``.name``. The upload
            widget in this file is declared with ``type="filepath"``, so a
            plain string path is the expected input.

    Returns:
        The text extracted from the document.

    Raises:
        ValueError: If no file or filename is given, if the extension is not
            ``.pdf`` or ``.docx``, or if the extractor itself fails.
    """
    if file is None:
        raise ValueError("No file provided")

    # A path has no ``.name`` attribute; use the path itself as the filename.
    if isinstance(file, (str, os.PathLike)):
        name = os.fspath(file)
    else:
        name = getattr(file, "name", None)

    if not name:
        raise ValueError("No filename provided")

    filename = str(name).lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Return the first JSON object found in ``text``, or ``None``.

    The whole text is tried first. If it is not a JSON object, the text is
    scanned left to right and a parse is attempted at each ``{``; the first
    position that yields a valid object wins. Trailing text after the object
    (including stray braces) does not prevent extraction.

    Args:
        text: Model output that may contain a JSON object surrounded by
            other text. ``bytes`` are decoded as UTF-8; any other non-string
            value yields ``None``.

    Returns:
        The decoded ``dict``, or ``None`` when no JSON object can be parsed.
        Non-object JSON values (lists, numbers, strings) are never returned.
    """
    if isinstance(text, (bytes, bytearray)):
        text = text.decode("utf-8", errors="replace")
    if not isinstance(text, str):
        return None

    # Fast path: the entire response is the JSON document.
    try:
        whole = json.loads(text)
    except json.JSONDecodeError:
        whole = None
    if isinstance(whole, dict):
        return whole

    # Slow path: try to decode an object starting at each opening brace.
    # raw_decode stops at the end of the object, so trailing text is ignored.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find("{", start + 1)
    return None
def _build_ats_prompt(resume_text: str, job_description: str) -> str:
    """Assemble the instruction prompt from the resume and job description."""
    # Plain concatenation on purpose: the instructions contain literal JSON
    # braces, which str.format() would try to parse as replacement fields.
    instructions = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{
"analysis": {
"keywords": {"score": 0-100, "recommendations": []},
"formatting": {"score": 0-100, "recommendations": []},
"skills": {"score": 0-100, "recommendations": []},
"experience": {"score": 0-100, "recommendations": []},
"education": {"score": 0-100, "recommendations": []}
},
"overall_score": 0-100,
"summary": "Brief overall assessment"
}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>
"""
    return (
        instructions
        + "Resume: " + resume_text[:3000] + "\n"
        + "Job Description: " + job_description[:1000] + "\n"
        + "[/INST]"
    )


def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Score a resume against a job description via the Hugging Face API.

    The resume text is extracted with ``process_uploaded_file``, truncated to
    3000 characters (job description: 1000), embedded in an instruction
    prompt and posted to the hosted Mistral-7B-Instruct model. The API key is
    read from the ``HUGGINGFACE_API_KEY`` environment variable.

    Args:
        file: The uploaded resume, forwarded to ``process_uploaded_file``.
        job_description: Free-text job description; ``None`` is treated as
            an empty string.

    Returns:
        A dict in one of these shapes (this function does not raise):

        * ``{"structured_result": dict, "raw_response": str}`` when the model
          output contains JSON with both ``analysis`` and ``overall_score``.
        * ``{"raw_response": str, "error": str}`` when no such JSON could be
          extracted from the model output.
        * ``{"error": str, "api_response": str}`` when the API answered with
          a non-200 status or an unexpected payload.
        * ``{"error": str}`` for empty resume text, a missing API key,
          network failures and any other exception.
    """
    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        api_key = os.getenv("HUGGINGFACE_API_KEY")
        if not api_key:
            return {"error": "Hugging Face API key not configured"}

        prompt = _build_ats_prompt(resume_text, job_description or "")

        # Make API request
        response = requests.post(
            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    # NOTE(review): with do_sample False the temperature is
                    # presumably ignored - confirm against the API docs.
                    "temperature": 0.3,
                    "do_sample": False,
                    # Ask for the completion only; the prompt itself contains
                    # braces that would confuse JSON extraction.
                    "return_full_text": False,
                },
            },
            timeout=45,
        )

        # Handle API errors
        if response.status_code != 200:
            try:
                error_msg = response.json().get("error", "Unknown API error")
            except (ValueError, AttributeError):
                # Error body was not a JSON object (e.g. an HTML gateway page).
                error_msg = f"HTTP {response.status_code}"
            return {
                "error": f"API request failed: {error_msg}",
                "api_response": response.text,
            }

        # Process response
        try:
            raw_output = response.json()[0]["generated_text"]
        except (ValueError, KeyError, IndexError, TypeError):
            return {
                "error": "Unexpected API response format",
                "api_response": response.text,
            }
        if not isinstance(raw_output, str):
            return {
                "error": "Unexpected API response format",
                "api_response": response.text,
            }

        # Defensive: drop an echoed prompt if the backend returned one anyway.
        if raw_output.startswith(prompt):
            raw_output = raw_output[len(prompt):]

        # Try to extract JSON and validate its structure
        result = extract_json_from_text(raw_output)
        required_keys = {"analysis", "overall_score"}
        if result and all(key in result for key in required_keys):
            return {
                "structured_result": result,
                "raw_response": raw_output,
            }

        # If JSON extraction failed, return raw output
        return {
            "raw_response": raw_output,
            "error": "Could not extract valid JSON",
        }
    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        return {"error": f"Analysis failed: {str(e)}"}
# Enhanced Gradio Interface
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# ATS Resume Analyzer""")

    with gr.Row():
        # Left column: the two inputs and the trigger button.
        with gr.Column():
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath",
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")

        # Right column: tabbed result views plus a one-line status field.
        with gr.Column():
            output_tabs = gr.Tabs()
            with output_tabs:
                with gr.Tab("Analysis Results"):
                    json_output = gr.JSON(label="Structured Analysis")
                    summary_output = gr.Textbox(label="Summary", interactive=False)
                with gr.Tab("API Response"):
                    raw_output = gr.Textbox(label="Raw API Response", lines=10)
                with gr.Tab("Debug Info"):
                    status_output = gr.Textbox(label="Status Info", lines=5)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Run the analysis and map its result dict onto the output widgets.

        Returns a dict keyed by output component. Components not relevant to
        the outcome are reset (``None``, or ``""`` for the status field).
        """
        analysis = analyze_with_huggingface(file, job_description)

        if "error" in analysis:
            # Failure: surface the message and whatever raw text is available.
            changes = {
                status: f"Error: {analysis['error']}",
                status_output: str(analysis),
                raw_output: analysis.get("api_response") or analysis.get("raw_response") or "",
            }
        elif "structured_result" in analysis:
            # Success: show the parsed analysis and its summary.
            parsed = analysis["structured_result"]
            changes = {
                json_output: parsed["analysis"],
                summary_output: parsed.get("summary", ""),
                raw_output: analysis["raw_response"],
                status: "Analysis complete!",
                status_output: "Successfully parsed JSON response",
            }
        else:
            # Neither an error nor structured data: show the raw text only.
            changes = {
                raw_output: analysis.get("raw_response", "No response received"),
                status: "Received non-JSON response",
                status_output: "The API returned a response but it couldn't be parsed as JSON",
            }

        cleared = {
            json_output: None,
            summary_output: None,
            raw_output: None,
            status_output: None,
            status: "",
        }
        return {**cleared, **changes}

    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, summary_output, raw_output, status_output, status],
    )
# Script entry point: enable the request queue and start the Gradio server.
if __name__ == "__main__":
    demo.queue().launch(
        # NOTE(review): allowed_paths expects file or directory paths; confirm
        # that "*" has the intended effect before relying on it.
        allowed_paths=["*"],
    )