# DefendModel / app.py — ATS resume analyzer (Gradio + Hugging Face Inference API)
import os
import gradio as gr
import PyPDF2
import docx
import requests
import json
import re
from typing import Union, Dict, Any
# Text extraction functions
def extract_text_from_pdf(file) -> str:
    """Extract and concatenate the text of every page in a PDF upload.

    Raises ValueError (wrapping the underlying error) if the PDF cannot
    be read or parsed.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # Pages with no extractable text yield None; substitute "".
        page_texts = []
        for page in reader.pages:
            page_texts.append(page.extract_text() or "")
        return " ".join(page_texts)
    except Exception as e:
        raise ValueError(f"PDF extraction failed: {str(e)}")
def extract_text_from_docx(file) -> str:
    """Extract the non-empty paragraph text from a Word (.docx) document.

    Raises ValueError (wrapping the underlying error) if the document
    cannot be opened or parsed.
    """
    try:
        document = docx.Document(file)
        lines = [paragraph.text for paragraph in document.paragraphs if paragraph.text]
        return "\n".join(lines)
    except Exception as e:
        raise ValueError(f"DOCX extraction failed: {str(e)}")
def process_uploaded_file(file) -> str:
    """Dispatch text extraction based on the uploaded file's extension.

    Accepts either a filesystem path string -- which is what Gradio passes
    when the File component is configured with type="filepath" (see the UI
    below) -- or a file-like object exposing a .name attribute. The original
    code accessed file.name unconditionally, which raises AttributeError for
    the path-string case and for a missing upload.

    Raises:
        ValueError: if no file was provided, the name is empty, or the
            extension is not .pdf/.docx.
    """
    if file is None:
        raise ValueError("No filename provided")
    # Path strings carry the name directly; file objects expose it via .name.
    name = file if isinstance(file, str) else getattr(file, "name", None)
    if not name:
        raise ValueError("No filename provided")
    lowered = name.lower()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if lowered.endswith(".docx"):
        return extract_text_from_docx(file)
    raise ValueError("Unsupported file format. Please upload PDF or Word document.")
def extract_json_from_text(text: str) -> Union[Dict[str, Any], None]:
    """Best-effort JSON extraction from a model's text response.

    First attempts to parse the entire string; if that fails, parses the
    first-to-last brace-delimited span found in the text. Returns the parsed
    value, or None when neither attempt yields valid JSON.
    """
    candidates = [text]
    # Greedy match: from the first "{" to the last "}" in the response.
    embedded = re.search(r'\{[\s\S]*\}', text)
    if embedded:
        candidates.append(embedded.group())
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
def analyze_with_huggingface(file, job_description: str) -> Dict[str, Any]:
    """Analyze a resume against a job description via the HF Inference API.

    Args:
        file: Uploaded resume (path string or file-like object).
        job_description: Target job description text.

    Returns:
        The parsed analysis dict on success, or a dict containing an
        "error" key (plus an optional debugging payload) on any failure.
        This function never raises; all errors surface in the return value.
    """
    try:
        # Process file and validate inputs
        resume_text = process_uploaded_file(file)
        if not resume_text.strip():
            return {"error": "Extracted resume text is empty"}

        HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
        if not HF_API_KEY:
            return {"error": "Hugging Face API key not configured"}

        # Prepare the prompt with strict JSON instructions.
        # BUG FIX: the literal JSON braces in the schema must be doubled
        # ({{ }}); str.format() treats single braces as replacement fields,
        # so the original template raised ValueError ("Single '{' encountered")
        # before the API was ever called.
        prompt = """<s>[INST] <<SYS>>
You are an ATS resume analyzer. Return ONLY valid JSON with this exact structure:
{{
"analysis": {{
"keywords": {{"score": 0-100, "recommendations": []}},
"formatting": {{"score": 0-100, "recommendations": []}},
"skills": {{"score": 0-100, "recommendations": []}},
"experience": {{"score": 0-100, "recommendations": []}},
"education": {{"score": 0-100, "recommendations": []}}
}},
"overall_score": 0-100,
"summary": "Brief overall assessment"
}}
Important:
1. Generate actual scores based on content
2. Provide specific recommendations
3. Return ONLY the JSON object
4. No additional text or explanations
<</SYS>>
Resume: {resume}
Job Description: {jd}
[/INST]""".format(
            resume=resume_text[:3000],  # Truncate to prevent token limit issues
            jd=job_description[:1000],
        )

        # Make API request
        response = requests.post(
            "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3",
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.3,  # Lower for more consistent JSON
                    "do_sample": False,  # Disable randomness
                },
            },
            timeout=45,  # Increased timeout
        )

        # Handle API errors. The error body is not guaranteed to be JSON
        # (e.g. HTML gateway pages), so guard the decode instead of letting
        # it raise out of the error path.
        if response.status_code != 200:
            try:
                error_msg = response.json().get("error", "Unknown API error")
            except ValueError:
                error_msg = response.text[:200] or "Unknown API error"
            return {"error": f"API request failed: {error_msg}"}

        # Guard against unexpected success payload shapes (empty list,
        # missing 'generated_text', non-list body).
        try:
            raw_output = response.json()[0]['generated_text']
        except (ValueError, KeyError, IndexError, TypeError):
            return {
                "error": "Unexpected API response format",
                "raw_response": response.text[:1000],
            }

        result = extract_json_from_text(raw_output)
        if not result:
            return {
                "error": "Could not extract valid JSON",
                "raw_response": raw_output,  # Include raw response for debugging
            }

        # Validate JSON structure before handing it to the UI.
        required_keys = {"analysis", "overall_score"}
        if not all(key in result for key in required_keys):
            return {
                "error": "Incomplete analysis in response",
                "partial_response": result,
            }
        return result
    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}
    except Exception as e:
        return {"error": f"Analysis failed: {str(e)}"}
# Enhanced Gradio Interface
# Layout: left column collects the inputs; right column shows results in two
# tabs (parsed JSON and the raw API text) plus a status line.
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# ATS Resume Analyzer
Upload your resume and job description to get ATS compatibility analysis""")
    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio pass a path string (not a file
            # object) to the click handler.
            file_input = gr.File(
                label="Upload Resume (PDF or DOCX)",
                file_types=[".pdf", ".docx"],
                type="filepath"
            )
            jd_input = gr.Textbox(
                label="Job Description",
                lines=8,
                placeholder="Paste the job description here..."
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            output_tabs = gr.Tabs()
            with output_tabs:
                with gr.Tab("Structured Analysis", id="json"):
                    json_output = gr.JSON(label="Analysis Results")
                with gr.Tab("Raw Response", id="raw"):
                    raw_output = gr.Textbox(label="API Response", interactive=False)
            status = gr.Textbox(label="Status", interactive=False)

    def display_results(file, job_description):
        """Handle results display with proper error handling"""
        # Returning a dict keyed by output components updates all three
        # outputs from a single handler.
        result = analyze_with_huggingface(file, job_description)
        if "error" in result:
            return {
                json_output: None,
                # Surface the raw model output when available, to aid debugging.
                raw_output: result.get("raw_response", str(result)),
                status: f"Error: {result['error']}"
            }
        return {
            json_output: result,
            raw_output: json.dumps(result, indent=2),
            status: "Analysis complete!"
        }

    analyze_btn.click(
        fn=display_results,
        inputs=[file_input, jd_input],
        outputs=[json_output, raw_output, status]
    )
# ... (all your existing code remains the same until the launch part) ...
if __name__ == "__main__":
    # Configure queue (choose one option):
    # Option 1: Simple queue
    demo.queue()
    # Then launch
    demo.launch(
        # NOTE(review): allowed_paths controls which host paths Gradio may
        # serve; "*" is unusually permissive — confirm this is intended
        # before deploying publicly.
        allowed_paths=["*"],
    )