# --- Hugging Face page metadata (kept as comments so the file parses) ---
# DreamStream-1's picture
# Update app.py
# 6603914 verified
# raw / history blame / 6.7 kB
import os
import re
import tempfile

import docx
import gradio as gr
import pandas as pd
import requests
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, util
# Load pre-trained model for sentence embedding
# (loaded once at module import; shared by all requests)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Define maximum number of resumes
# NOTE(review): MAX_RESUMES is never referenced in the visible code --
# confirm whether an upload cap should actually be enforced.
MAX_RESUMES = 10
# Function to fetch Google API key from environment variable
def get_google_api_key():
    """Return the Google API key from the GOOGLE_API_KEY environment variable.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    key = os.getenv('GOOGLE_API_KEY')
    if key:
        return key
    raise ValueError("Google API key not found in environment variables.")
# Function to extract text from resume (handles .txt, .pdf, .docx)
def extract_text_from_resume(resume_file):
    """Read the text content of a resume file by extension.

    Supported extensions: .txt, .pdf, .docx (case-insensitive).  Any other
    extension yields the sentinel string "Unsupported file format" instead
    of raising.
    """
    extension = os.path.splitext(resume_file)[1].lower()
    if extension == '.txt':
        return read_text_file(resume_file)
    if extension == '.pdf':
        return read_pdf_file(resume_file)
    if extension == '.docx':
        return read_docx_file(resume_file)
    return "Unsupported file format"
def read_text_file(file_path):
    """Return the full contents of a plain-text resume file.

    Reads as UTF-8 explicitly instead of the platform-default encoding,
    and substitutes undecodable bytes rather than crashing the whole
    screening run on a single bad character.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()
def read_pdf_file(file_path):
    """Concatenate the extracted text of every page of a PDF.

    PyPDF2's ``page.extract_text()`` returns ``None`` for pages it cannot
    parse (e.g. scanned images); those pages now contribute nothing instead
    of raising ``TypeError`` on string concatenation.
    """
    reader = PdfReader(file_path)
    return "".join(page.extract_text() or "" for page in reader.pages)
def read_docx_file(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Paragraphs are joined with newlines; the previous direct concatenation
    glued the last word of each paragraph to the first word of the next,
    corrupting downstream embedding and entity extraction.
    """
    doc = docx.Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)
# System prompt to extract candidate details from the resume
def system_prompt_to_extract_info(resume_text):
    """Build the LLM prompt asking for the candidate's name, email and phone.

    The resume text is embedded verbatim; the answer is requested as a
    three-line bullet list so it can be parsed downstream.
    """
    return f"""
Extract the following information from the resume:
1. Candidate's Full Name
2. Candidate's Email Address
3. Candidate's Contact Number
Resume Text: {resume_text}
Return the results in the following format:
- Name: [Extracted Name]
- Email: [Extracted Email]
- Contact: [Extracted Contact Number]
"""
# Function to extract candidate information from resume text
def extract_entities_via_gemini(resume_text):
    """Extract candidate name/email/phone from resume text via an entity API.

    Returns a dict with keys 'name', 'email' and 'contact', keeping the
    placeholder values for anything the API does not return.  On a non-200
    response returns an error dict instead (callers use ``.get()``, so the
    placeholders still flow through downstream).

    NOTE(review): the endpoint URL is a placeholder and the payload follows
    the Cloud Natural Language ``analyzeEntities`` shape, not a real Gemini
    endpoint -- confirm before production use.
    """
    api_key = get_google_api_key()  # Fetch the API key from environment variables
    endpoint = "https://gemini.googleapis.com/v1/documents:analyzeEntities"  # Placeholder API endpoint (adjust as necessary)
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    document = {
        "document": {
            "type": "PLAIN_TEXT",
            "content": resume_text
        }
    }
    # Explicit timeout: requests.post with no timeout can hang forever on a
    # dead endpoint and stall the whole Gradio worker.
    response = requests.post(endpoint, headers=headers, json=document, timeout=30)
    if response.status_code != 200:
        return {"error": "Failed to extract entities from resume", "status_code": response.status_code, "response": response.text}
    entities = response.json().get('entities', [])
    extracted_info = {"name": "Unknown Candidate", "email": "No Email", "contact": "No Contact"}
    # The API response shape is unverified, so use .get() rather than direct
    # indexing (the original raised KeyError on any entity missing 'type'
    # or 'name').  Last matching entity of each type wins, as before.
    field_by_type = {'PERSON': 'name', 'EMAIL': 'email', 'PHONE_NUMBER': 'contact'}
    for entity in entities:
        field = field_by_type.get(entity.get('type'))
        if field and entity.get('name') is not None:
            extracted_info[field] = entity['name']
    return extracted_info
# Function to check similarity between resumes and job description
def extract_leadership_experience(resume_text):
    """Summarize leadership-related lines found in the resume text.

    NOTE(review): check_similarity called this function but no definition
    existed anywhere in the file (a guaranteed NameError at runtime).  This
    keyword-scan reconstruction restores the call site; tune the keyword
    list to the project's real requirements.
    """
    pattern = re.compile(
        r"\b(lead|leader|leadership|led|leading|managed|manager|management|"
        r"mentor|mentored|mentoring|supervised|supervisor|team lead)\b",
        re.IGNORECASE,
    )
    matches = [line.strip() for line in resume_text.splitlines() if pattern.search(line)]
    if not matches:
        return "No leadership experience"
    # Cap the summary so the results table stays readable.
    return "; ".join(matches[:5])


def check_similarity(job_description, resume_files):
    """Score each resume against the job description and collect candidate info.

    Args:
        job_description: path to the uploaded job-description file (from
            gr.File type="filepath"); a raw text string also works.
        resume_files: list of resume file paths (from gr.Files type="filepath").

    Returns:
        A list of 7-tuples: (file name, similarity %, eligibility,
        candidate name, leadership summary, email, contact).
    """
    # The Gradio input is a file path, not the description text itself --
    # the original embedded the path string instead of the file contents.
    if isinstance(job_description, str) and os.path.isfile(job_description):
        job_description = extract_text_from_resume(job_description)
    results = []
    job_emb = model.encode(job_description, convert_to_tensor=True)
    for resume_file in resume_files:
        # type="filepath" yields plain path strings, which have no .name
        # attribute; the original `resume_file.name` raised AttributeError.
        file_label = os.path.basename(str(resume_file))
        resume_text = extract_text_from_resume(resume_file)
        # Skip empty reads and the unsupported-format sentinel (a truthy
        # string that was previously embedded as if it were resume text).
        if not resume_text or resume_text == "Unsupported file format":
            results.append((file_label, 0, "Not Eligible", None,
                            "No leadership experience", "No Email", "No Contact"))
            continue
        resume_emb = model.encode(resume_text, convert_to_tensor=True)
        similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
        similarity_percentage = similarity_score * 100
        leadership_experience = extract_leadership_experience(resume_text)
        # Name/email/phone extraction via the (placeholder) entity endpoint.
        contact_info = extract_entities_via_gemini(resume_text)
        # Eligibility threshold: cosine similarity of at least 0.50.
        if similarity_score >= 0.50:
            results.append((
                file_label,
                similarity_percentage,
                "Eligible",
                contact_info.get('name', 'Unknown Candidate'),
                leadership_experience,
                contact_info.get('email', 'No Email'),
                contact_info.get('contact', 'No Contact')
            ))
        else:
            results.append((
                file_label,
                similarity_percentage,
                "Not Eligible",
                None,
                leadership_experience,
                contact_info.get('email', 'No Email'),
                contact_info.get('contact', 'No Contact')
            ))
    return results
# Gradio Interface Components
# Gradio Interface Components
# type="filepath" means the handler functions receive file-system path
# strings, not file objects.
job_desc_input = gr.File(label="Upload Job Description (TXT)", type="filepath")
resumes_input = gr.Files(label="Upload Resumes (TXT, DOCX, PDF)", type="filepath")
# Gradio Outputs
# Column order must match the 7-tuples built by check_similarity.
results_output = gr.Dataframe(headers=[
    "Resume File",
    "Similarity Score (%)",
    "Eligibility",
    "Candidate Name",
    "Leadership Experience",
    "Email",
    "Contact"],
    label="Analysis Results"
)
# Function to allow CSV download
def download_results(results):
    """Write the analysis results to a CSV file and return its path.

    Uses a unique temporary file instead of the hard-coded /tmp/results.csv,
    which broke on Windows and let concurrent users overwrite each other's
    downloads.
    """
    columns = ["Resume File", "Similarity Score (%)", "Eligibility",
               "Candidate Name", "Leadership Experience", "Email", "Contact"]
    df = pd.DataFrame(results, columns=columns)
    fd, csv_path = tempfile.mkstemp(prefix="results_", suffix=".csv")
    os.close(fd)  # pandas reopens the path itself; just reserve the name
    df.to_csv(csv_path, index=False)
    return csv_path  # Return the file path for the gr.File output
def _run_screening(job_description_file, resume_files):
    """Adapter producing both declared outputs (table + CSV download).

    The original wiring passed the single-return check_similarity to a
    two-component output list and used a nonexistent ``gr.File(file=...)``
    keyword argument, which raised TypeError at startup.
    """
    results = check_similarity(job_description_file, resume_files)
    return results, download_results(results)


# Gradio Interface
interface = gr.Interface(
    fn=_run_screening,
    inputs=[job_desc_input, resumes_input],
    outputs=[results_output, gr.File(label="Download CSV")],
    title="HR Assistant - Resume Screening & Leadership Experience",
    description="Upload job description and resumes to screen candidates for managerial and team leadership roles and extract candidate details.",
    allow_flagging="never"
)
interface.launch()