# --- Hugging Face page metadata (kept as comments so the file parses) ---
# DreamStream-1's picture
# Update app.py
# 6603914 verified
# raw / history blame / 6.7 kB
import os
import re
import tempfile

import docx
import gradio as gr
import pandas as pd
import requests
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, util
# Load pre-trained model for sentence embedding
# (loaded once at module import; shared by all requests)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Define maximum number of resumes
# NOTE(review): MAX_RESUMES is never referenced in the visible code --
# confirm whether an upload cap should actually be enforced.
MAX_RESUMES = 10
# Function to fetch Google API key from environment variable
def get_google_api_key():
    """Return the Google API key from the GOOGLE_API_KEY environment variable.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    key = os.getenv('GOOGLE_API_KEY')
    if key:
        return key
    raise ValueError("Google API key not found in environment variables.")
# Function to extract text from resume (handles .txt, .pdf, .docx)
def extract_text_from_resume(resume_file):
    """Read the text content of a resume file by extension.

    Supported extensions: .txt, .pdf, .docx (case-insensitive).  Any other
    extension yields the sentinel string "Unsupported file format" instead
    of raising.
    """
    extension = os.path.splitext(resume_file)[1].lower()
    if extension == '.txt':
        return read_text_file(resume_file)
    if extension == '.pdf':
        return read_pdf_file(resume_file)
    if extension == '.docx':
        return read_docx_file(resume_file)
    return "Unsupported file format"
def read_text_file(file_path):
    """Return the full contents of a plain-text resume file.

    Reads as UTF-8 explicitly instead of the platform-default encoding,
    and substitutes undecodable bytes rather than crashing the whole
    screening run on a single bad character.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()
def read_pdf_file(file_path):
    """Concatenate the extracted text of every page of a PDF.

    PyPDF2's ``page.extract_text()`` returns ``None`` for pages it cannot
    parse (e.g. scanned images); those pages now contribute nothing instead
    of raising ``TypeError`` on string concatenation.
    """
    reader = PdfReader(file_path)
    return "".join(page.extract_text() or "" for page in reader.pages)
def read_docx_file(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Paragraphs are joined with newlines; the previous direct concatenation
    glued the last word of each paragraph to the first word of the next,
    corrupting downstream embedding and entity extraction.
    """
    doc = docx.Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)
# System prompt to extract candidate details from the resume
def system_prompt_to_extract_info(resume_text):
    """Build the LLM prompt asking for the candidate's name, email and phone.

    The resume text is embedded verbatim; the answer is requested as a
    three-line bullet list so it can be parsed downstream.
    """
    return f"""
Extract the following information from the resume:
1. Candidate's Full Name
2. Candidate's Email Address
3. Candidate's Contact Number
Resume Text: {resume_text}
Return the results in the following format:
- Name: [Extracted Name]
- Email: [Extracted Email]
- Contact: [Extracted Contact Number]
"""
# Function to extract candidate information from resume text
def extract_entities_via_gemini(resume_text):
    """Extract candidate name/email/phone from resume text via an entity API.

    Returns a dict with keys 'name', 'email' and 'contact', keeping the
    placeholder values for anything the API does not return.  On a non-200
    response returns an error dict instead (callers use ``.get()``, so the
    placeholders still flow through downstream).

    NOTE(review): the endpoint URL is a placeholder and the payload follows
    the Cloud Natural Language ``analyzeEntities`` shape, not a real Gemini
    endpoint -- confirm before production use.
    """
    api_key = get_google_api_key()  # Fetch the API key from environment variables
    endpoint = "https://gemini.googleapis.com/v1/documents:analyzeEntities"  # Placeholder API endpoint (adjust as necessary)
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    document = {
        "document": {
            "type": "PLAIN_TEXT",
            "content": resume_text
        }
    }
    # Explicit timeout: requests.post with no timeout can hang forever on a
    # dead endpoint and stall the whole Gradio worker.
    response = requests.post(endpoint, headers=headers, json=document, timeout=30)
    if response.status_code != 200:
        return {"error": "Failed to extract entities from resume", "status_code": response.status_code, "response": response.text}
    entities = response.json().get('entities', [])
    extracted_info = {"name": "Unknown Candidate", "email": "No Email", "contact": "No Contact"}
    # The API response shape is unverified, so use .get() rather than direct
    # indexing (the original raised KeyError on any entity missing 'type'
    # or 'name').  Last matching entity of each type wins, as before.
    field_by_type = {'PERSON': 'name', 'EMAIL': 'email', 'PHONE_NUMBER': 'contact'}
    for entity in entities:
        field = field_by_type.get(entity.get('type'))
        if field and entity.get('name') is not None:
            extracted_info[field] = entity['name']
    return extracted_info
# Function to check similarity between resumes and job description
def extract_leadership_experience(resume_text):
    """Summarize leadership-related lines found in the resume text.

    NOTE(review): check_similarity called this function but no definition
    existed anywhere in the file (a guaranteed NameError at runtime).  This
    keyword-scan reconstruction restores the call site; tune the keyword
    list to the project's real requirements.
    """
    pattern = re.compile(
        r"\b(lead|leader|leadership|led|leading|managed|manager|management|"
        r"mentor|mentored|mentoring|supervised|supervisor|team lead)\b",
        re.IGNORECASE,
    )
    matches = [line.strip() for line in resume_text.splitlines() if pattern.search(line)]
    if not matches:
        return "No leadership experience"
    # Cap the summary so the results table stays readable.
    return "; ".join(matches[:5])


def check_similarity(job_description, resume_files):
    """Score each resume against the job description and collect candidate info.

    Args:
        job_description: path to the uploaded job-description file (from
            gr.File type="filepath"); a raw text string also works.
        resume_files: list of resume file paths (from gr.Files type="filepath").

    Returns:
        A list of 7-tuples: (file name, similarity %, eligibility,
        candidate name, leadership summary, email, contact).
    """
    # The Gradio input is a file path, not the description text itself --
    # the original embedded the path string instead of the file contents.
    if isinstance(job_description, str) and os.path.isfile(job_description):
        job_description = extract_text_from_resume(job_description)
    results = []
    job_emb = model.encode(job_description, convert_to_tensor=True)
    for resume_file in resume_files:
        # type="filepath" yields plain path strings, which have no .name
        # attribute; the original `resume_file.name` raised AttributeError.
        file_label = os.path.basename(str(resume_file))
        resume_text = extract_text_from_resume(resume_file)
        # Skip empty reads and the unsupported-format sentinel (a truthy
        # string that was previously embedded as if it were resume text).
        if not resume_text or resume_text == "Unsupported file format":
            results.append((file_label, 0, "Not Eligible", None,
                            "No leadership experience", "No Email", "No Contact"))
            continue
        resume_emb = model.encode(resume_text, convert_to_tensor=True)
        similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
        similarity_percentage = similarity_score * 100
        leadership_experience = extract_leadership_experience(resume_text)
        # Name/email/phone extraction via the (placeholder) entity endpoint.
        contact_info = extract_entities_via_gemini(resume_text)
        # Eligibility threshold: cosine similarity of at least 0.50.
        if similarity_score >= 0.50:
            results.append((
                file_label,
                similarity_percentage,
                "Eligible",
                contact_info.get('name', 'Unknown Candidate'),
                leadership_experience,
                contact_info.get('email', 'No Email'),
                contact_info.get('contact', 'No Contact')
            ))
        else:
            results.append((
                file_label,
                similarity_percentage,
                "Not Eligible",
                None,
                leadership_experience,
                contact_info.get('email', 'No Email'),
                contact_info.get('contact', 'No Contact')
            ))
    return results
# Gradio Interface Components
# Gradio Interface Components
# type="filepath" means the handler functions receive file-system path
# strings, not file objects.
job_desc_input = gr.File(label="Upload Job Description (TXT)", type="filepath")
resumes_input = gr.Files(label="Upload Resumes (TXT, DOCX, PDF)", type="filepath")
# Gradio Outputs
# Column order must match the 7-tuples built by check_similarity.
results_output = gr.Dataframe(headers=[
    "Resume File",
    "Similarity Score (%)",
    "Eligibility",
    "Candidate Name",
    "Leadership Experience",
    "Email",
    "Contact"],
    label="Analysis Results"
)
# Function to allow CSV download
def download_results(results):
    """Write the analysis results to a CSV file and return its path.

    Uses a unique temporary file instead of the hard-coded /tmp/results.csv,
    which broke on Windows and let concurrent users overwrite each other's
    downloads.
    """
    columns = ["Resume File", "Similarity Score (%)", "Eligibility",
               "Candidate Name", "Leadership Experience", "Email", "Contact"]
    df = pd.DataFrame(results, columns=columns)
    fd, csv_path = tempfile.mkstemp(prefix="results_", suffix=".csv")
    os.close(fd)  # pandas reopens the path itself; just reserve the name
    df.to_csv(csv_path, index=False)
    return csv_path  # Return the file path for the gr.File output
def _run_screening(job_description_file, resume_files):
    """Adapter producing both declared outputs (table + CSV download).

    The original wiring passed the single-return check_similarity to a
    two-component output list and used a nonexistent ``gr.File(file=...)``
    keyword argument, which raised TypeError at startup.
    """
    results = check_similarity(job_description_file, resume_files)
    return results, download_results(results)


# Gradio Interface
interface = gr.Interface(
    fn=_run_screening,
    inputs=[job_desc_input, resumes_input],
    outputs=[results_output, gr.File(label="Download CSV")],
    title="HR Assistant - Resume Screening & Leadership Experience",
    description="Upload job description and resumes to screen candidates for managerial and team leadership roles and extract candidate details.",
    allow_flagging="never"
)
interface.launch()