# HR-Mn / app.py
# (Hugging Face Space header: uploaded by DreamStream-1, "Update app.py",
#  commit b7f4f59 verified — kept here as a comment so the file parses.)
import gradio as gr
import PyPDF2
import re
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer
import spacy
import os
# --- Model initialisation -------------------------------------------------
# spaCy pipeline used for Named Entity Recognition; if the model package is
# missing (fresh container), download it once and retry the load.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    os.system("python -m spacy download en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Sentence-embedding model for resume / job-description similarity scoring.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
def extract_text_from_txt(txt_file):
    """Return the text content of a .txt upload as a ``str``.

    Accepts either raw ``bytes`` or a file-like object.

    Fix: the original decoded only when *txt_file* itself was ``bytes``;
    a file handle opened in binary mode (as upload wrappers typically are)
    returns ``bytes`` from ``read()``, which was passed through undecoded.
    Both paths now decode to UTF-8 text.
    """
    if isinstance(txt_file, bytes):
        return txt_file.decode('utf-8')
    data = txt_file.read()
    if isinstance(data, bytes):
        # Binary-mode file handle: decode to text before returning.
        return data.decode('utf-8')
    return data
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file."""
    pages = PyPDF2.PdfReader(pdf_file).pages
    # extract_text() may return None (e.g. image-only pages); treat as "".
    return ''.join(page.extract_text() or '' for page in pages)
def extract_years_of_experience(resume_text):
    """Return the largest "N years"/"N yrs" figure found in the text, or 0.

    Matches an integer optionally followed by a hyphen/whitespace and a
    "year(s)"/"yr(s)" unit, case-insensitively.
    """
    pattern = r'(\d+)\s*[-]?[\s]*(years?|yrs?)'
    best = 0
    for number, _unit in re.findall(pattern, resume_text, re.IGNORECASE):
        best = max(best, int(number))
    return best
def extract_skills_nlp(resume_text):
    """Return the unique entity texts labelled "SKILL" by the NER pipeline.

    NOTE(review): the stock en_core_web_sm model emits no "SKILL" entity
    label, so with that model this always returns [] — confirm whether a
    custom-trained pipeline was intended here.
    """
    doc = nlp(resume_text)
    unique_skills = {ent.text for ent in doc.ents if ent.label_ == "SKILL"}
    return list(unique_skills)
def evaluate_with_tfidf(resumes, required_skills):
    """Score each resume against a required-skills string with TF-IDF.

    Fits a vectorizer over the resumes plus the skills string, then returns
    a flat array of dot-product similarities, one entry per resume.
    """
    corpus = list(resumes)
    corpus.append(required_skills)
    matrix = TfidfVectorizer().fit_transform(corpus)
    # Last row is the skills vector; dot it against every resume row.
    scores = (matrix[-1] * matrix[:-1].T).toarray()
    return scores.flatten()
def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D vectors as a float.

    Fix: the original normalised each vector unconditionally, which yields
    NaN (plus a divide-by-zero warning) when either vector is all zeros —
    e.g. an empty resume. A zero vector has no direction, so its similarity
    to anything is defined here as 0.0.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    # float() keeps the result JSON-serializable (np.float64 is not).
    return float(np.dot(a, b) / (norm_a * norm_b))
def analyze_resumes(resume_files, job_desc_file, required_skills, required_experience_years):
    """Analyze uploaded resumes against a job description and requirements.

    Parameters
    ----------
    resume_files : list of uploaded file objects (.pdf or .txt; others skipped)
    job_desc_file : uploaded job-description file (.txt or .pdf)
    required_skills : comma-separated skills string
    required_experience_years : minimum years of experience required

    Returns a dict with the job description text and a per-resume list of
    extracted skills, experience, and similarity scores.

    Fixes vs. original: the job-description embedding is computed once
    instead of once per resume; ``required_experience_years`` was accepted
    but never used — it now drives a ``meets_experience_requirement`` flag
    (backward-compatible addition); similarity scores are cast to ``float``
    so the result is JSON-serializable by gr.JSON.
    """
    job_description = (extract_text_from_txt(job_desc_file)
                       if job_desc_file.name.endswith('.txt')
                       else extract_text_from_pdf(job_desc_file))
    # Invariant across resumes — hoisted out of the loop.
    job_desc_embedding = sentence_model.encode(job_description)

    required_skills_list = [skill.strip().lower() for skill in required_skills.split(",")]

    results = {
        "job_description": job_description,
        "resumes": []
    }

    for resume in resume_files:
        if resume.name.endswith('.pdf'):
            resume_text = extract_text_from_pdf(resume)
        elif resume.name.endswith('.txt'):
            resume_text = extract_text_from_txt(resume)
        else:
            continue  # Skip unsupported file types

        found_skills = extract_skills_nlp(resume_text)  # NER-based skills
        experience_years = extract_years_of_experience(resume_text)
        # Lexical similarity of the resume to the required-skills string.
        tfidf_similarities = evaluate_with_tfidf([resume_text], ", ".join(required_skills_list))
        # Semantic similarity of the resume to the job description.
        resume_embedding = sentence_model.encode(resume_text)
        similarity_score = cosine_similarity(resume_embedding, job_desc_embedding)

        results["resumes"].append({
            "resume_text": resume_text,
            "found_skills": found_skills,
            "experience_years": experience_years,
            "tfidf_similarity": float(tfidf_similarities[0]),
            "embedding_similarity": float(similarity_score),
            # gr.Number may deliver None when the field is cleared.
            "meets_experience_requirement": experience_years >= float(required_experience_years or 0),
        })
    return results
# Gradio interface
def build_app():
    """Assemble and return the Gradio Blocks UI for the resume analyzer."""
    with gr.Blocks() as demo:
        gr.Markdown("## Resume Analysis Tool")
        # File inputs side by side; remaining controls stacked below.
        with gr.Row():
            job_desc_input = gr.File(label="Job Description File (.txt or .pdf)")
            resumes_input = gr.Files(label="Upload Resumes (.txt, .pdf)", file_types=[".txt", ".pdf"])
        skills_input = gr.Textbox(label="Required Skills (comma separated)", placeholder="Python, Machine Learning, etc.")
        experience_input = gr.Number(label="Required Experience (in years)", value=5)
        run_button = gr.Button("Analyze Resumes")
        results_view = gr.JSON(label="Analysis Results")
        run_button.click(
            analyze_resumes,
            inputs=[resumes_input, job_desc_input, skills_input, experience_input],
            outputs=results_view,
        )
    return demo
# Script entry point.
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link.
    build_app().launch(share=True)