import re

import docx2txt
import pdfplumber
import spacy
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Load the English NLP model from spaCy
@st.cache_resource
def load_spacy_model():
    """Load and return spaCy's small English pipeline (``en_core_web_sm``).

    The ``st.cache_resource`` decorator lets Streamlit reuse the loaded
    pipeline instead of calling ``spacy.load`` again on each script rerun.
    """
    model = spacy.load('en_core_web_sm')
    return model


# Shared pipeline instance used by the extraction helpers in this file.
nlp = load_spacy_model()
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the caller in this
            file passes a Streamlit uploaded-file object.

    Returns:
        The page texts joined with newlines. Pages for which
        ``extract_text()`` returns nothing are skipped, so a PDF without
        extractable text yields ``''``.
    """
    with pdfplumber.open(pdf_file) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]

    # Join with a newline so the last word of one page does not fuse with
    # the first word of the next (which would corrupt skill/TF-IDF tokens).
    return "\n".join(chunk for chunk in page_texts if chunk)
# Function to extract text from a DOCX file
def extract_text_from_docx(docx_file):
    """Return the text that ``docx2txt`` extracts from a DOCX file.

    Args:
        docx_file: Passed straight through to ``docx2txt.process``; the
            caller in this file supplies a Streamlit uploaded-file object.
    """
    document_text = docx2txt.process(docx_file)
    return document_text
# Function to extract user-defined skills from resume text
def extract_skills(text, user_skills):
    """Return the user-supplied skills that occur in ``text``.

    Matching is case-insensitive and anchored on word boundaries, so a
    skill only matches as a whole term ("java" does not match
    "javascript"). Whitespace inside a multi-word skill matches any run of
    whitespace in the text, including line breaks.

    Args:
        text: Resume text to search.
        user_skills: Iterable of skill strings; surrounding whitespace and
            letter case are ignored, blank entries are skipped.

    Returns:
        The matched skills, lower-cased and de-duplicated, in the order
        they first appear in ``user_skills``.
    """
    haystack = text.lower()
    matched = {}  # dict keys keep first-seen order while de-duplicating
    for raw_skill in user_skills:
        skill = raw_skill.strip().lower()
        if not skill or skill in matched:
            continue
        # Lookarounds are used instead of \b so that skills starting or
        # ending with punctuation ("c++", ".net") can still match.
        pattern = (
            r"(?<!\w)"
            + r"\s+".join(re.escape(part) for part in skill.split())
            + r"(?!\w)"
        )
        if re.search(pattern, haystack):
            matched[skill] = None
    return list(matched)
# Function to estimate years of experience from dates mentioned
# A 1-2 digit count directly before "year(s)"/"yr(s)", e.g. "5 years",
# "5+ years", "3-year". The lookbehind rejects the tail of longer numbers
# ("2019 years") and of decimals ("2.5 years").
_YEARS_PATTERN = re.compile(
    r"(?<![\d.])(\d{1,2})\+?[\s-]*(?:years?|yrs?)\b",
    re.IGNORECASE,
)


def extract_experience(text):
    """Estimate years of experience from durations mentioned in ``text``.

    The text is run through the module-level spaCy pipeline ``nlp``; every
    entity labelled ``DATE`` is scanned for "<number> year(s)" phrases and
    the largest number found is returned. This is a heuristic: it does not
    check that a duration actually refers to work experience.

    Args:
        text: Resume text to analyse.

    Returns:
        The largest year count found as an ``int``, or ``0`` when no such
        phrase is present.
    """
    doc = nlp(text)
    durations = []
    for ent in doc.ents:
        if ent.label_ != 'DATE':
            continue
        for match in _YEARS_PATTERN.finditer(ent.text):
            durations.append(int(match.group(1)))
    return max(durations, default=0)
# Function to compute a similarity score between resume and job description
def match_score(resume_text, job_description):
    """Score how similar a resume is to a job description.

    Both texts are vectorised together with TF-IDF (English stop words
    removed) and compared by cosine similarity.

    Args:
        resume_text: Text extracted from the resume.
        job_description: Job description text.

    Returns:
        The cosine similarity as a percentage rounded to two decimals, or
        ``0.0`` when the texts contain no usable terms.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # document has a term left after stop-word removal; treat that as
        # no similarity instead of crashing the app.
        return 0.0
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return round(float(similarity[0][0]) * 100, 2)
# -------- Streamlit Frontend Starts Here -------- #
st.title("π AI Resume Screening App")

# Inputs: free-text job description, comma-separated skills, resume files.
job_description = st.text_area("π Paste the Job Description Below:", height=200)

skills_input = st.text_input(
    "π οΈ Enter Required Skills (comma-separated):",
    placeholder="e.g., Python, SQL, Machine Learning",
)

uploaded_files = st.file_uploader(
    "π Upload Resume Files (PDF/DOCX)",
    type=['pdf', 'docx'],
    accept_multiple_files=True,
)

# Screening runs only once all three inputs have been provided.
if uploaded_files and job_description and skills_input:
    # Split the comma-separated skills, dropping blank entries.
    user_skills = [skill.strip() for skill in skills_input.split(',') if skill.strip()]

    if not user_skills:
        st.warning("β οΈ Please enter at least one skill.")
    else:
        st.markdown("### π Screening Results")

        for resume in uploaded_files:
            # Compare the extension case-insensitively so that names such
            # as "CV.PDF" are not rejected as unsupported.
            file_name = resume.name.lower()
            if file_name.endswith('.pdf'):
                extract_text = extract_text_from_pdf
            elif file_name.endswith('.docx'):
                extract_text = extract_text_from_docx
            else:
                st.warning(f"Unsupported file type: {resume.name}")
                continue

            # A corrupt or unreadable file should not abort the whole run:
            # report it and move on to the next resume.
            try:
                resume_text = extract_text(resume) or ''
            except Exception as exc:
                st.error(f"Could not read {resume.name}: {exc}")
                continue

            # Extract information
            skills = extract_skills(resume_text, user_skills)
            experience = extract_experience(resume_text)
            score = match_score(resume_text, job_description)

            # Display results
            st.subheader(f"π€ Candidate: {resume.name}")
            st.write(f"✅ **Skills Matched**: {', '.join(skills) if skills else 'None'}")
            st.write(f"π§ **Estimated Experience**: {experience} year(s)")
            st.write(f"π **Match Score**: {score}%")
            st.markdown("---")