| | |
| | import streamlit as st |
| | import nltk |
| | from gensim.models.doc2vec import Doc2Vec, TaggedDocument |
| | from nltk.tokenize import word_tokenize |
| | import PyPDF2 |
| | import pandas as pd |
| | import re |
| | import matplotlib.pyplot as plt |
| | import seaborn as sns |
| | import spacy |
| | from numpy import triu |
| |
|
| | |
# word_tokenize() (used by preprocess_text) needs the Punkt sentence-tokenizer
# data. Older NLTK releases load the "punkt" package; newer releases look for
# "punkt_tab" instead and raise LookupError without it, so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')
| |
|
| | |
# A bare number with one or two integer digits and at most two decimals
# (e.g. "8", "8.75").
float_regex = re.compile(r'^\d{1,2}(\.\d{1,2})?$')
# E-mail address. The top-level-domain class is [A-Za-z]: inside a character
# class "|" is a literal pipe, not alternation, so the former [A-Z|a-z]
# wrongly accepted addresses such as "x@y.c|m".
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
# A string made of exactly ten digits.
float_digit_regex = re.compile(r'^\d{10}$')
# Ten consecutive digits with at least one other character directly after
# (group 1) or directly before (group 2) them.
email_with_phone_regex = re.compile(r'(\d{10}).|.(\d{10})')
| |
|
| | |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts; in this script it is
            a file uploaded through Streamlit.

    Returns:
        One string with the extracted text of all pages and no separator
        between pages; an empty string when the document has no pages.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # ``or ""`` keeps the join safe should a page's extraction yield None
    # instead of a string.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
| |
|
| | |
def tokenize_text(text, nlp_model):
    """Run *text* through *nlp_model* and list the entities it found.

    Args:
        text: Raw text to analyse.
        nlp_model: Callable pipeline; it is invoked with
            ``disable=["tagger", "parser"]`` and its result must expose
            ``ents``, each entity having ``text`` and ``label_``.

    Returns:
        A list of ``(lower-cased entity text, entity label)`` tuples, in the
        order the entities appear in ``ents``.
    """
    analysed = nlp_model(text, disable=["tagger", "parser"])
    labelled_entities = []
    for entity in analysed.ents:
        labelled_entities.append((entity.text.lower(), entity.label_))
    return labelled_entities
| |
|
| | |
def extract_cgpa(resume_text):
    """Find the first CGPA/GPA figure mentioned in a resume.

    Two spellings are recognised, case-insensitively: a label followed by
    the number ("CGPA: 8.5", "GPA-3.7") and the number followed by the label
    ("9.1 CGPA").

    Args:
        resume_text: Full text of the resume.

    Returns:
        The number as a ``float``, or ``None`` when neither spelling occurs.
    """
    cgpa_pattern = r'\b(?:CGPA|GPA|C\.G\.PA|Cumulative GPA)\s*:?[\s-]([0-9]+(?:\.[0-9]+)?)\b|\b([0-9]+(?:\.[0-9]+)?)\s(?:CGPA|GPA)\b'
    found = re.search(cgpa_pattern, resume_text, flags=re.IGNORECASE)
    if found is None:
        return None
    # Exactly one of the two groups is filled, depending on which spelling hit.
    after_label, before_label = found.groups()
    return float(after_label or before_label)
| |
|
| | |
def extract_skills(text, skills_keywords):
    """Return the keywords from *skills_keywords* that occur in *text*.

    Matching is case-insensitive and respects word boundaries, so "java"
    is not found inside "javascript".

    Args:
        text: Text to search.
        skills_keywords: Iterable of keyword strings.

    Returns:
        The lower-cased keywords that were found, in input order.
    """
    haystack = text.lower()  # lowered once instead of once per keyword
    found = []
    for keyword in skills_keywords:
        needle = keyword.lower()
        # ``\b`` only matches next to a word character. Anchoring a keyword
        # such as "c++", "c#" or ".net" with it on the punctuation side can
        # never succeed, so the anchor is applied only on sides where the
        # keyword itself starts or ends with a word character.
        leading = r'\b' if re.match(r'\w', needle) else ''
        trailing = r'\b' if re.search(r'\w\Z', needle) else ''
        if re.search(leading + re.escape(needle) + trailing, haystack):
            found.append(needle)
    return found
| |
|
| | |
def preprocess_text(text):
    """Lower-case *text* and split it into tokens with NLTK's ``word_tokenize``.

    Args:
        text: Raw text.

    Returns:
        Whatever ``word_tokenize`` returns for the lower-cased text.
    """
    lowered = text.lower()
    return word_tokenize(lowered)
| |
|
| | |
def train_doc2vec_model(documents, *, vector_size=20, min_count=2, epochs=50):
    """Build and train a Doc2Vec model on *documents*.

    Args:
        documents: Tagged documents to learn from. The collection is passed
            to both ``build_vocab`` and ``train``, so it must be re-iterable
            (a list, not a one-shot generator).
        vector_size: Forwarded to ``Doc2Vec``; defaults to 20.
        min_count: Forwarded to ``Doc2Vec``; defaults to 2.
        epochs: Forwarded to ``Doc2Vec``; defaults to 50.

    Returns:
        The trained ``Doc2Vec`` model.
    """
    model = Doc2Vec(vector_size=vector_size, min_count=min_count, epochs=epochs)
    model.build_vocab(documents)
    model.train(documents, total_examples=model.corpus_count, epochs=model.epochs)
    return model
| |
|
| | |
def calculate_similarity(model, text1, text2):
    """Compare two texts through vectors inferred by a Doc2Vec *model*.

    Each text is tokenised with ``preprocess_text`` and turned into a vector
    with ``model.infer_vector``; the two vectors are then compared with
    ``model.dv.cosine_similarities``.

    Args:
        model: Trained model providing ``infer_vector`` and ``dv``.
        text1: First text.
        text2: Second text.

    Returns:
        The single similarity value between the two inferred vectors.
    """
    # text1 is inferred before text2, as in a plain sequential version.
    inferred = [model.infer_vector(preprocess_text(passage)) for passage in (text1, text2)]
    similarities = model.dv.cosine_similarities(inferred[0], inferred[1:])
    return similarities[0]
| |
|
| | |
def accuracy_calculation(true_positives, false_positives, false_negatives):
    """Return the share of true positives among all counted outcomes.

    The denominator is the sum of the three arguments; true negatives are
    not part of the formula.

    Args:
        true_positives: Number of true positives.
        false_positives: Number of false positives.
        false_negatives: Number of false negatives.

    Returns:
        ``true_positives / (true_positives + false_positives +
        false_negatives)``, or ``0`` when that sum is zero.
    """
    counted = true_positives + false_positives + false_negatives
    if counted == 0:
        return 0
    return true_positives / counted
| |
|
| | |
def _trim_glued_phones(addresses):
    """Separate 10-digit phone numbers that are fused onto e-mail addresses.

    Args:
        addresses: Iterable of e-mail strings found in a resume.

    Returns:
        An ``(emails, phones)`` pair of sets. Every address in which
        ``email_with_phone_regex`` finds ten consecutive digits contributes
        those digits to ``phones``; the digits are cut from the address only
        when they sit at its very start or very end.
    """
    emails = set()
    phones = set()
    for address in addresses:
        glued = email_with_phone_regex.search(address)
        if glued:
            digits = glued.group(1) or glued.group(2)
            phones.add(digits)
            # Slice the exact number off instead of calling
            # str.strip(digits): strip() treats its argument as a set of
            # characters and would also eat neighbouring digits that belong
            # to the address itself.
            if address.startswith(digits):
                address = address[len(digits):]
            if address.endswith(digits):
                address = address[:-len(digits)]
        emails.add(address)
    return emails, phones


st.markdown("# Resume Matching Tool 📃📃")
st.markdown("An application to match resumes with a job description.")

st.sidebar.markdown("## Upload Resumes PDF")
resumes_files = st.sidebar.file_uploader("Upload Resumes PDF", type=["pdf"], accept_multiple_files=True)

if resumes_files:
    # The job-description uploader is only shown once resumes are present.
    st.sidebar.markdown("## Upload Job Description PDF")
    job_descriptions_file = st.sidebar.file_uploader("Upload Job Description PDF", type=["pdf"])

    if job_descriptions_file:
        # spaCy pipeline whose entities are read below through the labels
        # SKILLS, QUALIFICATION and PHONE.
        nlp_model_path = "en_Resume_Matching_Keywords"
        nlp = spacy.load(nlp_model_path)

        # Parse every PDF exactly once; the texts are reused for entity
        # matching, contact extraction and the Doc2Vec heatmap.
        job_description_text = extract_text_from_pdf(job_descriptions_file)
        resumes_texts = [extract_text_from_pdf(resume_file) for resume_file in resumes_files]
        job_description_tokens = tokenize_text(job_description_text, nlp)

        # Totals across all resumes.
        overall_skill_matches = 0
        overall_qualification_matches = 0

        results_list = []
        job_skills = set()
        job_qualifications = set()

        # Line breaks inside an entity come from PDF wrapping; replace them
        # so wrapped and unwrapped spellings compare equal.
        for job_token, job_label in job_description_tokens:
            if job_label == 'QUALIFICATION':
                job_qualifications.add(job_token.replace('\n', ' '))
            elif job_label == 'SKILLS':
                job_skills.add(job_token.replace('\n', ' '))

        job_skills_number = len(job_skills)
        job_qualifications_number = len(job_qualifications)

        # Every job-description entity (any label) in normalised form, so
        # each resume entity needs one set lookup instead of a scan over
        # all job tokens.
        job_token_keys = {job_token.lower().replace('\n', ' ') for job_token, _ in job_description_tokens}

        # Per resume: how often each job skill string occurs in its text.
        skills_counts_all_resumes = []

        for uploaded_resume, resume_text in zip(resumes_files, resumes_texts):
            resume_tokens = tokenize_text(resume_text, nlp)

            matched_skills = set()
            matched_qualifications = set()
            phone = set()

            for resume_token, resume_label in resume_tokens:
                if resume_label == 'PHONE':
                    # Contact details are collected regardless of the job
                    # description; tying them to a job-token match would
                    # only keep numbers the job description also contains.
                    if float_digit_regex.match(resume_token):
                        phone.add(resume_token)
                    continue
                if resume_token.lower().replace('\n', ' ') not in job_token_keys:
                    continue
                if resume_label == 'SKILLS':
                    matched_skills.add(resume_token.replace('\n', ' '))
                elif resume_label == 'QUALIFICATION':
                    matched_qualifications.add(resume_token.replace('\n', ' '))

            skill_match = len(matched_skills)
            qualification_match = len(matched_qualifications)

            # Addresses are searched in the text with line breaks turned
            # into spaces; numbers fused onto an address are moved to the
            # phone set.
            found_emails = re.findall(email_pattern, resume_text.replace('\n', ' '))
            email, glued_phones = _trim_glued_phones(found_emails)
            phone.update(glued_phones)

            overall_skill_matches += skill_match
            overall_qualification_matches += qualification_match

            # Count against the lower-cased text: the skills are lower-cased
            # too, so counting in the raw text would miss capitalised
            # occurrences.
            lowered_resume_text = resume_text.lower()
            skills_counts_all_resumes.append([lowered_resume_text.count(skill.lower()) for skill in job_skills])

            # A job description without any SKILLS entity would otherwise
            # raise ZeroDivisionError here.
            if job_skills_number:
                skill_match_percentage = (skill_match / job_skills_number) * 100
            else:
                skill_match_percentage = 0.0

            result_dict = {
                "Resume": uploaded_resume.name,
                "Similarity Score": skill_match_percentage,
                "Skill Matches": skill_match,
                "Matched Skills": matched_skills,
                "CGPA": extract_cgpa(resume_text),
                "Email": email,
                "Phone": phone,
                "Qualification Matches": qualification_match,
                "Matched Qualifications": matched_qualifications,
            }
            results_list.append(result_dict)

        st.subheader("Overall Matches")
        st.write(f"Total Skill Matches: {overall_skill_matches}")
        st.write(f"Total Qualification Matches: {overall_qualification_matches}")
        st.write(f"Job Qualifications: {job_qualifications}")
        st.write(f"Job Skills: {job_skills}")

        results_df = pd.DataFrame(results_list)
        st.subheader("Individual Results")
        st.dataframe(results_df)

        st.subheader("\nHeatmap:")

        skills_keywords_input = st.text_input("Enter skills keywords separated by commas (e.g., python, java, machine learning):")
        skills_keywords = [skill.strip() for skill in skills_keywords_input.split(',') if skill.strip()]

        if skills_keywords:
            # The Doc2Vec model is only needed for the heatmap, so it is
            # trained once keywords are available rather than on every run.
            tagged_resumes = [TaggedDocument(words=preprocess_text(text), tags=[str(i)]) for i, text in enumerate(resumes_texts)]
            model_resumes = train_doc2vec_model(tagged_resumes)

            # One row per resume, one column per keyword.
            skills_similarity_scores = []
            for resume_text in resumes_texts:
                resume_text_similarity_scores = [
                    calculate_similarity(model_resumes, resume_text, skill)
                    for skill in skills_keywords
                ]
                skills_similarity_scores.append(resume_text_similarity_scores)

            skills_similarity_df = pd.DataFrame(
                skills_similarity_scores,
                columns=skills_keywords,
                index=[resume_file.name for resume_file in resumes_files],
            )

            fig, ax = plt.subplots(figsize=(12, 8))
            sns.heatmap(skills_similarity_df, cmap='YlGnBu', annot=True, fmt=".2f", ax=ax)
            ax.set_title('Heatmap for Skills Similarity')
            ax.set_xlabel('Skills')
            ax.set_ylabel('Resumes')

            # Keep the resume names horizontal.
            plt.yticks(rotation=0)

            st.pyplot(fig)
            # The script is re-executed on every interaction; close the
            # figure so pyplot does not keep one alive per rerun.
            plt.close(fig)
        else:
            st.write("Please enter at least one skill keyword.")

    else:
        st.warning("Please upload the Job Description PDF to proceed.")
else:
    st.warning("Please upload Resumes PDF to proceed.")