import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from keyphrasetransformer import KeyPhraseTransformer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import pandas as pd
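# Streamlit app: scores how well a resume matches a job description using
# sentence-embedding cosine similarity, and visualizes key phrases from each text.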
kp = KeyPhraseTransformer()
@st.cache_resource
def load_model():
    # Cache the sentence-embedding model so it is loaded only once per session
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return model
#---------------------
# Prepare and tokenize dataset
dataset = load_dataset("Unknown92/Resume_dataset")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
def tokenize_function(examples):
    # Tokenize the "Resume" column, padding/truncating to the model's max length
    return tokenizer(examples["Resume"], padding="max_length", truncation=True)
tokenized_datasets = dataset.map(tokenize_function, batched=True)
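# Subsample 200 examples per split to keep the fine-tuning run quick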
small_train_dataset = tokenized_datasets["Train"].shuffle(seed=42).select(range(200))
small_eval_dataset = tokenized_datasets["Test"].shuffle(seed=42).select(range(200))
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
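# compute_metrics is referenced below but never defined in the original script;
# a minimal sketch, assuming plain accuracy is the intended evaluation metric:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}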
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()
#---------------------
def calculate_similarity(model, text1, text2):
    # Embed both texts and return their cosine similarity as a scalar
    embedding1 = model.encode([text1])
    embedding2 = model.encode([text2])
    return cosine_similarity(embedding1, embedding2)[0][0]
def generate_wordcloud(text, title):
    # Build a word cloud image and render it in the Streamlit app
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    fig = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(title)
    st.pyplot(fig)
st.set_page_config(
    page_title="Resume Keyword Identifier",
    page_icon="+",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.title("Resume Match Calculator")
model = load_model()
# Set the font size for the "Paste the Job Description" text
st.markdown("<style>#fc1{font-size: 20px !important;}</style>", unsafe_allow_html=True)
jd = st.text_area("Paste the Job Description:", height=100)
resume = st.text_area("Paste Your Resume:", height=100)
if st.button("Calculate Match Score"):
    if jd and resume:
        score = calculate_similarity(model, jd, resume)
        jp = kp.get_key_phrases(jd)
        rp = kp.get_key_phrases(resume)
        # Keywords present in the JD but absent from the resume
        missing_keywords = set(jp) - set(rp)
        # Generate word clouds for the JD and resume keyword sets
        generate_wordcloud(' '.join(jp), 'Word Cloud for JD Keywords')
        generate_wordcloud(' '.join(rp), 'Word Cloud for Resume Keywords')
        st.write("The match score is:")
        st.write(score)
        st.write("JD Keywords:")
        st.write(jp)
        st.write("Resume Keywords:")
        st.write(rp)
        st.write("Missing Keywords in Resume:")
        st.write(list(missing_keywords))
    else:
        st.write("Please enter both the job description and resume.")