"""Streamlit app that scores how well a resume matches a job description.

This part of the script sets up the page, loads the heavyweight resources
(key-phrase extractor, sentence-embedding model, fine-tuned classifier) and
defines the helpers used by the UI code that follows.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from datasets import load_dataset
from keyphrasetransformer import KeyPhraseTransformer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
from wordcloud import WordCloud

# Page configuration is issued before any cached loader runs so that it stays
# the first Streamlit command executed by the script.
st.set_page_config(
    page_title="Resume Keyword Identifier",
    page_icon="+",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Upper bound on the number of rows taken from each dataset split.
SAMPLE_SIZE = 200


@st.cache_resource
def load_keyphrase_extractor():
    """Build the key-phrase extractor once and reuse it across reruns."""
    return KeyPhraseTransformer()


@st.cache_resource
def load_model():
    """Return the sentence-embedding model used for the match score.

    Cached so the model is constructed once instead of on every script rerun.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


@st.cache_resource
def _load_tokenizer():
    """Return the BERT tokenizer used to prepare the resume dataset."""
    return AutoTokenizer.from_pretrained("bert-base-cased")


kp = load_keyphrase_extractor()
tokenizer = _load_tokenizer()


def tokenize_function(examples):
    """Tokenize the ``Resume`` column of a batch, padding/truncating to max length."""
    return tokenizer(examples["Resume"], padding="max_length", truncation=True)


def compute_metrics(eval_pred):
    """Return classification accuracy for a ``Trainer`` evaluation pass.

    Args:
        eval_pred: The ``(logits, labels)`` pair handed over by ``Trainer``.

    Returns:
        A dict with a single ``"accuracy"`` entry.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float(np.mean(predictions == labels))}


def _sample(split, size=SAMPLE_SIZE):
    """Shuffle *split* deterministically and keep at most *size* rows."""
    # Clamp to the split length so a small split does not raise on select().
    return split.shuffle(seed=42).select(range(min(size, len(split))))


@st.cache_resource
def _train_resume_classifier():
    """Fine-tune ``bert-base-cased`` on the resume dataset and return the Trainer.

    Wrapped in ``st.cache_resource`` so the download, tokenization and training
    happen once rather than on every rerun of the script.
    """
    # NOTE(review): split names ("Train"/"Test"), the presence of a label
    # column and num_labels=5 are taken as-is from the original script --
    # confirm them against the dataset card.
    dataset = load_dataset("Unknown92/Resume_dataset")
    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    small_train_dataset = _sample(tokenized_datasets["Train"])
    small_eval_dataset = _sample(tokenized_datasets["Test"])

    classifier = AutoModelForSequenceClassification.from_pretrained(
        "bert-base-cased", num_labels=5
    )
    # NOTE(review): newer transformers releases spell this argument
    # `eval_strategy`; confirm against the installed version.
    training_args = TrainingArguments(
        output_dir="test_trainer", evaluation_strategy="epoch"
    )
    classifier_trainer = Trainer(
        model=classifier,
        args=training_args,
        train_dataset=small_train_dataset,
        eval_dataset=small_eval_dataset,
        compute_metrics=compute_metrics,
    )
    classifier_trainer.train()
    return classifier_trainer


trainer = _train_resume_classifier()


def calculate_similarity(model, text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        model: Object exposing ``encode(list_of_str)`` that returns one
            embedding row per input string.
        text1: First text.
        text2: Second text.

    Returns:
        The scalar similarity taken from the 1x1 similarity matrix.
    """
    # Encode both texts in a single batch instead of two separate calls.
    embeddings = model.encode([text1, text2])
    return cosine_similarity(embeddings[:1], embeddings[1:])[0][0]


def generate_wordcloud(text, title):
    """Render a word cloud of *text* in the Streamlit app under *title*.

    If the text contains no usable words, an informational message is shown
    instead of raising.
    """
    try:
        cloud = WordCloud(
            width=800, height=400, background_color="white"
        ).generate(text)
    except ValueError:
        # WordCloud refuses to draw when it finds no words in the text.
        st.info(f"{title}: no keywords to display.")
        return

    # Use an explicit figure rather than pyplot's global state, and close it
    # afterwards so figures do not accumulate across reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")
    ax.set_title(title)
    st.pyplot(fig)
    plt.close(fig)
# --- User interface: collect the two texts and report how well they match ---
st.title("Resume Match Calculator")

# Embedding model handed to calculate_similarity (it must expose .encode).
model = load_model()

# Placeholder for custom markup intended to style the text-area labels; the
# markup string is currently empty, so no styling is applied.
st.markdown("", unsafe_allow_html=True)

jd = st.text_area("Paste the Job Description:", height=100)
resume = st.text_area("Paste Your Resume:", height=100)

if st.button("Calculate Match Score"):
    # Strip first so whitespace-only input is treated as missing.
    if jd.strip() and resume.strip():
        score = calculate_similarity(model, jd, resume)
        jd_keywords = kp.get_key_phrases(jd)
        resume_keywords = kp.get_key_phrases(resume)

        # Keywords found in the job description but absent from the resume,
        # sorted so the output order is stable between runs.
        missing_keywords = sorted(set(jd_keywords) - set(resume_keywords))

        # Only draw a word cloud when there is at least one keyword to plot.
        if jd_keywords:
            generate_wordcloud(" ".join(jd_keywords), "Word Cloud for JD Keywords")
        if resume_keywords:
            generate_wordcloud(
                " ".join(resume_keywords), "Word Cloud for Resume Keywords"
            )

        st.write("The match score is:")
        st.write(score)
        st.write("JD Keywords:")
        st.write(jd_keywords)
        st.write("Resume Keywords:")
        st.write(resume_keywords)
        st.write("Missing Keywords in Resume:")
        st.write(missing_keywords)
    else:
        st.write("Please enter both the job description and resume.")