# Resume Search and Ranking Tool — Streamlit app.
# (Header reconstructed; the original lines were "Spaces / Sleeping" page
# residue from a Hugging Face Spaces scrape, not part of the program.)
import os
import re

import docx
import nltk
import PyPDF2
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Keep NLTK corpora in a project-local directory so downloads work on
# sandboxed hosts (e.g. Hugging Face Spaces) without a writable home dir.
nltk_data_dir = os.path.join(os.getcwd(), 'nltk_data')
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.append(nltk_data_dir)
nltk.download('stopwords', download_dir=nltk_data_dir, quiet=True)
nltk.download('wordnet', download_dir=nltk_data_dir, quiet=True)
nltk.download('punkt', download_dir=nltk_data_dir, quiet=True)

# Sentence-embedding model used to compare the job description against
# each resume. Loaded once at import time (model load is expensive).
# NOTE(review): removed a leftover debug print ("PyPDF2 successfully
# imported!") that polluted stdout on every app start.
model = SentenceTransformer('all-mpnet-base-v2')
| # Functions for resume processing | |
def read_resume(file):
    """Extract plain text from an uploaded resume.

    Args:
        file: an uploaded file-like object with a ``.name`` attribute.
            PDF, DOCX and TXT are supported, dispatched on extension;
            any other extension yields an empty string.

    Returns:
        The extracted text ("" when nothing could be read). Read errors
        are reported to the UI via ``st.error`` instead of raising, so a
        partial or empty string may be returned on failure.
    """
    ext = os.path.splitext(file.name)[1].lower()
    content = ""
    try:
        if ext == ".pdf":
            reader = PyPDF2.PdfReader(file)
            for page in reader.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images) — guard against "None + str".
                content += (page.extract_text() or "") + "\n"
        elif ext == ".docx":
            doc = docx.Document(file)
            content = '\n'.join(para.text for para in doc.paragraphs)
        elif ext == ".txt":
            content = file.read().decode('utf-8')
    except Exception as e:
        # Best-effort: surface the problem in the UI, keep whatever we got.
        st.error(f"Error reading {file.name}: {e}")
    return content
| # Preprocessing text | |
def preprocess_text(text):
    """Normalize raw text for embedding.

    Lowercases, strips every character that is not a-z or whitespace,
    drops English stopwords, and lemmatizes the surviving words.

    Args:
        text: arbitrary extracted resume / job-description text.

    Returns:
        A single space-joined string of lemmatized content words.
    """
    cleaned = re.sub(r'[^a-z\s]', '', text.lower())
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    kept = (tok for tok in cleaned.split() if tok not in stop_words)
    return ' '.join(lemmatizer.lemmatize(tok) for tok in kept)
def compute_similarity(job_description, resumes):
    """Rank resumes by semantic similarity to a job description.

    Args:
        job_description: free-text job description.
        resumes: mapping of {resume filename: extracted resume text}.

    Returns:
        List of (resume_name, similarity_score) tuples sorted by score,
        highest first; [] when no resumes were supplied.
    """
    if not resumes:
        # Nothing to rank — avoid calling the model on an empty batch.
        return []
    jd_embedding = model.encode(preprocess_text(job_description))
    names = list(resumes)
    # Batch-encode all resumes in a single call: one forward pass per batch
    # instead of one per resume, with identical embeddings per input.
    resume_embeddings = model.encode([preprocess_text(resumes[n]) for n in names])
    scores = cosine_similarity([jd_embedding], resume_embeddings)[0]
    return sorted(zip(names, scores), key=lambda pair: pair[1], reverse=True)
# ---------------------------------------------------------------- UI ----
st.title("Resume Search and Ranking Tool")

# Inputs: one or more resume files plus the job description to rank against.
uploaded_files = st.file_uploader("Upload resumes (PDF, DOCX, TXT)", accept_multiple_files=True)
job_description = st.text_area("Enter Job Description:")

if st.button("Rank Resumes"):
    if not (uploaded_files and job_description):
        st.warning("Please upload resumes and enter a job description.")
    else:
        # Keep only files that yielded some text.
        resumes = {}
        for up in uploaded_files:
            text = read_resume(up)
            if text:
                resumes[up.name] = text
        ranked_resumes = compute_similarity(job_description, resumes)
        st.success("Resumes ranked successfully!")
        for rank, (resume_name, score) in enumerate(ranked_resumes, start=1):
            st.write(f"{rank}. {resume_name} - Similarity: {score:.2f}")