import os
import pickle
import re
import string

import numpy as np
import spacy
import streamlit as st
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Lower-case abbreviation -> expansion, applied to resumes and job descriptions
# before any other cleaning. Each key appears exactly once; the mapping is
# applied in insertion order by expand_abbreviations().
abbreviations = {
    "mgr": "manager",
    "sr": "senior",
    "jr": "junior",
    "asst": "assistant",
    "assoc": "associate",
    "dept": "department",
    "exp": "experience",
    "hr": "human resources",
    "acct": "account",
    "acctg": "accounting",
    "fin": "finance",
    "eng": "engineer",
    "engg": "engineering",
    "it": "information technology",
    "qa": "quality assurance",
    "dev": "development",
    "devops": "development operations",
    "proj": "project",
    "mktg": "marketing",
    "biz": "business",
    "comm": "communication",
    "adm": "administration",
    "sec": "secretary",
    "exec": "executive",
    "corp": "corporation",
    "intl": "international",
    "rep": "representative",
    "mfg": "manufacturing",
    "prod": "production",
    "purch": "purchasing",
    "sales": "sales",
    "cust": "customer",
    "svc": "service",
    "tech": "technical",
    "sup": "supervisor",
    "supv": "supervision",
    "log": "logistics",
    "inv": "inventory",
    "sch": "schedule",
    "edu": "education",
    "lang": "language",
    "pr": "public relations",
    "hrd": "human resources development",
    "cfo": "chief financial officer",
    "ceo": "chief executive officer",
    "coo": "chief operating officer",
    "cmo": "chief marketing officer",
    "cto": "chief technology officer",
    "cio": "chief information officer",
    "pmo": "project management office",
    "pmp": "project management professional",
    "ba": "business analyst",
    "bpm": "business process management",
    "ui": "user interface",
    "ux": "user experience",
    "svp": "senior vice president",
    "vp": "vice president",
    "gm": "general manager",
    "doe": "depends on experience",
    "r&d": "research and development",
    "seo": "search engine optimization",
    "sem": "search engine marketing",
    "smm": "social media marketing",
    "b2b": "business to business",
    "b2c": "business to consumer",
    "kpi": "key performance indicator",
    "roi": "return on investment",
    "saas": "software as a service",
    "paas": "platform as a service",
    "iaas": "infrastructure as a service",
    "crm": "customer relationship management",
    "erp": "enterprise resource planning",
    "sd": "software development",
    "pm": "project manager",
    "pa": "personal assistant",
    "hrm": "human resources management",
    "scm": "supply chain management",
    "bi": "business intelligence",
    "ops": "operations",
}

_SPACY_MODEL_NAME = "en_core_web_sm"

# Sequence lengths passed to pad_sequences() for the three model inputs.
_MAX_TEXT_TOKENS = 1500
_MAX_COMMON_NOUNS = 10


def ensure_model_installed():
    """Load the spaCy English pipeline, downloading it first if necessary.

    Returns:
        The loaded spaCy pipeline, so callers do not have to load it a
        second time.

    Raises:
        OSError: If the pipeline still cannot be loaded after the download.
    """
    try:
        return spacy.load(_SPACY_MODEL_NAME)
    except OSError:
        # spacy.load raises OSError when the model package is not installed.
        from spacy.cli import download

        download(_SPACY_MODEL_NAME)
        return spacy.load(_SPACY_MODEL_NAME)


# Ensure the model is installed and keep the single loaded pipeline.
# NOTE: Streamlit re-executes this script on every interaction, so the pipeline
# (and the Keras models below) are reloaded each time; wrapping the loaders in
# st.cache_resource would avoid that if the installed Streamlit provides it.
nlp = ensure_model_installed()


def expand_abbreviations(text, abbreviations):
    """Replace whole-word abbreviations in *text* with their expansions.

    Matching is case-insensitive and anchored on word boundaries. The entries
    of *abbreviations* are applied one after another in iteration order.

    Args:
        text: Input string.
        abbreviations: Mapping of abbreviation to expansion.

    Returns:
        The text with every matching abbreviation expanded.
    """
    for abbr, expanded in abbreviations.items():
        # Escape the key so characters such as "+" or "." are matched
        # literally instead of being interpreted as regex syntax.
        pattern = rf"\b{re.escape(abbr)}\b"
        text = re.sub(pattern, expanded, text, flags=re.IGNORECASE)
    return text


def clean_and_preprocess(text):
    """Normalise free text into a space-separated string of lemmas.

    Steps, in order: expand abbreviations, lower-case, drop digit runs, drop
    ASCII punctuation, collapse whitespace, then lemmatise with spaCy while
    discarding non-alphabetic tokens and stop words.

    Args:
        text: Raw resume or job-description text.

    Returns:
        The cleaned text as lemmas joined by single spaces.
    """
    text = expand_abbreviations(text, abbreviations)
    text = text.lower()
    text = re.sub(r"\d+", "", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"\s+", " ", text).strip()
    doc = nlp(text)
    tokens = [
        token.lemma_
        for token in doc
        if token.is_alpha and not token.is_stop
    ]
    return " ".join(tokens)


def extract_nouns(text):
    """Return the lemmas of all tokens spaCy tags as ``NOUN`` in *text*.

    Lemmas are returned in document order and may contain duplicates.
    """
    doc = nlp(text)
    return [token.lemma_ for token in doc if token.pos_ == "NOUN"]


def _load_model_and_tokenizers(model_path, tokenizer_path):
    """Load a Keras model and its three pickled tokenizers.

    Args:
        model_path: Path of the saved Keras model file.
        tokenizer_path: Path of a pickle holding a dict with the keys
            ``resume_tokenizer``, ``description_tokenizer`` and
            ``common_nouns_tokenizer``.

    Returns:
        Tuple ``(model, resume_tokenizer, description_tokenizer,
        common_nouns_tokenizer)``.

    Raises:
        FileNotFoundError: If either file does not exist.
        ValueError: If any of the three tokenizers is absent from the pickle.
    """
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")
    if not os.path.isfile(tokenizer_path):
        raise FileNotFoundError(f"Tokenizer file not found: {tokenizer_path}")

    model = load_model(model_path)

    # SECURITY: pickle.load executes arbitrary code from the file. Only load
    # tokenizer files that ship with the application, never user uploads.
    with open(tokenizer_path, "rb") as f:
        tokenizers = pickle.load(f)

    resume_tokenizer = tokenizers.get("resume_tokenizer")
    description_tokenizer = tokenizers.get("description_tokenizer")
    common_nouns_tokenizer = tokenizers.get("common_nouns_tokenizer")

    # Test for absence explicitly rather than relying on object truthiness.
    if (
        resume_tokenizer is None
        or description_tokenizer is None
        or common_nouns_tokenizer is None
    ):
        raise ValueError("Tokenizer components are missing from the file.")

    return model, resume_tokenizer, description_tokenizer, common_nouns_tokenizer


# NOTE(review): "tokernizer" and "bigrsales2" look like typos, but they are
# kept exactly as written because they must match the names on disk - confirm
# before renaming anything.
def load_model_and_tokenizers_for_hr():
    """Load the HR-sector model and tokenizers."""
    return _load_model_and_tokenizers(
        "modelfile/bighr2.keras", "tokernizer/tokenizershr.pkl"
    )


def load_model_and_tokenizers_for_it():
    """Load the IT-sector model and tokenizers."""
    return _load_model_and_tokenizers(
        "modelfile/bigit2.keras", "tokernizer/tokenizersit.pkl"
    )


def load_model_and_tokenizers_for_sales():
    """Load the Sales-sector model and tokenizers."""
    return _load_model_and_tokenizers(
        "modelfile/bigrsales2.keras", "tokernizer/tokenizerssales.pkl"
    )


def load_model_and_tokenizers_for_health():
    """Load the Health-sector model and tokenizers."""
    return _load_model_and_tokenizers(
        "modelfile/bighealth2.keras", "tokernizer/tokenizershealth.pkl"
    )


def load_model_and_tokenizers_for_other():
    """Load the model and tokenizers used for every other sector."""
    return _load_model_and_tokenizers(
        "modelfile/bigothers2.keras", "tokernizer/tokenizersothers.pkl"
    )


# Sector label shown in the UI -> loader for that sector's artifacts.
_SECTOR_LOADERS = {
    "HR": load_model_and_tokenizers_for_hr,
    "IT": load_model_and_tokenizers_for_it,
    "Sales": load_model_and_tokenizers_for_sales,
    "Health": load_model_and_tokenizers_for_health,
    "Other": load_model_and_tokenizers_for_other,
}


def _predict_ats_score(resume_text, description_text, sector_name):
    """Run the sector model on a resume / job-description pair.

    Args:
        resume_text: Raw resume text.
        description_text: Raw job-description text.
        sector_name: One of the keys of ``_SECTOR_LOADERS``.

    Returns:
        The first element of the model's prediction as a float.

    Raises:
        KeyError: If *sector_name* is not a known sector.
        FileNotFoundError, ValueError: Propagated from the loader.
    """
    loader = _SECTOR_LOADERS[sector_name]
    model, resume_tokenizer, description_tokenizer, common_nouns_tokenizer = loader()

    processed_resume = clean_and_preprocess(resume_text)
    processed_description = clean_and_preprocess(description_text)

    resume_data_padded = pad_sequences(
        resume_tokenizer.texts_to_sequences([processed_resume]),
        maxlen=_MAX_TEXT_TOKENS,
    )
    description_data_padded = pad_sequences(
        description_tokenizer.texts_to_sequences([processed_description]),
        maxlen=_MAX_TEXT_TOKENS,
    )

    # De-duplicate while keeping first-occurrence order. A plain set() has a
    # per-process iteration order for strings, and because the sequence is
    # truncated to _MAX_COMMON_NOUNS that made the model input (and score)
    # vary between runs for the same resume.
    # NOTE(review): the name suggests nouns shared by the resume and the job
    # description, but only resume nouns are used here - confirm against the
    # training pipeline.
    common_nouns = list(dict.fromkeys(extract_nouns(processed_resume)))
    common_nouns_str = " ".join(common_nouns)
    common_nouns_data = pad_sequences(
        common_nouns_tokenizer.texts_to_sequences([common_nouns_str]),
        maxlen=_MAX_COMMON_NOUNS,
    )

    prediction = model.predict(
        [resume_data_padded, description_data_padded, common_nouns_data]
    )
    return float(prediction[0][0])


# Streamlit UI
st.title("ATS")
st.write("Upload your resume and job description, then select the job sector to analyze how well the resume fits the job description.")

# Resume input
resume = st.text_area("Paste your Resume:", height=150)

# Job description input
job_description = st.text_area("Paste Job Description:", height=150)

# Sector selection
sector = st.selectbox("Select Sector:", ['HR', 'IT', 'Sales', 'Health', 'Other'])

if st.button("Calculate ATS Score"):
    # Treat whitespace-only input the same as empty input.
    if resume.strip() and job_description.strip():
        try:
            score = _predict_ats_score(resume, job_description, sector)
        except (FileNotFoundError, ValueError) as known_error:
            # Missing artifacts / malformed tokenizer file: show the message as is.
            st.error(str(known_error))
        except Exception as e:
            # Top-level UI boundary: report anything unexpected instead of crashing.
            st.error(f"An error occurred: {e}")
        else:
            st.success(f"Your predicted ATS Score is: {score:.2f}")
    else:
        st.error("Please paste both your resume and job description before analyzing.")