import pickle
import numpy as np
import spacy
import re
import string
import streamlit as st
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Job-market abbreviations mapped to their expansions.
# Keys are lowercase and unique; expand_abbreviations walks the mapping in
# this order and substitutes each key as a whole-word, case-sensitive match.
# NOTE(review): several keys are also ordinary words (e.g. "it", "log",
# "exp"), so those words get expanded wherever they appear - confirm this
# is intended.
abbreviations = {
    "mgr": "manager",
    "sr": "senior",
    "jr": "junior",
    "asst": "assistant",
    "assoc": "associate",
    "dept": "department",
    "exp": "experience",
    "hr": "human resources",
    "acct": "account",
    "acctg": "accounting",
    "fin": "finance",
    "eng": "engineer",
    "engg": "engineering",
    "it": "information technology",
    "qa": "quality assurance",
    "dev": "development",
    "devops": "development operations",
    "proj": "project",
    "mktg": "marketing",
    "biz": "business",
    "comm": "communication",
    "adm": "administration",
    "sec": "secretary",
    "exec": "executive",
    "corp": "corporation",
    "intl": "international",
    "rep": "representative",
    "mfg": "manufacturing",
    "prod": "production",
    "purch": "purchasing",
    "sales": "sales",
    "cust": "customer",
    "svc": "service",
    "tech": "technical",
    "sup": "supervisor",
    "supv": "supervision",
    "log": "logistics",
    "inv": "inventory",
    "sch": "schedule",
    "edu": "education",
    "lang": "language",
    "pr": "public relations",
    "hrd": "human resources development",
    "cfo": "chief financial officer",
    "ceo": "chief executive officer",
    "coo": "chief operating officer",
    "cmo": "chief marketing officer",
    "cto": "chief technology officer",
    "cio": "chief information officer",
    "pmo": "project management office",
    "pmp": "project management professional",
    "ba": "business analyst",
    "bpm": "business process management",
    "ui": "user interface",
    "ux": "user experience",
    "svp": "senior vice president",
    "vp": "vice president",
    "gm": "general manager",
    "doe": "depends on experience",
    "r&d": "research and development",
    "seo": "search engine optimization",
    "sem": "search engine marketing",
    "smm": "social media marketing",
    "b2b": "business to business",
    "b2c": "business to consumer",
    "kpi": "key performance indicator",
    "roi": "return on investment",
    "saas": "software as a service",
    "paas": "platform as a service",
    "iaas": "infrastructure as a service",
    "crm": "customer relationship management",
    "erp": "enterprise resource planning",
    "sd": "software development",
    "pm": "project manager",
    "pa": "personal assistant",
    "hrm": "human resources management",
    "scm": "supply chain management",
    "bi": "business intelligence",
    "ops": "operations",
}


# Load the small English spaCy pipeline at module level. clean_and_preprocess
# and extract_nouns call it for tokens, lemmas, stop-word flags and POS tags.
nlp = spacy.load("en_core_web_sm")

def expand_abbreviations(text, abbreviations):
    r"""Replace whole-word abbreviations in *text* with their expansions.

    Entries are applied one after another in the mapping's iteration order,
    so the result of an earlier substitution is visible to later ones.
    Matching is case-sensitive and anchored on ``\b`` word boundaries.

    Args:
        text: The string to expand.
        abbreviations: Mapping of abbreviation -> replacement. Both sides are
            treated as literal text, never as regex syntax.

    Returns:
        The text with every matching abbreviation replaced.
    """
    for abbr, expanded in abbreviations.items():
        # Escape the key so characters such as '.', '+' or '(' match
        # literally instead of being interpreted as regex operators.
        pattern = r'\b{}\b'.format(re.escape(abbr))
        # A callable replacement is inserted verbatim; a plain string would
        # have its backslashes processed as template escapes by re.sub.
        text = re.sub(pattern, lambda _match, repl=expanded: repl, text)
    return text

def clean_and_preprocess(text):
    """Normalise raw text into a space-joined string of lemmas.

    Steps, in order: expand abbreviations from the module-level table,
    lowercase, delete digit runs, delete ASCII punctuation, collapse
    whitespace, then keep the spaCy lemma of every alphabetic token that
    is not a stop word.

    Args:
        text: Raw resume or job-description text.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    # NOTE(review): expansion runs before lowercasing and the lookup is
    # case-sensitive with lowercase keys, so "HR" or "Sr" are not expanded.
    # Confirm against the preprocessing used at training time before
    # changing the order.
    expanded = expand_abbreviations(text, abbreviations)
    lowered = expanded.lower()

    without_digits = re.sub(r'\d+', '', lowered)
    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punct = without_digits.translate(punctuation_table)
    squeezed = re.sub(r'\s+', ' ', without_punct).strip()

    lemmas = []
    for token in nlp(squeezed):
        if token.is_alpha and not token.is_stop:
            lemmas.append(token.lemma_)
    return ' '.join(lemmas)

def extract_nouns(text):
    """Return the lemma of every token in *text* that spaCy tags as ``NOUN``.

    Lemmas come back in document order and repeated nouns are kept.
    """
    return [tok.lemma_ for tok in nlp(text) if tok.pos_ == "NOUN"]

# Sector options shown in the UI, each mapped to the relative paths of its
# saved Keras model and pickled tokenizer bundle.
# Forward slashes are used because they are accepted as path separators on
# Windows as well as POSIX systems, whereas a backslash is a separator on
# Windows only.
sectors = {
    'HR': {
        'model': 'modelfile/bighr2.keras',
        'tokenizer': 'tokernizer/tokenizershr.pkl',
    },
    'IT': {
        'model': 'modelfile/bigit2.keras',
        'tokenizer': 'tokernizer/tokenizersit.pkl',
    },
    'Sales': {
        'model': 'modelfile/bigrsales2.keras',
        'tokenizer': 'tokernizer/tokenizerssales.pkl',
    },
    'Health': {
        'model': 'modelfile/bighealth2.keras',
        'tokenizer': 'tokernizer/tokenizershealth.pkl',
    },
    'Other': {
        'model': 'modelfile/bigothers2.keras',
        'tokenizer': 'tokernizer/tokenizersothers.pkl',
    },
}

# Streamlit UI: page header followed by the three user inputs.
st.title("Resume and Job Description Analyzer")

st.write(
    "Upload your resume and job description, then select the job sector "
    "to analyze how well the resume fits the job description."
)

# Free-text boxes for the resume and the job description.
resume = st.text_area("Paste your Resume:", height=150)
job_description = st.text_area("Paste Job Description:", height=150)

# Drop-down listing the keys of the sectors table.
sector = st.selectbox("Select Sector:", list(sectors))

# Sequence lengths handed to pad_sequences for the two text inputs and for
# the noun input.
# NOTE(review): presumably these must match the input shapes of the saved
# models - confirm before changing.
MAX_TEXT_LEN = 1500
MAX_NOUNS_LEN = 10

if st.button("Analyze Resume"):
    # Strip before testing so whitespace-only input is rejected the same way
    # as empty input instead of being sent to the model.
    if not (resume.strip() and job_description.strip()):
        st.error("Please paste both your resume and job description before analyzing.")
    else:
        try:
            # Look up and load the artefacts for the selected sector.
            model_path = sectors[sector]['model']
            tokenizer_path = sectors[sector]['tokenizer']

            model = load_model(model_path)

            # NOTE: pickle.load can execute arbitrary code while loading, so
            # the tokenizer files must come from a trusted source.
            with open(tokenizer_path, 'rb') as f:
                tokenizers = pickle.load(f)

            resume_tokenizer = tokenizers['resume_tokenizer']
            description_tokenizer = tokenizers['description_tokenizer']
            common_nouns_tokenizer = tokenizers['common_nouns_tokenizer']

            # Apply the same cleaning to both texts.
            processed_resume = clean_and_preprocess(resume)
            processed_description = clean_and_preprocess(job_description)

            # Each text goes through its own tokenizer, then is padded to a
            # fixed length.
            resume_sequence = resume_tokenizer.texts_to_sequences([processed_resume])
            resume_data_padded = pad_sequences(resume_sequence, maxlen=MAX_TEXT_LEN)

            description_sequence = description_tokenizer.texts_to_sequences([processed_description])
            description_data_padded = pad_sequences(description_sequence, maxlen=MAX_TEXT_LEN)

            # Unique nouns from the resume, sorted so the model input is
            # reproducible: iteration order of a set of strings can differ
            # between interpreter runs, and pad_sequences keeps at most
            # MAX_NOUNS_LEN entries, so an unsorted set could feed different
            # nouns to the model for the same resume.
            # NOTE(review): the name suggests nouns shared by the resume and
            # the job description, but only the resume is used here -
            # confirm against the training pipeline.
            common_nouns = sorted(set(extract_nouns(processed_resume)))
            common_nouns_str = ' '.join(common_nouns)

            common_nouns_sequence = common_nouns_tokenizer.texts_to_sequences([common_nouns_str])
            common_nouns_data = pad_sequences(common_nouns_sequence, maxlen=MAX_NOUNS_LEN)

            # The model takes the three padded inputs in this order.
            prediction = model.predict([resume_data_padded, description_data_padded, common_nouns_data])

            st.success(f"Your predicted ATS Score is: {prediction[0][0]:.2f}")
        except Exception as exc:
            # UI boundary: report any failure to the user instead of
            # letting the traceback escape.
            st.error(f"An error occurred: {exc}")