Upload 3 files

- app.py +47 -40
- dockerfile +16 -10
- requirements.txt +5 -10

app.py CHANGED
@@ -6,7 +6,7 @@ import string
 import streamlit as st
 from tensorflow.keras.models import load_model
 from tensorflow.keras.preprocessing.sequence import pad_sequences
-import os
+import os
 
 # Abbreviations dictionary for job market
 abbreviations = {

@@ -102,8 +102,6 @@ abbreviations = {
     "ops": "operations"
 }
 
-
-
 def ensure_model_installed():
     try:
         spacy.load('en_core_web_sm')

@@ -111,6 +109,7 @@ def ensure_model_installed():
         from spacy.cli import download
         download('en_core_web_sm')
         spacy.load('en_core_web_sm')
+
 # Ensure the model is installed
 ensure_model_installed()
 

@@ -118,7 +117,7 @@ nlp = spacy.load("en_core_web_sm")
 
 def expand_abbreviations(text, abbreviations):
     for abbr, expanded in abbreviations.items():
-        text = re.sub(r'\b{}\b'.format(abbr), expanded, text)
+        text = re.sub(r'\b{}\b'.format(abbr), expanded, text, flags=re.IGNORECASE)
     return text
 
 def clean_and_preprocess(text):

@@ -139,24 +138,24 @@ def extract_nouns(text):
 # Define the sector options and their corresponding model and tokenizer paths
 sectors = {
     'HR': {
-        'model':
-        'tokenizer':
+        'model': 'modelfile/bighr2.keras',
+        'tokenizer': 'tokernizer/tokenizershr.pkl'
     },
     'IT': {
-        'model':
-        'tokenizer':
+        'model': 'modelfile/bigit2.keras',
+        'tokenizer': 'tokernizer/tokenizersit.pkl'
     },
     'Sales': {
-        'model':
-        'tokenizer':
+        'model': 'modelfile/bigrsales2.keras',
+        'tokenizer': 'tokernizer/tokenizerssales.pkl'
     },
     'Health': {
-        'model':
-        'tokenizer':
+        'model': 'modelfile/bighealth2.keras',
+        'tokenizer': 'tokernizer/tokenizershealth.pkl'
     },
     'Other': {
-        'model':
-        'tokenizer':
+        'model': 'modelfile/bigothers2.keras',
+        'tokenizer': 'tokernizer/tokenizersothers.pkl'
     }
 }
 

@@ -174,48 +173,56 @@ job_description = st.text_area("Paste Job Description:", height=150)
 # Sector selection
 sector = st.selectbox("Select Sector:", list(sectors.keys()))
 
-if st.button("Calculate ATS
+if st.button("Calculate ATS Score"):
     if resume and job_description:
         try:
             # Load the selected model and tokenizer
             model_path = sectors[sector]['model']
             tokenizer_path = sectors[sector]['tokenizer']
 
-            [25 deleted lines: the previous model-loading and prediction logic; content not recoverable from this view]
+            if not os.path.isfile(model_path):
+                st.error(f"Model file not found: {model_path}")
+            elif not os.path.isfile(tokenizer_path):
+                st.error(f"Tokenizer file not found: {tokenizer_path}")
+            else:
+                model = load_model(model_path)
 
+                with open(tokenizer_path, 'rb') as f:
+                    tokenizers = pickle.load(f)
 
+                resume_tokenizer = tokenizers.get('resume_tokenizer')
+                description_tokenizer = tokenizers.get('description_tokenizer')
+                common_nouns_tokenizer = tokenizers.get('common_nouns_tokenizer')
 
+                if not (resume_tokenizer and description_tokenizer and common_nouns_tokenizer):
+                    st.error("Tokenizer components are missing from the file.")
+                else:
+                    # Preprocess the resume
+                    processed_resume = clean_and_preprocess(resume)
 
+                    # Preprocess the job description
+                    processed_description = clean_and_preprocess(job_description)
 
+                    # Convert to sequences using the resume tokenizer
+                    resume_sequence = resume_tokenizer.texts_to_sequences([processed_resume])
+                    resume_data_padded = pad_sequences(resume_sequence, maxlen=1500)
 
+                    # Convert to sequences using the description tokenizer
+                    description_sequence = description_tokenizer.texts_to_sequences([processed_description])
+                    description_data_padded = pad_sequences(description_sequence, maxlen=1500)
 
+                    # Extract common nouns from the resume
+                    common_nouns = set(extract_nouns(processed_resume))
+                    common_nouns_str = ' '.join(common_nouns)
 
+                    # Convert to sequences using the common nouns tokenizer
+                    common_nouns_sequence = common_nouns_tokenizer.texts_to_sequences([common_nouns_str])
+                    common_nouns_data = pad_sequences(common_nouns_sequence, maxlen=10)
 
+                    # Make predictions
+                    prediction = model.predict([resume_data_padded, description_data_padded, common_nouns_data])
 
+                    st.success(f"Your predicted ATS Score is: {prediction[0][0]:.2f}")
         except Exception as e:
             st.error(f"An error occurred: {e}")
     else:
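The rewritten loading block assumes each tokenizer .pkl file unpickles to a dict holding three fitted Keras tokenizers under the keys resume_tokenizer, description_tokenizer, and common_nouns_tokenizer. As a minimal sketch of how a compatible bundle could be produced at training time (the text lists below are placeholders, not corpora from this repo):

# Sketch: building a tokenizer bundle in the shape app.py expects.
# The corpus variables are placeholders; real tokenizers would be fitted
# on each sector's training resumes, descriptions, and noun strings.
import pickle
from tensorflow.keras.preprocessing.text import Tokenizer

resume_texts = ["sample resume text"]            # placeholder corpus
description_texts = ["sample job description"]   # placeholder corpus
noun_texts = ["python sql communication"]        # placeholder corpus

resume_tokenizer = Tokenizer()
resume_tokenizer.fit_on_texts(resume_texts)
description_tokenizer = Tokenizer()
description_tokenizer.fit_on_texts(description_texts)
common_nouns_tokenizer = Tokenizer()
common_nouns_tokenizer.fit_on_texts(noun_texts)

# Dump all three under the key names app.py reads with tokenizers.get(...)
with open('tokernizer/tokenizershr.pkl', 'wb') as f:
    pickle.dump({
        'resume_tokenizer': resume_tokenizer,
        'description_tokenizer': description_tokenizer,
        'common_nouns_tokenizer': common_nouns_tokenizer,
    }, f)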
dockerfile CHANGED

@@ -1,21 +1,27 @@
-# Use a base image with Python
+# Use a base image with Python
 FROM python:3.9-slim
 
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
 # Set working directory
 WORKDIR /app
 
-# Copy requirements file
-COPY requirements.txt
+# Copy requirements file
+COPY requirements.txt /app/
 
-# Install
+# Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-#
-
-
+# Copy the rest of the application code
+COPY . /app/
+
+# Ensure the SpaCy model is installed
+RUN python -c "import spacy; spacy.cli.download('en_core_web_sm')"
 
-#
-
+# Expose the port Streamlit will run on
+EXPOSE 8501
 
-#
+# Run the Streamlit application
 CMD ["streamlit", "run", "app.py"]
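With these changes the image copies the application code, pre-downloads the SpaCy model at build time, and exposes Streamlit's default port. A typical build-and-run sequence would be as follows; the ats-score image tag is illustrative, not from this repo:

docker build -t ats-score .
docker run -p 8501:8501 ats-score

Note that the RUN python -c "import spacy; ..." step only succeeds if spacy was installed by the preceding pip step, which the committed requirements.txt does provide (spacy==3.5.1).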
requirements.txt CHANGED

@@ -1,14 +1,9 @@
-# pickle5
-# numpy
 # streamlit
-#
-#
-#
-#
-
-# https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl
-# pandas
-# pydantic==1.10.7
+# numpy
+# tensorflow==2.14.0
+# spacy==3.5.1
+# pickle5
+
 
 spacy==3.5.1
 pydantic==1.10.7
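As committed, only spacy and pydantic are active installs; streamlit, numpy, tensorflow, and pickle5 remain commented out even though app.py imports streamlit and tensorflow. A quick local sanity check, assuming those pins are uncommented first, would be:

pip install -r requirements.txt
python -c "import streamlit, tensorflow, spacy"  # imports app.py relies on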