mednow committed on
Commit
c1dbcdd
·
verified ·
1 Parent(s): fd35728

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +47 -40
  2. dockerfile +16 -10
  3. requirements.txt +5 -10
app.py CHANGED
@@ -6,7 +6,7 @@ import string
6
  import streamlit as st
7
  from tensorflow.keras.models import load_model
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
- import os
10
 
11
  # Abbreviations dictionary for job market
12
  abbreviations = {
@@ -102,8 +102,6 @@ abbreviations = {
102
  "ops": "operations"
103
  }
104
 
105
-
106
-
107
  def ensure_model_installed():
108
  try:
109
  spacy.load('en_core_web_sm')
@@ -111,6 +109,7 @@ def ensure_model_installed():
111
  from spacy.cli import download
112
  download('en_core_web_sm')
113
  spacy.load('en_core_web_sm')
 
114
  # Ensure the model is installed
115
  ensure_model_installed()
116
 
@@ -118,7 +117,7 @@ nlp = spacy.load("en_core_web_sm")
118
 
119
  def expand_abbreviations(text, abbreviations):
120
  for abbr, expanded in abbreviations.items():
121
- text = re.sub(r'\b{}\b'.format(abbr), expanded, text)
122
  return text
123
 
124
  def clean_and_preprocess(text):
@@ -139,24 +138,24 @@ def extract_nouns(text):
139
  # Define the sector options and their corresponding model and tokenizer paths
140
  sectors = {
141
  'HR': {
142
- 'model': r'modelfile\bighr2.keras',
143
- 'tokenizer': r'tokernizer\tokenizershr.pkl'
144
  },
145
  'IT': {
146
- 'model': r'modelfile\bigit2.keras',
147
- 'tokenizer': r'tokernizer\tokenizersit.pkl'
148
  },
149
  'Sales': {
150
- 'model': r'modelfile\bigrsales2.keras',
151
- 'tokenizer': r'tokernizer\tokenizerssales.pkl'
152
  },
153
  'Health': {
154
- 'model': r'modelfile\bighealth2.keras',
155
- 'tokenizer': r'tokernizer\tokenizershealth.pkl'
156
  },
157
  'Other': {
158
- 'model': r'modelfile\bigothers2.keras',
159
- 'tokenizer': r'tokernizer\tokenizersothers.pkl'
160
  }
161
  }
162
 
@@ -174,48 +173,56 @@ job_description = st.text_area("Paste Job Description:", height=150)
174
  # Sector selection
175
  sector = st.selectbox("Select Sector:", list(sectors.keys()))
176
 
177
- if st.button("Calculate ATS SCore"):
178
  if resume and job_description:
179
  try:
180
  # Load the selected model and tokenizer
181
  model_path = sectors[sector]['model']
182
  tokenizer_path = sectors[sector]['tokenizer']
183
 
184
- model = load_model(model_path)
 
 
 
 
 
185
 
186
- with open(tokenizer_path, 'rb') as f:
187
- tokenizers = pickle.load(f)
188
 
189
- resume_tokenizer = tokenizers['resume_tokenizer']
190
- description_tokenizer = tokenizers['description_tokenizer']
191
- common_nouns_tokenizer = tokenizers['common_nouns_tokenizer']
192
 
193
- # Preprocess the resume
194
- processed_resume = clean_and_preprocess(resume)
 
 
 
195
 
196
- # Preprocess the job description
197
- processed_description = clean_and_preprocess(job_description)
198
 
199
- # Convert to sequences using the resume tokenizer
200
- resume_sequence = resume_tokenizer.texts_to_sequences([processed_resume])
201
- resume_data_padded = pad_sequences(resume_sequence, maxlen=1500)
202
 
203
- # Convert to sequences using the description tokenizer
204
- description_sequence = description_tokenizer.texts_to_sequences([processed_description])
205
- description_data_padded = pad_sequences(description_sequence, maxlen=1500)
206
 
207
- # Extract common nouns from the resume
208
- common_nouns = set(extract_nouns(processed_resume))
209
- common_nouns_str = ' '.join(common_nouns)
210
 
211
- # Convert to sequences using the common nouns tokenizer
212
- common_nouns_sequence = common_nouns_tokenizer.texts_to_sequences([common_nouns_str])
213
- common_nouns_data = pad_sequences(common_nouns_sequence, maxlen=10)
214
 
215
- # Make predictions
216
- prediction = model.predict([resume_data_padded, description_data_padded, common_nouns_data])
217
 
218
- st.success(f"Your predicted ATS Score is: {prediction[0][0]:.2f}")
219
  except Exception as e:
220
  st.error(f"An error occurred: {e}")
221
  else:
 
6
  import streamlit as st
7
  from tensorflow.keras.models import load_model
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
+ import os
10
 
11
  # Abbreviations dictionary for job market
12
  abbreviations = {
 
102
  "ops": "operations"
103
  }
104
 
 
 
105
  def ensure_model_installed():
106
  try:
107
  spacy.load('en_core_web_sm')
 
109
  from spacy.cli import download
110
  download('en_core_web_sm')
111
  spacy.load('en_core_web_sm')
112
+
113
  # Ensure the model is installed
114
  ensure_model_installed()
115
 
 
117
 
118
  def expand_abbreviations(text, abbreviations):
119
  for abbr, expanded in abbreviations.items():
120
+ text = re.sub(r'\b{}\b'.format(abbr), expanded, text, flags=re.IGNORECASE)
121
  return text
122
 
123
  def clean_and_preprocess(text):
 
138
  # Define the sector options and their corresponding model and tokenizer paths
139
  sectors = {
140
  'HR': {
141
+ 'model': 'modelfile/bighr2.keras',
142
+ 'tokenizer': 'tokernizer/tokenizershr.pkl'
143
  },
144
  'IT': {
145
+ 'model': 'modelfile/bigit2.keras',
146
+ 'tokenizer': 'tokernizer/tokenizersit.pkl'
147
  },
148
  'Sales': {
149
+ 'model': 'modelfile/bigrsales2.keras',
150
+ 'tokenizer': 'tokernizer/tokenizerssales.pkl'
151
  },
152
  'Health': {
153
+ 'model': 'modelfile/bighealth2.keras',
154
+ 'tokenizer': 'tokernizer/tokenizershealth.pkl'
155
  },
156
  'Other': {
157
+ 'model': 'modelfile/bigothers2.keras',
158
+ 'tokenizer': 'tokernizer/tokenizersothers.pkl'
159
  }
160
  }
161
 
 
173
  # Sector selection
174
  sector = st.selectbox("Select Sector:", list(sectors.keys()))
175
 
176
+ if st.button("Calculate ATS Score"):
177
  if resume and job_description:
178
  try:
179
  # Load the selected model and tokenizer
180
  model_path = sectors[sector]['model']
181
  tokenizer_path = sectors[sector]['tokenizer']
182
 
183
+ if not os.path.isfile(model_path):
184
+ st.error(f"Model file not found: {model_path}")
185
+ elif not os.path.isfile(tokenizer_path):
186
+ st.error(f"Tokenizer file not found: {tokenizer_path}")
187
+ else:
188
+ model = load_model(model_path)
189
 
190
+ with open(tokenizer_path, 'rb') as f:
191
+ tokenizers = pickle.load(f)
192
 
193
+ resume_tokenizer = tokenizers.get('resume_tokenizer')
194
+ description_tokenizer = tokenizers.get('description_tokenizer')
195
+ common_nouns_tokenizer = tokenizers.get('common_nouns_tokenizer')
196
 
197
+ if not (resume_tokenizer and description_tokenizer and common_nouns_tokenizer):
198
+ st.error("Tokenizer components are missing from the file.")
199
+ else:
200
+ # Preprocess the resume
201
+ processed_resume = clean_and_preprocess(resume)
202
 
203
+ # Preprocess the job description
204
+ processed_description = clean_and_preprocess(job_description)
205
 
206
+ # Convert to sequences using the resume tokenizer
207
+ resume_sequence = resume_tokenizer.texts_to_sequences([processed_resume])
208
+ resume_data_padded = pad_sequences(resume_sequence, maxlen=1500)
209
 
210
+ # Convert to sequences using the description tokenizer
211
+ description_sequence = description_tokenizer.texts_to_sequences([processed_description])
212
+ description_data_padded = pad_sequences(description_sequence, maxlen=1500)
213
 
214
+ # Extract common nouns from the resume
215
+ common_nouns = set(extract_nouns(processed_resume))
216
+ common_nouns_str = ' '.join(common_nouns)
217
 
218
+ # Convert to sequences using the common nouns tokenizer
219
+ common_nouns_sequence = common_nouns_tokenizer.texts_to_sequences([common_nouns_str])
220
+ common_nouns_data = pad_sequences(common_nouns_sequence, maxlen=10)
221
 
222
+ # Make predictions
223
+ prediction = model.predict([resume_data_padded, description_data_padded, common_nouns_data])
224
 
225
+ st.success(f"Your predicted ATS Score is: {prediction[0][0]:.2f}")
226
  except Exception as e:
227
  st.error(f"An error occurred: {e}")
228
  else:
dockerfile CHANGED
@@ -1,21 +1,27 @@
1
- # Use a base image with Python installed
2
  FROM python:3.9-slim
3
 
 
 
 
 
4
  # Set working directory
5
  WORKDIR /app
6
 
7
- # Copy requirements file to the container
8
- COPY requirements.txt .
9
 
10
- # Install Python dependencies
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
- # Install SpaCy and the en_core_web_sm model
14
- RUN pip install --no-cache-dir spacy && \
15
- python -m spacy download en_core_web_sm
 
 
16
 
17
- # Copy the rest of your Streamlit application code to the container
18
- COPY . .
19
 
20
- # Set the entry point for the application
21
  CMD ["streamlit", "run", "app.py"]
 
1
+ # Use a base image with Python
2
  FROM python:3.9-slim
3
 
4
+ # Set environment variables
5
+ ENV PYTHONDONTWRITEBYTECODE=1
6
+ ENV PYTHONUNBUFFERED=1
7
+
8
  # Set working directory
9
  WORKDIR /app
10
 
11
+ # Copy requirements file
12
+ COPY requirements.txt /app/
13
 
14
+ # Install dependencies
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
+ # Copy the rest of the application code
18
+ COPY . /app/
19
+
20
+ # Ensure the SpaCy model is installed
21
+ RUN python -c "import spacy; spacy.cli.download('en_core_web_sm')"
22
 
23
+ # Expose the port Streamlit will run on
24
+ EXPOSE 8501
25
 
26
+ # Run the Streamlit application
27
  CMD ["streamlit", "run", "app.py"]
requirements.txt CHANGED
@@ -1,14 +1,9 @@
1
- # pickle5
2
- # numpy
3
  # streamlit
4
- # tensorflow
5
- # # spacy==3.5.0
6
- # # # SpaCy model
7
- # # https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz
8
- # spacy
9
- # https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl
10
- # pandas
11
- # pydantic==1.10.7
12
 
13
  spacy==3.5.1
14
  pydantic==1.10.7
 
 
 
1
  # streamlit
2
+ # numpy
3
+ # tensorflow==2.14.0
4
+ # spacy==3.5.1
5
+ # pickle5
6
+
 
 
 
7
 
8
  spacy==3.5.1
9
  pydantic==1.10.7