Bur3hani committed on
Commit
5abbb5e
·
verified ·
1 Parent(s): 15d4fe0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -156
app.py CHANGED
@@ -126,159 +126,4 @@ def get_text_embeddings(text):
126
  else: return np.mean([token.vector for token in doc if token.has_vector], axis=0) if [token.vector for token in doc if token.has_vector] else np.zeros(nlp.vocab.vectors.shape[1])
127
 
128
def calculate_cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D embedding vectors.

    Args:
        vec1: 1-D numpy array (document embedding).
        vec2: 1-D numpy array of the same length.

    Returns:
        float similarity in [-1.0, 1.0]; 0.0 when either vector is all
        zeros (guards against divide-by-zero for empty/OOV embeddings).
    """
    # Zero vectors carry no signal; treat them as "no similarity".
    if np.all(vec1 == 0) or np.all(vec2 == 0):
        return 0.0
    # A plain dot product over the vector norms is equivalent to
    # sklearn's cosine_similarity for a single pair, without the
    # 2-D reshape round-trip; cast to a plain Python float.
    denom = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    return float(np.dot(vec1, vec2) / denom)
134
# --- Main Processing Pipeline ---
def analyze_document(doc_text):
    """Run the full NLP pipeline over one document and bundle the results.

    Returns a dict carrying the raw and cleaned text, the spaCy doc,
    extracted skills and entities, years of experience, education level,
    and the document embedding.
    """
    spacy_doc = nlp(doc_text)
    normalized = preprocess_text(doc_text)
    skills, entities = extract_skills(spacy_doc, skill_keywords=predefined_skills_list)
    experience_years, education = extract_experience_and_education(doc_text)
    embedding = get_text_embeddings(normalized)

    analysis = {
        "raw_text": doc_text,
        "cleaned_text": normalized,
        "spacy_doc": spacy_doc,
        "extracted_skills": skills,
        "general_entities": entities,
        "years_experience": experience_years,
        "education_level": education,
        "text_embedding": embedding,
    }
    return analysis
148
# --- Matching and Scoring Logic ---
def calculate_match_scores(cv_data, jd_data):
    """Compare an analyzed CV against an analyzed job description.

    Both arguments are dicts produced by analyze_document().  Returns a
    dict of scores and per-dimension details: overall embedding
    similarity, skill overlap, TF-IDF keywords, and experience and
    education comparisons.
    """
    report = {}

    # Overall semantic similarity between the two document embeddings,
    # expressed as a percentage.
    similarity = calculate_cosine_similarity(
        cv_data["text_embedding"], jd_data["text_embedding"]
    )
    report["overall_match_score"] = round(similarity * 100, 2)

    # Skill overlap via set algebra.
    cv_skill_set = set(cv_data["extracted_skills"])
    jd_skill_set = set(jd_data["extracted_skills"])
    report["matched_skills"] = list(cv_skill_set & jd_skill_set)
    report["missing_skills"] = list(jd_skill_set - cv_skill_set)
    report["extra_skills_in_cv"] = list(cv_skill_set - jd_skill_set)
    if jd_skill_set:
        coverage = len(report["matched_skills"]) / len(jd_skill_set) * 100
    else:
        coverage = 0.0
    report["skill_match_percentage"] = round(coverage, 2)

    # TF-IDF keywords: top 15 terms per document plus their intersection.
    vectorizer = TfidfVectorizer(max_features=100)
    matrix = vectorizer.fit_transform(
        [cv_data["cleaned_text"], jd_data["cleaned_text"]]
    )
    terms = vectorizer.get_feature_names_out()

    def _top_terms(row_index):
        # Collect the non-zero TF-IDF weights for one document row and
        # keep the 15 highest-weighted terms.
        scored = {
            terms[j]: matrix[row_index, j]
            for j in matrix[row_index].nonzero()[1]
        }
        ranked = sorted(scored.items(), key=lambda item: item[1], reverse=True)[:15]
        return [term for term, _weight in ranked]

    report["top_cv_keywords"] = _top_terms(0)
    report["top_jd_keywords"] = _top_terms(1)
    report["common_keywords"] = list(
        set(report["top_cv_keywords"]) & set(report["top_jd_keywords"])
    )

    # Years-of-experience comparison; 0 required years means the job
    # did not specify a requirement.
    cv_years = cv_data["years_experience"]
    jd_years = jd_data["years_experience"]
    report["cv_years_experience"] = cv_years
    report["jd_years_experience"] = jd_years
    if jd_years > 0:
        if cv_years >= jd_years:
            report["experience_match_status"] = "Meets or Exceeds Requirement"
        else:
            report["experience_match_status"] = (
                f"Below Requirement (Needs {jd_years - cv_years} more years)"
            )
    else:
        report["experience_match_status"] = "Not specified by Job"

    # Education comparison against a simple ordinal ranking; unknown
    # levels rank as 0.
    cv_degree = cv_data["education_level"]
    jd_degree = jd_data["education_level"]
    report["cv_education_level"] = cv_degree
    report["jd_education_level"] = jd_degree
    if jd_degree != "Not Specified":
        rank = {"Associate's": 1, "Bachelor's": 2, "Master's": 3, "Ph.D.": 4}
        if rank.get(cv_degree, 0) >= rank.get(jd_degree, 0):
            report["education_match_status"] = "Meets or Exceeds Requirement"
        else:
            report["education_match_status"] = "Below Requirement"
    else:
        report["education_match_status"] = "Not Specified by Job"

    return report
197
# --- Overall Analysis Orchestrator ---
def perform_cv_job_analysis(cv_text, job_desc_text):
    """Analyze both documents and return their match report."""
    cv_analysis = analyze_document(cv_text)
    jd_analysis = analyze_document(job_desc_text)
    return calculate_match_scores(cv_analysis, jd_analysis)
204
# --- Visualization Functions ---
def create_overall_match_plot(score):
    """Render a single horizontal bar showing the overall match percentage.

    Args:
        score: match percentage in [0, 100].

    Returns:
        The matplotlib Figure containing the bar chart.
    """
    figure, axis = plt.subplots(figsize=(6, 2))
    sns.set_style("whitegrid")
    axis.barh(["Overall Match"], [score], color='skyblue')
    axis.set_xlim(0, 100)
    # Print the numeric score just past the end of the bar.
    axis.text(score + 2, 0, f'{score}%', va='center', color='black', fontsize=12)
    axis.set_title("Overall CV-Job Description Match Score", fontsize=14)
    axis.set_xlabel("Match Percentage", fontsize=12)
    axis.get_yaxis().set_visible(False)
    plt.tight_layout()
    return figure
217
def create_skill_match_plot(matched_skills, missing_skills):
    """Pie chart of matched vs. missing skills.

    Returns None when there is nothing to plot (no skills on either
    side); otherwise the matplotlib Figure.
    """
    slice_counts = [len(matched_skills), len(missing_skills)]
    if sum(slice_counts) == 0:
        return None
    slice_labels = ['Matched Skills', 'Missing Skills']
    slice_colors = ['#66b3ff', '#ff9999']
    # Separate the wedges slightly only when both slices are non-empty.
    offsets = (0.05, 0.05) if slice_counts[0] > 0 and slice_counts[1] > 0 else (0, 0)
    figure, axis = plt.subplots(figsize=(7, 7))
    axis.pie(
        slice_counts,
        explode=offsets,
        labels=slice_labels,
        colors=slice_colors,
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
        textprops={'fontsize': 12},
    )
    # Equal aspect ratio keeps the pie circular.
    axis.axis('equal')
    axis.set_title("Skill Match Breakdown", fontsize=14)
    plt.tight_layout()
    return figure
230
def create_top_keywords_plot(cv_keywords, jd_keywords):
    """Side-by-side bar charts of the ten most common keywords per document.

    Args:
        cv_keywords: keyword strings extracted from the CV.
        jd_keywords: keyword strings extracted from the job description.

    Returns:
        The matplotlib Figure with one subplot per document.
    """
    figure, (cv_axis, jd_axis) = plt.subplots(1, 2, figsize=(16, 6))
    sns.set_style("whitegrid")
    panels = (
        (cv_axis, cv_keywords, 'Top Keywords in CV', 'viridis'),
        (jd_axis, jd_keywords, 'Top Keywords in Job Description', 'plasma'),
    )
    for axis, keywords, title, palette in panels:
        # Frequency table of the ten most common keywords for this panel.
        frame = pd.DataFrame(
            Counter(keywords).most_common(10), columns=['Keyword', 'Count']
        )
        if not frame.empty:
            sns.barplot(x='Count', y='Keyword', data=frame, ax=axis, palette=palette)
            axis.set_title(title, fontsize=14)
            axis.set_xlabel('Frequency/Importance', fontsize=12)
            axis.set_ylabel('')
    plt.tight_layout()
    return figure
248
- # --- Main Gradio Interface Function ---
249
- def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
250
- cv_content = ""
251
- if cv_file_obj is not None:
252
- cv_content = get_file_content(cv_file_obj)
253
- elif cv_text_input:
254
- cv_content = cv_text_input
255
- if not cv_content:
256
- return (f"<h4><p style='color:red;'>🚨 Error: Please upload a CV file or paste your CV text.</p></h4>",
257
- None, None, None, "Analysis Failed")
258
- if not jd_text_input:
259
- return (f"<h4><p style='color:red;'>🚨 Error: Please paste the Job Description text.</p></h4>",
260
- None, None, None, "Analysis Failed")
261
- try:
262
- analysis_results = perform_cv_job_analysis(cv_content, jd_text_input)
263
- html_output = f"""
264
- <h2 style='text-align: center;'>💡 Analysis Results Summary 💡</h2>
265
- <div style='display: flex; justify-content: space-around; flex-wrap: wrap; text-align: center; margin-bottom: 20px;'>
266
- <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
267
- <h3>Overall Match Score</h3>
268
- <h1 style='color: #007bb6;'>{analysis_results['overall_match_score']}%</h1>
269
- </div>
270
- <div style='background-color: #e8f5e9; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
271
- <h3>Skill Match</h3>
272
- <h1 style='color: #43a047;'>{analysis_results['skill_match_percentage']}%</h1>
273
- </div>
274
- <div style='background-color: #fff3e0; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
275
- <h3>Experience Match</h3>
276
- <h1 style='color: #fb8c00;'>{analysis_results['experience_match_status']}</h1>
277
- </div>
278
- </div>
279
- <hr style='border-top: 2px solid #bbb; margin: 20px 0;'/>
280
- <h2 style='text-align: center;'>📝 Detailed Breakdown</h2>
281
- <h4>Skills Analysis</h4>
282
- <p><strong>✅ Matched Skills:</strong> {', '.join(analysis_results['matched_skills']) if analysis_results['matched_skills'] else 'None found matching job description.'}</p>
283
- <p><strong>❌ Missing Skills (from Job Description):</strong> {', '.join(analysis_results['missing_skills']) if analysis_results['missing_skills'] else '🥳 None! Your CV has all specified skills.'}</p>
284
- <p><strong>💡 Extra Skills in CV (not in Job Description):</strong> {', '.join(
 
126
  else: return np.mean([token.vector for token in doc if token.has_vector], axis=0) if [token.vector for token in doc if token.has_vector] else np.zeros(nlp.vocab.vectors.shape[1])
127
 
128
  def calculate_cosine_similarity(vec1, vec2):
129
+ if np