Bur3hani committed on
Commit
95f078d
·
verified ·
1 Parent(s): 5abbb5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -1
app.py CHANGED
@@ -126,4 +126,233 @@ def get_text_embeddings(text):
126
  else: return np.mean([token.vector for token in doc if token.has_vector], axis=0) if [token.vector for token in doc if token.has_vector] else np.zeros(nlp.vocab.vectors.shape[1])
127
 
128
  def calculate_cosine_similarity(vec1, vec2):
129
- if np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  else: return np.mean([token.vector for token in doc if token.has_vector], axis=0) if [token.vector for token in doc if token.has_vector] else np.zeros(nlp.vocab.vectors.shape[1])
127
 
128
  def calculate_cosine_similarity(vec1, vec2):
129
+ # This check prevents errors if one of the vectors is all zeros.
130
+ if np.all(vec1 == 0) or np.all(vec2 == 0):
131
+ return 0.0
132
+ vec1 = vec1.reshape(1, -1)
133
+ vec2 = vec2.reshape(1, -1)
134
+ return cosine_similarity(vec1, vec2)[0][0]
135
+
136
+ # --- Main Processing Pipeline ---
137
def analyze_document(doc_text):
    """Run every extraction step on one document and bundle the results.

    Args:
        doc_text: Raw text of the document (CV or job description).

    Returns:
        A dict with the keys ``raw_text``, ``cleaned_text``, ``spacy_doc``,
        ``extracted_skills``, ``general_entities``, ``years_experience``,
        ``education_level`` and ``text_embedding``.
    """
    parsed = nlp(doc_text)
    normalized = preprocess_text(doc_text)
    skills, entities = extract_skills(parsed, skill_keywords=predefined_skills_list)
    years, education = extract_experience_and_education(doc_text)
    # The embedding is computed from the preprocessed text, not the raw input.
    embedding = get_text_embeddings(normalized)

    analysis = {}
    analysis["raw_text"] = doc_text
    analysis["cleaned_text"] = normalized
    analysis["spacy_doc"] = parsed
    analysis["extracted_skills"] = skills
    analysis["general_entities"] = entities
    analysis["years_experience"] = years
    analysis["education_level"] = education
    analysis["text_embedding"] = embedding
    return analysis
149
+
150
+ # --- Matching and Scoring Logic ---
151
def _top_tfidf_keywords(cv_text, jd_text, top_n=15, max_features=100):
    """Return ``(cv_keywords, jd_keywords)``: each text's highest TF-IDF terms."""
    vectorizer = TfidfVectorizer(max_features=max_features)
    try:
        matrix = vectorizer.fit_transform([cv_text, jd_text])
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary is empty (both
        # texts blank or made only of filtered tokens). Keyword ranking is a
        # secondary signal, so degrade to "no keywords" rather than failing
        # the whole analysis.
        return [], []

    names = vectorizer.get_feature_names_out()
    ranked = []
    for row in (0, 1):
        scores = {names[i]: matrix[row, i] for i in matrix[row].nonzero()[1]}
        best = sorted(scores.items(), key=lambda item: item[1], reverse=True)[:top_n]
        ranked.append([term for term, _ in best])
    return ranked[0], ranked[1]


def calculate_match_scores(cv_data, jd_data):
    """Compare an analyzed CV against an analyzed job description.

    Args:
        cv_data: Analysis dict for the CV. Reads ``text_embedding``,
            ``extracted_skills``, ``cleaned_text``, ``years_experience`` and
            ``education_level``.
        jd_data: Analysis dict for the job description, same keys.

    Returns:
        A dict with the overall embedding similarity (percentage), matched /
        missing / extra skills, skill match percentage, top TF-IDF keywords
        for each document and their overlap, plus experience and education
        values with a match status for each.
    """
    results = {}

    # --- Overall semantic similarity ---
    overall_similarity = calculate_cosine_similarity(
        cv_data["text_embedding"], jd_data["text_embedding"]
    )
    results["overall_match_score"] = round(overall_similarity * 100, 2)

    # --- Skills ---
    cv_skills = set(cv_data["extracted_skills"])
    jd_skills = set(jd_data["extracted_skills"])
    # Sorted so the report is stable between runs (set order is arbitrary).
    matched_skills = sorted(cv_skills & jd_skills)
    results["matched_skills"] = matched_skills
    results["missing_skills"] = sorted(jd_skills - cv_skills)
    results["extra_skills_in_cv"] = sorted(cv_skills - jd_skills)
    # A job that lists no skills yields 0% rather than a division by zero.
    skill_match_percentage = (
        len(matched_skills) / len(jd_skills) * 100 if jd_skills else 0.0
    )
    results["skill_match_percentage"] = round(skill_match_percentage, 2)

    # --- TF-IDF keywords ---
    top_cv_keywords, top_jd_keywords = _top_tfidf_keywords(
        cv_data["cleaned_text"], jd_data["cleaned_text"]
    )
    results["top_cv_keywords"] = top_cv_keywords
    results["top_jd_keywords"] = top_jd_keywords
    results["common_keywords"] = sorted(set(top_cv_keywords) & set(top_jd_keywords))

    # --- Experience ---
    cv_exp_years = cv_data["years_experience"]
    jd_exp_years = jd_data["years_experience"]
    results["cv_years_experience"] = cv_exp_years
    results["jd_years_experience"] = jd_exp_years
    exp_status = "Not specified by Job"
    if jd_exp_years > 0:
        if cv_exp_years >= jd_exp_years:
            exp_status = "Meets or Exceeds Requirement"
        else:
            exp_status = f"Below Requirement (Needs {jd_exp_years - cv_exp_years} more years)"
    results["experience_match_status"] = exp_status

    # --- Education ---
    cv_edu = cv_data["education_level"]
    jd_edu = jd_data["education_level"]
    results["cv_education_level"] = cv_edu
    results["jd_education_level"] = jd_edu
    edu_match_status = "Not Specified by Job"
    if jd_edu != "Not Specified":
        # Levels missing from this table rank as 0 (lowest).
        edu_order = {"Associate's": 1, "Bachelor's": 2, "Master's": 3, "Ph.D.": 4}
        if edu_order.get(cv_edu, 0) >= edu_order.get(jd_edu, 0):
            edu_match_status = "Meets or Exceeds Requirement"
        else:
            edu_match_status = "Below Requirement"
    results["education_match_status"] = edu_match_status

    return results
198
+
199
+ # --- Overall Analysis Orchestrator ---
200
def perform_cv_job_analysis(cv_text, job_desc_text):
    """Analyze the CV, then the job description, and score the pair.

    Args:
        cv_text: Raw CV text.
        job_desc_text: Raw job description text.

    Returns:
        Whatever ``calculate_match_scores`` returns for the two analyses.
    """
    cv_profile = analyze_document(cv_text)
    job_profile = analyze_document(job_desc_text)
    return calculate_match_scores(cv_profile, job_profile)
205
+
206
+ # --- Visualization Functions ---
207
def create_overall_match_plot(score):
    """Build a single horizontal bar showing the overall match percentage.

    Args:
        score: Match score on a 0-100 scale.

    Returns:
        The matplotlib Figure containing the bar chart.
    """
    # The style must be set before the axes exist; seaborn styles only affect
    # axes created afterwards.
    sns.set_style("whitegrid")
    fig, ax = plt.subplots(figsize=(6, 2))
    ax.barh(["Overall Match"], [score], color='skyblue')
    ax.set_xlim(0, 100)

    # Put the label just past the bar end, but move it inside the bar when it
    # would otherwise run off the right edge of the 0-100 axis.
    if score > 85:
        ax.text(score - 2, 0, f'{score}%', va='center', ha='right', color='black', fontsize=12)
    else:
        ax.text(max(score, 0) + 2, 0, f'{score}%', va='center', ha='left', color='black', fontsize=12)

    ax.set_title("Overall CV-Job Description Match Score", fontsize=14)
    ax.set_xlabel("Match Percentage", fontsize=12)
    ax.get_yaxis().set_visible(False)
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
218
+
219
def create_skill_match_plot(matched_skills, missing_skills):
    """Build a pie chart of matched versus missing skills.

    Args:
        matched_skills: Collection of skills found in both documents.
        missing_skills: Collection of required skills absent from the CV.

    Returns:
        The matplotlib Figure, or ``None`` when both collections are empty
        (there is nothing to draw).
    """
    sizes = [len(matched_skills), len(missing_skills)]
    if sum(sizes) == 0:
        return None

    labels = ['Matched Skills', 'Missing Skills']
    colors = ['#66b3ff', '#ff9999']
    # Only separate the wedges when both are actually present.
    explode = (0.05, 0.05) if sizes[0] > 0 and sizes[1] > 0 else (0, 0)

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.pie(
        sizes,
        explode=explode,
        labels=labels,
        colors=colors,
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
        textprops={'fontsize': 12},
    )
    ax.axis('equal')
    ax.set_title("Skill Match Breakdown", fontsize=14)
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
231
+
232
def create_top_keywords_plot(cv_keywords, jd_keywords):
    """Build side-by-side bar charts of the top keywords in each document.

    Args:
        cv_keywords: Iterable of keywords from the CV.
        jd_keywords: Iterable of keywords from the job description.

    Returns:
        The matplotlib Figure with two panels (CV left, job description
        right). A panel is left blank when its keyword list is empty.
    """
    # The style must be set before the axes exist; seaborn styles only affect
    # axes created afterwards.
    sns.set_style("whitegrid")
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    panels = (
        (axes[0], cv_keywords, 'Top Keywords in CV', 'viridis'),
        (axes[1], jd_keywords, 'Top Keywords in Job Description', 'plasma'),
    )
    for ax, keywords, title, palette in panels:
        # Bars show how often each keyword occurs in the list passed in.
        frame = pd.DataFrame(Counter(keywords).most_common(10), columns=['Keyword', 'Count'])
        if frame.empty:
            continue
        sns.barplot(x='Count', y='Keyword', data=frame, ax=ax, palette=palette)
        ax.set_title(title, fontsize=14)
        ax.set_xlabel('Frequency/Importance', fontsize=12)
        ax.set_ylabel('')

    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
249
+
250
+ # --- Main Gradio Interface Function ---
251
def analyze_cv_match(cv_file_obj, cv_text_input, jd_text_input):
    """Gradio callback: analyze a CV against a job description.

    Args:
        cv_file_obj: Uploaded CV file object, or ``None``.
        cv_text_input: CV text pasted by the user. When non-blank it takes
            precedence over the uploaded file, as the textbox label states.
        jd_text_input: Job description text pasted by the user.

    Returns:
        A 5-tuple ``(html_report, overall_plot, skill_plot, keywords_plot,
        status)``. On any failure the three plots are ``None`` and status is
        ``"Analysis Failed"``.
    """
    import html
    import traceback

    def _esc(value):
        """Escape a dynamic value for safe inclusion in the HTML report."""
        return html.escape(str(value))

    def _join_or(items, fallback):
        """Comma-join escaped items, or return the fallback text when empty."""
        return ', '.join(_esc(item) for item in items) if items else fallback

    def _status_color(status):
        """Pick a text color that reflects whether a requirement is met."""
        text = str(status)
        if text.startswith("Meets"):
            return "green"
        if text.startswith("Below"):
            return "red"
        return "gray"

    def _failure(message_html):
        """Build the standard failure tuple around an HTML message."""
        return (message_html, None, None, None, "Analysis Failed")

    # Pasted text overrides the uploaded file (matches the UI label).
    pasted_cv = (cv_text_input or "").strip()
    if pasted_cv:
        cv_content = pasted_cv
    elif cv_file_obj is not None:
        cv_content = get_file_content(cv_file_obj)
    else:
        cv_content = ""

    # Treat whitespace-only input the same as missing input.
    if not cv_content or not cv_content.strip():
        return _failure("<h4><p style='color:red;'>🚨 Error: Please upload a CV file or paste your CV text.</p></h4>")
    jd_content = (jd_text_input or "").strip()
    if not jd_content:
        return _failure("<h4><p style='color:red;'>🚨 Error: Please paste the Job Description text.</p></h4>")

    try:
        analysis_results = perform_cv_job_analysis(cv_content, jd_content)

        matched_skills_str = _join_or(analysis_results['matched_skills'], 'None found matching job description.')
        missing_skills_str = _join_or(analysis_results['missing_skills'], '🥳 None! Your CV has all specified skills.')
        extra_skills_str = _join_or(analysis_results['extra_skills_in_cv'], 'None. (This is often fine, showing broader capability.)')
        common_keywords_str = _join_or(analysis_results['common_keywords'], 'No significant common keywords beyond skills.')
        cv_keywords_str = _join_or(analysis_results['top_cv_keywords'], 'N/A')
        jd_keywords_str = _join_or(analysis_results['top_jd_keywords'], 'N/A')

        exp_status = analysis_results['experience_match_status']
        edu_status = analysis_results['education_match_status']

        html_output = f"""
        <h2 style='text-align: center;'>💡 Analysis Results Summary 💡</h2>
        <div style='display: flex; justify-content: space-around; flex-wrap: wrap; text-align: center; margin-bottom: 20px;'>
            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
                <h3>Overall Match Score</h3>
                <h1 style='color: #007bb6;'>{_esc(analysis_results['overall_match_score'])}%</h1>
            </div>
            <div style='background-color: #e8f5e9; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
                <h3>Skill Match</h3>
                <h1 style='color: #43a047;'>{_esc(analysis_results['skill_match_percentage'])}%</h1>
            </div>
            <div style='background-color: #fff3e0; padding: 15px; border-radius: 8px; margin: 5px; min-width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
                <h3>Experience Match</h3>
                <h1 style='color: #fb8c00;'>{_esc(exp_status)}</h1>
            </div>
        </div>
        <hr style='border-top: 2px solid #bbb; margin: 20px 0;'/>
        <h2 style='text-align: center;'>📝 Detailed Breakdown</h2>
        <h4>Skills Analysis</h4>
        <p><strong>✅ Matched Skills:</strong> {matched_skills_str}</p>
        <p><strong>❌ Missing Skills (from Job Description):</strong> {missing_skills_str}</p>
        <p><strong>💡 Extra Skills in CV (not in Job Description):</strong> {extra_skills_str}</p>
        <h4>Keyword Relevance (Top TF-IDF Terms)</h4>
        <p><strong>🤝 Top Common Keywords:</strong> {common_keywords_str}</p>
        <p><strong>🔍 Top Keywords in Your CV:</strong> {cv_keywords_str}</p>
        <p><strong>🎯 Top Keywords in Job Description:</strong> {jd_keywords_str}</p>
        <h4>Experience &amp; Education Comparison</h4>
        <p><strong>👤 Your CV's Experience:</strong> <code>{_esc(analysis_results['cv_years_experience'])}</code> years</p>
        <p><strong>💼 Job's Required Experience:</strong> <code>{_esc(analysis_results['jd_years_experience'])}</code> years</p>
        <p style='color:{_status_color(exp_status)};'><strong>Status:</strong> {_esc(exp_status)}</p>
        <p><strong>🎓 Your CV's Education:</strong> <code>{_esc(analysis_results['cv_education_level'])}</code></p>
        <p><strong>📚 Job's Required Education:</strong> <code>{_esc(analysis_results['jd_education_level'])}</code></p>
        <p style='color:{_status_color(edu_status)};'><strong>Status:</strong> {_esc(edu_status)}</p>
        """

        overall_plot = create_overall_match_plot(analysis_results['overall_match_score'])
        skill_plot = create_skill_match_plot(analysis_results['matched_skills'], analysis_results['missing_skills'])
        keywords_plot = create_top_keywords_plot(analysis_results['top_cv_keywords'], analysis_results['top_jd_keywords'])
        return html_output, overall_plot, skill_plot, keywords_plot, "Analysis Complete!"
    except Exception as e:
        # Top-level UI boundary: report the failure in the page instead of
        # crashing the callback. Both strings are escaped because they can
        # contain user-supplied text and markup characters.
        error_traceback = traceback.format_exc()
        return _failure(
            f"<h4><p style='color:red;'>An unexpected error occurred during analysis: {_esc(e)}</p></h4>"
            f"<details><summary>Click for details</summary><pre>{_esc(error_traceback)}</pre></details>"
        )
321
+
322
+ # --- Gradio Interface Definition ---
323
# Layout: inputs (CV + job description) on the left, report and plots on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="CV-Job Match Analyzer") as demo:

    # Add CSS padding to the top of the app to push the content down.
    gr.HTML("<style>#root{padding-top: 50px !important;}</style>")

    gr.Markdown(
        """
        # 👨‍💼 CV-Job Match Analyzer 📈
        Welcome! This tool helps you understand how well a CV matches a job description.
        Upload a CV (PDF, DOCX, TXT) and paste the job description text to get an instant analysis.
        """
    )
    with gr.Row():
        with gr.Column(scale=1, min_width=400):
            gr.Markdown("## **1. Your CV**")
            cv_file_obj = gr.File(label="Upload CV (PDF, DOCX, TXT)", file_types=[".pdf", ".docx", ".txt"])
            cv_text_input = gr.Textbox(label="Or paste CV text here (overrides file upload)", lines=10, placeholder="Paste your CV content here...")
            gr.Markdown("## **2. Job Description**")
            jd_text_input = gr.Textbox(label="Paste the Job Description text here", lines=10, placeholder="Paste the job description content here...")
            with gr.Row():
                analyze_button = gr.Button("✨ Analyze CV Match ✨", variant="primary", scale=1)
                clear_button = gr.ClearButton([cv_file_obj, cv_text_input, jd_text_input], scale=1)
        with gr.Column(scale=2, min_width=600):
            output_html = gr.HTML(label="Analysis Report")
            gr.Markdown("## **📊 Visual Insights**")
            output_overall_plot = gr.Plot(label="Overall Match Score")
            output_skill_plot = gr.Plot(label="Skill Match Breakdown")
            output_keywords_plot = gr.Plot(label="Top Keywords")

    # The callback returns five values; the fifth (a status string) is stored
    # in a State component and is not displayed anywhere.
    analyze_button.click(
        fn=analyze_cv_match,
        inputs=[cv_file_obj, cv_text_input, jd_text_input],
        outputs=[output_html, output_overall_plot, output_skill_plot, output_keywords_plot, gr.State(value="")],
    )

# Only start the server when run as a script, so importing this module
# (e.g. for tests) does not launch the app.
if __name__ == "__main__":
    demo.launch()