zlf18 committed on
Commit
b6481eb
·
verified ·
1 Parent(s): ac17735

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -91
app.py CHANGED
@@ -61,6 +61,31 @@ FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
61
  KNOWN_WORDS = set()
62
 
63
  # --- CORE NLP & HELPER FUNCTIONS ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def _norm_skill_token(s: str) -> str:
65
  s = s.lower().strip()
66
  s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
@@ -196,41 +221,44 @@ def initialize_data_and_model():
196
  ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
197
  original_df = ds["original"].to_pandas()
198
 
199
- # --- NEW: Advanced LLM Skill Extractor ---
200
- # This new function uses a much more detailed prompt to get niche, specific skills.
201
- def extract_skills_llm_advanced(job_title: str, duties: str, qualifications: str) -> list[str]:
202
- if not LLM_PIPELINE: return []
203
-
204
- # We combine the most important fields to give the LLM full context.
205
- full_context = f"Job Title: {job_title}\n\nDuties: {duties}\n\nQualifications: {qualifications}"
206
-
207
- # This prompt is highly specific to encourage better, more niche results.
208
  prompt = f"""
209
- Instruct: You are a highly specialized technical recruiter and hiring manager. Your task is to meticulously extract a comprehensive list of the most critical and specific skills from the provided job description, paying special attention to the 'qualifications' and 'duties' sections.
210
-
211
- Identify specific programming languages, software tools (e.g., AutoCAD, Figma, SAP), cloud technologies (e.g., AWS S3, Azure DevOps), data analysis tools (e.g., Tableau, Power BI), engineering concepts, and industry standards (e.g., ISO 13485, GMP).
212
-
213
- Avoid overly generic soft skills like 'teamwork' or 'communication' unless they are explicitly emphasized as a core requirement. Prioritize tangible, niche competencies that truly define the role.
214
-
215
- Return a single, comma-separated string of the extracted skills. Do not add any preamble or explanation.
216
-
217
- [Job Description Context]
218
- {full_context}
219
-
220
- [Extracted Skills]
221
- """
222
  try:
223
  response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1)
224
  generated_text = response[0]['generated_text']
225
- # Robustly find the skills part after the final indicator
226
- skills_part = generated_text.split("[Extracted Skills]")[-1].strip()
227
  skills = [skill.strip() for skill in skills_part.split(',') if skill.strip()]
228
- # Return a de-duplicated list, preserving order as much as possible
229
  return list(dict.fromkeys(s.lower() for s in skills))
230
- except Exception as e:
231
- print(f"LLM skill extraction failed: {e}")
232
- return []
233
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  def extract_skills_direct_scan(text: str) -> list[str]:
235
  if not isinstance(text, str): return []
236
  found_skills = set()
@@ -239,58 +267,32 @@ def initialize_data_and_model():
239
  found_skills.add(skill)
240
  return list(found_skills)
241
 
242
- def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
243
- if not LLM_PIPELINE or not job_title: return []
244
-
245
- skills_to_add = 6 - len(existing_skills)
246
- prompt = f"""
247
- Instruct: A job has the title "{job_title}" and requires the skills: {', '.join(existing_skills)}.
248
- Based on this, what are {skills_to_add} additional, closely related skills typically required for such a role?
249
- List only the new skills, separated by commas. Do not repeat skills from the original list.
250
-
251
- Additional Skills:
252
- """
253
- try:
254
- response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
255
- generated_text = response[0]['generated_text']
256
- skills_part = generated_text.split("Additional Skills:")[-1].strip()
257
- new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
258
- return new_skills
259
- except Exception:
260
- return []
261
-
262
- # --- MODIFIED: Hybrid Skill Extraction Logic ---
263
- # This function is now simpler and more powerful. It prioritizes the advanced LLM extractor.
264
  def extract_skills_hybrid(row) -> list[str]:
265
- # Extract the relevant text fields from the row
266
- job_title = str(row.get('Job title', ''))
267
- duties = str(row.get('Duties', ''))
268
- qualifications = str(row.get('qualifications', ''))
269
- description = str(row.get('Description', ''))
270
-
271
- # The full text is used for the direct scan as a fallback
272
- full_text_for_scan = " ".join([job_title, duties, qualifications, description])
273
 
274
- # 🎯 Primary Method: Use the advanced LLM extractor for high-quality, niche skills
275
- advanced_llm_skills = extract_skills_llm_advanced(job_title, duties, qualifications)
276
-
277
- # 🛡️ Secondary Method: Use a direct scan as a fast and reliable backup for common skills
278
- direct_skills = extract_skills_direct_scan(full_text_for_scan)
279
-
280
- # Combine the results, giving priority to the LLM's findings
281
- combined_skills = set(advanced_llm_skills) | set(direct_skills)
282
 
283
- # If the combined list is still too short, use the LLM to expand it
284
  if len(combined_skills) < 6:
285
- expanded_skills = expand_skills_with_llm(job_title, list(combined_skills))
 
286
  combined_skills.update(expanded_skills)
287
 
288
  return sorted(list(combined_skills))
289
 
 
 
 
 
290
  print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
291
  # Apply the hybrid function row-wise to include job title context
292
  original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
293
-
 
294
  print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
295
  original_df.to_parquet(PROCESSED_DATA_PATH)
296
 
@@ -327,14 +329,12 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
327
  emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
328
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
329
 
330
- # --- NEW: Initialize variables for the recommendations section ---
331
  recommendations_table = pd.DataFrame()
332
  recommendations_visible = False
333
 
334
  if user_skills:
335
  scored_df = score_jobs_by_skills(user_skills, emb_matches)
336
 
337
- # --- NEW: Logic to get top 5 jobs based purely on skill match score ---
338
  skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
339
  if not skill_sorted_df.empty:
340
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
@@ -343,7 +343,6 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
343
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
344
  recommendations_table = recs
345
  recommendations_visible = True
346
- # --- END NEW ---
347
 
348
  display_df = scored_df.head(top_n)
349
  status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
@@ -364,7 +363,6 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
364
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
365
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
366
 
367
- # --- MODIFIED: Added new outputs for recommendations ---
368
  return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
369
 
370
  def rerank_current_results(initial_matches_df, skills_text, top_n):
@@ -373,7 +371,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
373
  initial_matches_df = pd.DataFrame(initial_matches_df)
374
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
375
 
376
- # --- NEW: Initialize variables for the recommendations section ---
377
  recommendations_table = pd.DataFrame()
378
  recommendations_visible = False
379
 
@@ -388,7 +385,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
388
  status = f"Results **re-ranked** based on your {len(user_skills)} skills."
389
  display_df = ranked_df.head(top_n)
390
 
391
- # --- NEW: Logic to get top 5 jobs based purely on skill match score ---
392
  skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
393
  if not skill_sorted_df.empty:
394
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
@@ -397,7 +393,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
397
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
398
  recommendations_table = recs
399
  recommendations_visible = True
400
- # --- END NEW ---
401
 
402
  table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
403
  table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
@@ -407,18 +402,15 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
407
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
408
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
409
 
410
- # --- MODIFIED: Added new outputs for recommendations ---
411
  return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
412
 
413
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
414
  if not dream_job:
415
- # --- MODIFIED: Added new default outputs ---
416
  return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
417
  unrecognized_words = check_spelling_in_query(dream_job)
418
  if unrecognized_words:
419
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
420
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
421
- # --- MODIFIED: Added new default outputs ---
422
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
423
 
424
  status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
@@ -456,12 +448,11 @@ def on_select_job(job_id, skills_text):
456
  matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
457
  all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
458
 
459
- if user_skills and score_val >= 0.98:
460
- learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
461
- job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
462
- return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
463
-
464
- if user_skills:
465
  job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
466
  headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
467
  learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
@@ -470,11 +461,25 @@ def on_select_job(job_id, skills_text):
470
  learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
471
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
472
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
474
- skills_to_display = sorted(job_skills)[:5]
475
  items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
476
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
477
- full_skill_list_for_state = sorted(job_skills)
 
478
  new_offset = len(skills_to_display)
479
  should_button_be_visible = len(full_skill_list_for_state) > 5
480
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
@@ -489,7 +494,6 @@ def load_more_skills(full_skills_list, current_offset):
489
  return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
490
 
491
  def on_reset():
492
- # --- MODIFIED: Added new default outputs for reset ---
493
  return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
494
 
495
  print("Starting application initialization...")
@@ -520,7 +524,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
520
 
521
  df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
522
 
523
- # --- NEW: Added the recommendations section ---
524
  with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
525
  recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
526
 
@@ -534,7 +537,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
534
  learning_plan_output = gr.HTML(label="Learning Plan")
535
  load_more_btn = gr.Button("Load More Skills", visible=False)
536
 
537
- # --- MODIFIED: Added new outputs to the click events ---
538
  search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
539
  search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
540
  retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
 
61
  KNOWN_WORDS = set()
62
 
63
  # --- CORE NLP & HELPER FUNCTIONS ---
64
def expand_skills_with_llm(job_title: str, existing_skills: list, num_skills_to_add: int) -> list:
    """Ask the LLM for additional skills related to a job title.

    Args:
        job_title: Job title interpolated into the prompt.
        existing_skills: Skills already known for the job. They are listed in
            the prompt and filtered out of the result.
        num_skills_to_add: Maximum number of skills to return.

    Returns:
        At most ``num_skills_to_add`` lower-cased, de-duplicated skills that
        are not already in ``existing_skills``. An empty list is returned when
        nothing is requested, the title is empty, ``LLM_PIPELINE`` is falsy,
        or the LLM call fails.
    """
    # Argument checks run before the pipeline check so a no-op request never
    # touches the model handle.
    if num_skills_to_add <= 0 or not job_title:
        return []
    if not LLM_PIPELINE:
        return []

    # str() keeps a stray non-string entry from raising outside the try below.
    existing_skills_str = ', '.join(str(skill) for skill in existing_skills)
    prompt = f"""
    Instruct: A job has the title "{job_title}" and already lists these skills: {existing_skills_str}.
    Based on this, what are {num_skills_to_add} additional, closely related skills typically required for such a role?
    List only the new skills, separated by commas. Do not repeat skills from the original list. Do not include any preamble.

    Additional Skills:
    """
    try:
        response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
        generated_text = response[0]['generated_text']

        # Prefer stripping the echoed prompt: splitting on the marker and
        # taking the last piece would pick up a marker the model generated
        # itself. Fall back to the marker when the prompt is not echoed verbatim.
        if generated_text.startswith(prompt):
            completion = generated_text[len(prompt):]
        else:
            completion = generated_text.split("Additional Skills:")[-1]

        # Only the first non-empty line is the answer; anything after it is
        # free-running continuation.
        answer_lines = completion.strip().splitlines()
        answer = answer_lines[0] if answer_lines else ""

        candidates = [skill.strip().lower() for skill in answer.split(',') if skill.strip()]
        already_listed = {str(skill).strip().lower() for skill in existing_skills}
        # dict.fromkeys de-duplicates while keeping the model's ordering.
        fresh = [skill for skill in dict.fromkeys(candidates) if skill not in already_listed]
        # The prompt asks for an exact count, but the model is free to ignore it.
        return fresh[:num_skills_to_add]
    except Exception as e:
        # Best-effort enrichment: report the failure and carry on without it.
        print(f"🚨 ERROR expanding skills with LLM: {e}")
        return []
88
+
89
  def _norm_skill_token(s: str) -> str:
90
  s = s.lower().strip()
91
  s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
 
221
  ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
222
  original_df = ds["original"].to_pandas()
223
 
224
def extract_skills_llm(text: str) -> list[str]:
    """Extract skills from job-description text with a few-shot LLM prompt.

    Args:
        text: Job-description text. Non-strings and texts shorter than 20
            characters after stripping are skipped.

    Returns:
        Lower-cased, de-duplicated skills in the order the model listed them.
        An empty list is returned when the input is skipped, ``LLM_PIPELINE``
        is falsy, or the LLM call fails.
    """
    if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE:
        return []

    prompt = f"""
    Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
    [Example 1]
    Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus."
    Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
    [Example 2]
    Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum."
    Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
    [Actual Task]
    Text: "{text}"
    Extracted Skills:
    """
    try:
        response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1)
        generated_text = response[0]['generated_text']

        # The marker occurs three times in the prompt, and the model may add
        # more by inventing further examples. Stripping the echoed prompt
        # isolates the real answer; the marker split is only a fallback for
        # output that does not echo the prompt verbatim.
        if generated_text.startswith(prompt):
            completion = generated_text[len(prompt):]
        else:
            completion = generated_text.split("Extracted Skills:")[-1]

        # The examples put the answer on a single line, so later lines are
        # continuation noise rather than skills.
        answer_lines = completion.strip().splitlines()
        answer = answer_lines[0] if answer_lines else ""

        skills = [skill.strip() for skill in answer.split(',') if skill.strip()]
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(s.lower() for s in skills))
    except Exception as e:
        # Best-effort extraction: report the failure instead of hiding it.
        print(f"🚨 ERROR extracting skills with LLM: {e}")
        return []
245
+
246
def extract_skills_nltk(text: str) -> list[str]:
    """Find whitelisted skills among the noun phrases of ``text``.

    The text is lower-cased, tokenised and POS-tagged with NLTK, then chunked
    with the grammar ``NP: {<JJ.*>*<NN.*>+}`` (any number of ``JJ*`` tags
    followed by one or more ``NN*`` tags). A chunk counts as a skill only when
    its normalised form is in ``SKILL_WHITELIST``.

    Args:
        text: Text to scan. Non-strings and blank strings yield no skills.

    Returns:
        Sorted list of the distinct normalised skills found.
    """
    # Blank input has nothing to chunk; return early rather than hand the
    # parser an empty token list.
    if not isinstance(text, str) or not text.strip():
        return []

    grammar = "NP: {<JJ.*>*<NN.*>+}"
    chunk_parser = nltk.RegexpParser(grammar)
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text.lower()))

    found_skills = set()
    for subtree in chunk_parser.parse(tagged_tokens).subtrees():
        if subtree.label() != 'NP':
            continue
        phrase = " ".join(word for word, _tag in subtree.leaves())
        # Normalise once and reuse the result for both the lookup and the output.
        normalized = _norm_skill_token(phrase)
        if normalized in SKILL_WHITELIST:
            found_skills.add(normalized)
    return sorted(found_skills)
261
+
262
  def extract_skills_direct_scan(text: str) -> list[str]:
263
  if not isinstance(text, str): return []
264
  found_skills = set()
 
267
  found_skills.add(skill)
268
  return list(found_skills)
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  def extract_skills_hybrid(row) -> list[str]:
271
+ text = row['text_for_skills']
272
+ job_title = row.get('Job title', '') # Use original Job title for context
 
 
 
 
 
 
273
 
274
+ llm_skills = extract_skills_llm(text)
275
+ nltk_skills = extract_skills_nltk(text)
276
+ direct_skills = extract_skills_direct_scan(text)
277
+ combined_skills = set(llm_skills) | set(nltk_skills) | set(direct_skills)
 
 
 
 
278
 
279
+ # If the combined list is still too short, expand it
280
  if len(combined_skills) < 6:
281
+ # MODIFIED: Call the global expand_skills_with_llm function
282
+ expanded_skills = expand_skills_with_llm(job_title, list(combined_skills), num_skills_to_add=6-len(combined_skills))
283
  combined_skills.update(expanded_skills)
284
 
285
  return sorted(list(combined_skills))
286
 
287
def create_text_for_skills(row):
    """Join a row's descriptive fields into one space-separated string.

    Reads "Job title", "Duties", "qualifications" and "Description" from
    ``row`` via ``row.get``, drops values that ``pd.notna`` reports as
    missing, and joins the rest, converted with ``str``, in that order.
    """
    source_columns = ("Job title", "Duties", "qualifications", "Description")
    pieces = []
    for column in source_columns:
        value = row.get(column)
        if pd.notna(value):
            pieces.append(str(value))
    return " ".join(pieces)
289
+
290
+ original_df["text_for_skills"] = original_df.apply(create_text_for_skills, axis=1)
291
  print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
292
  # Apply the hybrid function row-wise to include job title context
293
  original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
294
+ original_df = original_df.drop(columns=['text_for_skills'])
295
+
296
  print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
297
  original_df.to_parquet(PROCESSED_DATA_PATH)
298
 
 
329
  emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
330
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
331
 
 
332
  recommendations_table = pd.DataFrame()
333
  recommendations_visible = False
334
 
335
  if user_skills:
336
  scored_df = score_jobs_by_skills(user_skills, emb_matches)
337
 
 
338
  skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
339
  if not skill_sorted_df.empty:
340
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
 
343
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
344
  recommendations_table = recs
345
  recommendations_visible = True
 
346
 
347
  display_df = scored_df.head(top_n)
348
  status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
 
363
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
364
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
365
 
 
366
  return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
367
 
368
  def rerank_current_results(initial_matches_df, skills_text, top_n):
 
371
  initial_matches_df = pd.DataFrame(initial_matches_df)
372
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
373
 
 
374
  recommendations_table = pd.DataFrame()
375
  recommendations_visible = False
376
 
 
385
  status = f"Results **re-ranked** based on your {len(user_skills)} skills."
386
  display_df = ranked_df.head(top_n)
387
 
 
388
  skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
389
  if not skill_sorted_df.empty:
390
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
 
393
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
394
  recommendations_table = recs
395
  recommendations_visible = True
 
396
 
397
  table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
398
  table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
 
402
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
403
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
404
 
 
405
  return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
406
 
407
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
408
  if not dream_job:
 
409
  return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
410
  unrecognized_words = check_spelling_in_query(dream_job)
411
  if unrecognized_words:
412
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
413
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
 
414
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
415
 
416
  status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
 
448
  matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
449
  all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
450
 
451
+ if score_val >= 0.98:
452
+ learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
453
+ job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
454
+ return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
455
+
 
456
  job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
457
  headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
458
  learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
 
461
  learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
462
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
463
  else:
464
+ # --- MODIFIED LOGIC TO ENSURE AT LEAST 5 SKILLS ---
465
+ current_job_skills = list(job_skills)
466
+ job_title = str(row.get("job_title", ""))
467
+
468
+ if len(current_job_skills) < 5 and job_title and LLM_PIPELINE:
469
+ additional_skills_needed = 5 - len(current_job_skills)
470
+ newly_expanded_skills = expand_skills_with_llm(job_title, current_job_skills, num_skills_to_add=additional_skills_needed)
471
+
472
+ for skill in newly_expanded_skills:
473
+ if skill not in current_job_skills:
474
+ current_job_skills.append(skill)
475
+ # --- END MODIFICATION ---
476
+
477
  headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
478
+ skills_to_display = sorted(current_job_skills)[:5]
479
  items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
480
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
481
+
482
+ full_skill_list_for_state = sorted(current_job_skills)
483
  new_offset = len(skills_to_display)
484
  should_button_be_visible = len(full_skill_list_for_state) > 5
485
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
 
494
  return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
495
 
496
  def on_reset():
 
497
  return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
498
 
499
  print("Starting application initialization...")
 
524
 
525
  df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
526
 
 
527
  with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
528
  recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
529
 
 
537
  learning_plan_output = gr.HTML(label="Learning Plan")
538
  load_more_btn = gr.Button("Load More Skills", visible=False)
539
 
 
540
  search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
541
  search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
542
  retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])