zlf18 committed on
Commit
638f98a
·
verified ·
1 Parent(s): b6481eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -48
app.py CHANGED
@@ -61,31 +61,6 @@ FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
61
  KNOWN_WORDS = set()
62
 
63
  # --- CORE NLP & HELPER FUNCTIONS ---
64
- def expand_skills_with_llm(job_title: str, existing_skills: list, num_skills_to_add: int) -> list:
65
- """
66
- Uses the LLM to suggest additional skills based on a job title and existing skills.
67
- """
68
- if not LLM_PIPELINE or not job_title or num_skills_to_add <= 0:
69
- return []
70
-
71
- existing_skills_str = ', '.join(existing_skills)
72
- prompt = f"""
73
- Instruct: A job has the title "{job_title}" and already lists these skills: {existing_skills_str}.
74
- Based on this, what are {num_skills_to_add} additional, closely related skills typically required for such a role?
75
- List only the new skills, separated by commas. Do not repeat skills from the original list. Do not include any preamble.
76
-
77
- Additional Skills:
78
- """
79
- try:
80
- response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
81
- generated_text = response[0]['generated_text']
82
- skills_part = generated_text.split("Additional Skills:")[-1].strip()
83
- new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
84
- return list(dict.fromkeys(new_skills)) # Ensure unique skills are returned
85
- except Exception as e:
86
- print(f"🚨 ERROR expanding skills with LLM: {e}")
87
- return []
88
-
89
  def _norm_skill_token(s: str) -> str:
90
  s = s.lower().strip()
91
  s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
@@ -267,6 +242,26 @@ Extracted Skills:
267
  found_skills.add(skill)
268
  return list(found_skills)
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  def extract_skills_hybrid(row) -> list[str]:
271
  text = row['text_for_skills']
272
  job_title = row.get('Job title', '') # Use original Job title for context
@@ -278,8 +273,7 @@ Extracted Skills:
278
 
279
  # If the combined list is still too short, expand it
280
  if len(combined_skills) < 6:
281
- # MODIFIED: Call the global expand_skills_with_llm function
282
- expanded_skills = expand_skills_with_llm(job_title, list(combined_skills), num_skills_to_add=6-len(combined_skills))
283
  combined_skills.update(expanded_skills)
284
 
285
  return sorted(list(combined_skills))
@@ -329,12 +323,14 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
329
  emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
330
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
331
 
 
332
  recommendations_table = pd.DataFrame()
333
  recommendations_visible = False
334
 
335
  if user_skills:
336
  scored_df = score_jobs_by_skills(user_skills, emb_matches)
337
 
 
338
  skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
339
  if not skill_sorted_df.empty:
340
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
@@ -343,6 +339,7 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
343
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
344
  recommendations_table = recs
345
  recommendations_visible = True
 
346
 
347
  display_df = scored_df.head(top_n)
348
  status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
@@ -363,6 +360,7 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
363
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
364
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
365
 
 
366
  return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
367
 
368
  def rerank_current_results(initial_matches_df, skills_text, top_n):
@@ -371,6 +369,7 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
371
  initial_matches_df = pd.DataFrame(initial_matches_df)
372
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
373
 
 
374
  recommendations_table = pd.DataFrame()
375
  recommendations_visible = False
376
 
@@ -385,6 +384,7 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
385
  status = f"Results **re-ranked** based on your {len(user_skills)} skills."
386
  display_df = ranked_df.head(top_n)
387
 
 
388
  skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
389
  if not skill_sorted_df.empty:
390
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
@@ -393,6 +393,7 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
393
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
394
  recommendations_table = recs
395
  recommendations_visible = True
 
396
 
397
  table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
398
  table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
@@ -402,15 +403,18 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
402
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
403
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
404
 
 
405
  return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
406
 
407
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
408
  if not dream_job:
 
409
  return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
410
  unrecognized_words = check_spelling_in_query(dream_job)
411
  if unrecognized_words:
412
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
413
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
 
414
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
415
 
416
  status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
@@ -448,11 +452,12 @@ def on_select_job(job_id, skills_text):
448
  matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
449
  all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
450
 
451
- if score_val >= 0.98:
452
- learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
453
- job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
454
- return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
455
-
 
456
  job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
457
  headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
458
  learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
@@ -461,25 +466,11 @@ def on_select_job(job_id, skills_text):
461
  learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
462
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
463
  else:
464
- # --- MODIFIED LOGIC TO ENSURE AT LEAST 5 SKILLS ---
465
- current_job_skills = list(job_skills)
466
- job_title = str(row.get("job_title", ""))
467
-
468
- if len(current_job_skills) < 5 and job_title and LLM_PIPELINE:
469
- additional_skills_needed = 5 - len(current_job_skills)
470
- newly_expanded_skills = expand_skills_with_llm(job_title, current_job_skills, num_skills_to_add=additional_skills_needed)
471
-
472
- for skill in newly_expanded_skills:
473
- if skill not in current_job_skills:
474
- current_job_skills.append(skill)
475
- # --- END MODIFICATION ---
476
-
477
  headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
478
- skills_to_display = sorted(current_job_skills)[:5]
479
  items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
480
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
481
-
482
- full_skill_list_for_state = sorted(current_job_skills)
483
  new_offset = len(skills_to_display)
484
  should_button_be_visible = len(full_skill_list_for_state) > 5
485
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
@@ -494,6 +485,7 @@ def load_more_skills(full_skills_list, current_offset):
494
  return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
495
 
496
  def on_reset():
 
497
  return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
498
 
499
  print("Starting application initialization...")
@@ -524,6 +516,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
524
 
525
  df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
526
 
 
527
  with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
528
  recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
529
 
@@ -537,6 +530,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
537
  learning_plan_output = gr.HTML(label="Learning Plan")
538
  load_more_btn = gr.Button("Load More Skills", visible=False)
539
 
 
540
  search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
541
  search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
542
  retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
 
61
  KNOWN_WORDS = set()
62
 
63
  # --- CORE NLP & HELPER FUNCTIONS ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def _norm_skill_token(s: str) -> str:
65
  s = s.lower().strip()
66
  s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
 
242
  found_skills.add(skill)
243
  return list(found_skills)
244
 
245
+ def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
246
+ if not LLM_PIPELINE or not job_title: return []
247
+
248
+ skills_to_add = 6 - len(existing_skills)
249
+ prompt = f"""
250
+ Instruct: A job has the title "{job_title}" and requires the skills: {', '.join(existing_skills)}.
251
+ Based on this, what are {skills_to_add} additional, closely related skills typically required for such a role?
252
+ List only the new skills, separated by commas. Do not repeat skills from the original list.
253
+
254
+ Additional Skills:
255
+ """
256
+ try:
257
+ response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
258
+ generated_text = response[0]['generated_text']
259
+ skills_part = generated_text.split("Additional Skills:")[-1].strip()
260
+ new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
261
+ return new_skills
262
+ except Exception:
263
+ return []
264
+
265
  def extract_skills_hybrid(row) -> list[str]:
266
  text = row['text_for_skills']
267
  job_title = row.get('Job title', '') # Use original Job title for context
 
273
 
274
  # If the combined list is still too short, expand it
275
  if len(combined_skills) < 6:
276
+ expanded_skills = expand_skills_with_llm(job_title, list(combined_skills))
 
277
  combined_skills.update(expanded_skills)
278
 
279
  return sorted(list(combined_skills))
 
323
  emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
324
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
325
 
326
+ # --- NEW: Initialize variables for the recommendations section ---
327
  recommendations_table = pd.DataFrame()
328
  recommendations_visible = False
329
 
330
  if user_skills:
331
  scored_df = score_jobs_by_skills(user_skills, emb_matches)
332
 
333
+ # --- NEW: Logic to get top 5 jobs based purely on skill match score ---
334
  skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
335
  if not skill_sorted_df.empty:
336
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
 
339
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
340
  recommendations_table = recs
341
  recommendations_visible = True
342
+ # --- END NEW ---
343
 
344
  display_df = scored_df.head(top_n)
345
  status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
 
360
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
361
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
362
 
363
+ # --- MODIFIED: Added new outputs for recommendations ---
364
  return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
365
 
366
  def rerank_current_results(initial_matches_df, skills_text, top_n):
 
369
  initial_matches_df = pd.DataFrame(initial_matches_df)
370
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
371
 
372
+ # --- NEW: Initialize variables for the recommendations section ---
373
  recommendations_table = pd.DataFrame()
374
  recommendations_visible = False
375
 
 
384
  status = f"Results **re-ranked** based on your {len(user_skills)} skills."
385
  display_df = ranked_df.head(top_n)
386
 
387
+ # --- NEW: Logic to get top 5 jobs based purely on skill match score ---
388
  skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
389
  if not skill_sorted_df.empty:
390
  recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
 
393
  recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
394
  recommendations_table = recs
395
  recommendations_visible = True
396
+ # --- END NEW ---
397
 
398
  table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
399
  table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
 
403
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
404
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
405
 
406
+ # --- MODIFIED: Added new outputs for recommendations ---
407
  return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
408
 
409
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
410
  if not dream_job:
411
+ # --- MODIFIED: Added new default outputs ---
412
  return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
413
  unrecognized_words = check_spelling_in_query(dream_job)
414
  if unrecognized_words:
415
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
416
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
417
+ # --- MODIFIED: Added new default outputs ---
418
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
419
 
420
  status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
 
452
  matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
453
  all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
454
 
455
+ if user_skills and score_val >= 0.98:
456
+ learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
457
+ job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
458
+ return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
459
+
460
+ if user_skills:
461
  job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
462
  headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
463
  learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
 
466
  learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
467
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
468
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
469
  headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
470
+ skills_to_display = sorted(job_skills)[:5]
471
  items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
472
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
473
+ full_skill_list_for_state = sorted(job_skills)
 
474
  new_offset = len(skills_to_display)
475
  should_button_be_visible = len(full_skill_list_for_state) > 5
476
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
 
485
  return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
486
 
487
  def on_reset():
488
+ # --- MODIFIED: Added new default outputs for reset ---
489
  return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
490
 
491
  print("Starting application initialization...")
 
516
 
517
  df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
518
 
519
+ # --- NEW: Added the recommendations section ---
520
  with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
521
  recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
522
 
 
530
  learning_plan_output = gr.HTML(label="Learning Plan")
531
  load_more_btn = gr.Button("Load More Skills", visible=False)
532
 
533
+ # --- MODIFIED: Added new outputs to the click events ---
534
  search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
535
  search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
536
  retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])