Update app.py
Browse files
app.py
CHANGED
|
@@ -163,7 +163,11 @@ def initialize_data_and_model():
|
|
| 163 |
|
| 164 |
def extract_skills_llm(text: str) -> list[str]:
|
| 165 |
if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
|
| 166 |
-
prompt = f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
try:
|
| 168 |
response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
|
| 169 |
skills_text = response[0]['generated_text'].strip()
|
|
@@ -198,7 +202,7 @@ def initialize_data_and_model():
|
|
| 198 |
def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
|
| 199 |
if not LLM_PIPELINE or not job_title or not existing_skills: return []
|
| 200 |
skills_to_add = 6 - len(existing_skills)
|
| 201 |
-
prompt = f"""Instruct: A job has the title "{job_title}"
|
| 202 |
try:
|
| 203 |
response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5, return_full_text=False)
|
| 204 |
new_skills_text = response[0]['generated_text'].strip()
|
|
@@ -270,11 +274,13 @@ def get_job_matches(dream_job: str, top_n: int):
|
|
| 270 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 271 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
| 272 |
|
| 273 |
-
|
|
|
|
| 274 |
|
| 275 |
def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
|
| 276 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 277 |
if not user_skills:
|
|
|
|
| 278 |
table1_df = pd.DataFrame(initial_matches_df).head(top_n)
|
| 279 |
table1_to_show = table1_df[['job_title', 'company', 'Similarity Score']]
|
| 280 |
table1_to_show['Similarity Score'] = table1_to_show['Similarity Score'].map('{:.2%}'.format)
|
|
@@ -282,27 +288,28 @@ def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
|
|
| 282 |
|
| 283 |
status = "Analyzing skills and finding new job matches..."
|
| 284 |
|
| 285 |
-
# --- LOGIC FOR TABLE
|
| 286 |
-
reranked_initial_jobs = pd.DataFrame(initial_matches_df)
|
| 287 |
reranked_initial_jobs['Skill Match Score'] = reranked_initial_jobs['Skills'].apply(lambda js: calculate_skill_match_score(user_skills, js))
|
| 288 |
reranked_initial_jobs = reranked_initial_jobs.sort_values(by='Skill Match Score', ascending=False)
|
| 289 |
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
|
| 294 |
-
# --- LOGIC FOR TABLE
|
| 295 |
combined_query = dream_job + ". My current skills are: " + skills_text
|
| 296 |
newly_found_jobs = find_job_matches(combined_query, top_k=top_n)
|
| 297 |
newly_found_jobs['Skill Match Score'] = newly_found_jobs['Skills'].apply(lambda js: calculate_skill_match_score(user_skills, js))
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
|
| 303 |
status = f"Re-ranked initial jobs and found new jobs for your skills."
|
| 304 |
-
|
| 305 |
-
return
|
|
|
|
| 306 |
|
| 307 |
def find_matches_and_rank_with_check(dream_job: str, top_n: int):
|
| 308 |
if not dream_job:
|
|
@@ -314,11 +321,13 @@ def find_matches_and_rank_with_check(dream_job: str, top_n: int):
|
|
| 314 |
alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
|
| 315 |
return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
|
| 316 |
|
| 317 |
-
status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
|
|
|
|
| 318 |
return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
|
| 319 |
|
|
|
|
| 320 |
def find_matches_and_rank_anyway(dream_job: str, top_n: int):
|
| 321 |
-
status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
|
| 322 |
return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
|
| 323 |
|
| 324 |
def on_select_job(job_id, skills_text):
|
|
@@ -416,12 +425,28 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
| 416 |
learning_plan_output = gr.HTML(label="Learning Plan")
|
| 417 |
load_more_btn = gr.Button("Load More Skills", visible=False)
|
| 418 |
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
ui.launch()
|
|
|
|
| 163 |
|
| 164 |
def extract_skills_llm(text: str) -> list[str]:
|
| 165 |
if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
|
| 166 |
+
prompt = f"""
|
| 167 |
+
Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
|
| 168 |
+
[Example 1] Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus." Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
|
| 169 |
+
[Example 2] Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum." Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
|
| 170 |
+
[Actual Task] Text: "{text}" Extracted Skills:"""
|
| 171 |
try:
|
| 172 |
response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
|
| 173 |
skills_text = response[0]['generated_text'].strip()
|
|
|
|
| 202 |
def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
|
| 203 |
if not LLM_PIPELINE or not job_title or not existing_skills: return []
|
| 204 |
skills_to_add = 6 - len(existing_skills)
|
| 205 |
+
prompt = f"""Instruct: A job has the title "{job_title}" and requires the skills: {', '.join(existing_skills)}. Based on this, what are {skills_to_add} additional, closely related skills typically required for such a role? List only the new skills, separated by commas. Do not repeat skills from the original list. Additional Skills:"""
|
| 206 |
try:
|
| 207 |
response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5, return_full_text=False)
|
| 208 |
new_skills_text = response[0]['generated_text'].strip()
|
|
|
|
| 274 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 275 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
| 276 |
|
| 277 |
+
# When initially finding matches, only df_output is used. skill_jobs_output is cleared/hidden.
|
| 278 |
+
return status, emb_matches, table_to_show, gr.update(choices=dropdown_options, value=dropdown_value, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
|
| 279 |
|
| 280 |
def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
|
| 281 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 282 |
if not user_skills:
|
| 283 |
+
# If skills are cleared, just show the original table without skill scores and hide the second table
|
| 284 |
table1_df = pd.DataFrame(initial_matches_df).head(top_n)
|
| 285 |
table1_to_show = table1_df[['job_title', 'company', 'Similarity Score']]
|
| 286 |
table1_to_show['Similarity Score'] = table1_to_show['Similarity Score'].map('{:.2%}'.format)
|
|
|
|
| 288 |
|
| 289 |
status = "Analyzing skills and finding new job matches..."
|
| 290 |
|
| 291 |
+
# --- LOGIC FOR TOP TABLE (Reranked Initial Jobs) ---
|
| 292 |
+
reranked_initial_jobs = pd.DataFrame(initial_matches_df) # Ensure it's a DataFrame
|
| 293 |
reranked_initial_jobs['Skill Match Score'] = reranked_initial_jobs['Skills'].apply(lambda js: calculate_skill_match_score(user_skills, js))
|
| 294 |
reranked_initial_jobs = reranked_initial_jobs.sort_values(by='Skill Match Score', ascending=False)
|
| 295 |
|
| 296 |
+
top_table_df = reranked_initial_jobs.head(top_n)[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
|
| 297 |
+
top_table_df['Similarity Score'] = top_table_df['Similarity Score'].map('{:.2%}'.format)
|
| 298 |
+
top_table_df['Skill Match Score'] = top_table_df['Skill Match Score'].map('{:.2%}'.format)
|
| 299 |
|
| 300 |
+
# --- LOGIC FOR BOTTOM TABLE (New Skill-Based Jobs) ---
|
| 301 |
combined_query = dream_job + ". My current skills are: " + skills_text
|
| 302 |
newly_found_jobs = find_job_matches(combined_query, top_k=top_n)
|
| 303 |
newly_found_jobs['Skill Match Score'] = newly_found_jobs['Skills'].apply(lambda js: calculate_skill_match_score(user_skills, js))
|
| 304 |
|
| 305 |
+
bottom_table_df = newly_found_jobs[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
|
| 306 |
+
bottom_table_df['Similarity Score'] = bottom_table_df['Similarity Score'].map('{:.2%}'.format)
|
| 307 |
+
bottom_table_df['Skill Match Score'] = bottom_table_df['Skill Match Score'].map('{:.2%}'.format)
|
| 308 |
|
| 309 |
status = f"Re-ranked initial jobs and found new jobs for your skills."
|
| 310 |
+
|
| 311 |
+
# Corrected return order: top_table_df for df_output, bottom_table_df for skill_jobs_output
|
| 312 |
+
return status, top_table_df, bottom_table_df, gr.update(visible=True)
|
| 313 |
|
| 314 |
def find_matches_and_rank_with_check(dream_job: str, top_n: int):
|
| 315 |
if not dream_job:
|
|
|
|
| 321 |
alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
|
| 322 |
return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
|
| 323 |
|
| 324 |
+
status, emb_matches, table_to_show, dropdown_update, accordion_update, _, _ = get_job_matches(dream_job, top_n) # Adjusted to unpack the 2 extra outputs
|
| 325 |
+
# Hide the second table on a new search
|
| 326 |
return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
|
| 327 |
|
| 328 |
+
|
| 329 |
def find_matches_and_rank_anyway(dream_job: str, top_n: int):
    """Run the job search for ``dream_job`` and build the nine UI outputs.

    Delegates to ``get_job_matches`` and forwards its first five results
    (status, matches, table, dropdown update, accordion update) unchanged.
    The last two results from ``get_job_matches`` are ignored; in their
    place this function returns freshly built values: two hidden-component
    updates, an empty DataFrame, and one more hidden-component update.

    Args:
        dream_job: Search text passed straight through to ``get_job_matches``.
        top_n: Result count passed straight through to ``get_job_matches``.

    Returns:
        A 9-tuple, in the order of the ``outputs`` list wired to
        ``search_anyway_btn``.
    """
    (
        status,
        emb_matches,
        table_to_show,
        dropdown_update,
        accordion_update,
        _ignored_table,
        _ignored_visibility,
    ) = get_job_matches(dream_job, top_n)

    # Built in the same left-to-right order as the returned tuple.
    hide_spelling_alert = gr.update(visible=False)
    hide_spelling_row = gr.update(visible=False)
    empty_skill_jobs = pd.DataFrame()
    hide_skill_jobs_col = gr.update(visible=False)

    return (
        status,
        emb_matches,
        table_to_show,
        dropdown_update,
        accordion_update,
        hide_spelling_alert,
        hide_spelling_row,
        empty_skill_jobs,
        hide_skill_jobs_col,
    )
|
| 332 |
|
| 333 |
def on_select_job(job_id, skills_text):
|
|
|
|
| 425 |
learning_plan_output = gr.HTML(label="Learning Plan")
|
| 426 |
load_more_btn = gr.Button("Load More Skills", visible=False)
|
| 427 |
|
| 428 |
+
# --- Event wiring -----------------------------------------------------------
# Each handler returns one value per component listed in ``outputs``, in the
# same order as that list.

# Both search buttons drive the same nine components.
search_btn.click(
    fn=find_matches_and_rank_with_check,
    inputs=[dream_text, topk_slider],
    outputs=[
        status_text,
        initial_matches_state,
        df_output,
        job_selector,
        details_accordion,
        spelling_alert,
        spelling_row,
        skill_jobs_output,
        skill_jobs_col,
    ],
)
search_anyway_btn.click(
    fn=find_matches_and_rank_anyway,
    inputs=[dream_text, topk_slider],
    outputs=[
        status_text,
        initial_matches_state,
        df_output,
        job_selector,
        details_accordion,
        spelling_alert,
        spelling_row,
        skill_jobs_output,
        skill_jobs_col,
    ],
)

# analyze_skills returns (status, top table, bottom table, column update):
# the top table goes to df_output, the bottom table to skill_jobs_output.
analyze_btn.click(
    fn=analyze_skills,
    inputs=[dream_text, initial_matches_state, skills_text, topk_slider],
    outputs=[status_text, df_output, skill_jobs_output, skill_jobs_col],
)

# "Retype" clears both tables and the stored matches and hides every
# optional component, leaving only the status message.
retype_btn.click(
    fn=lambda: (
        "Status: Ready for you to retype.",
        None,
        pd.DataFrame(),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        pd.DataFrame(),
        gr.update(visible=False),
    ),
    outputs=[
        status_text,
        initial_matches_state,
        df_output,
        job_selector,
        details_accordion,
        spelling_alert,
        spelling_row,
        skill_jobs_output,
        skill_jobs_col,
    ],
)

# Reset takes no inputs and is registered with queue=False.
reset_btn.click(
    fn=on_reset,
    outputs=[
        dream_text,
        topk_slider,
        skills_text,
        df_output,
        initial_matches_state,
        job_selector,
        details_accordion,
        status_text,
        job_details_markdown,
        duties_markdown,
        qualifications_markdown,
        description_markdown,
        spelling_alert,
        spelling_row,
        missing_skills_state,
        skills_offset_state,
        load_more_btn,
        skill_jobs_output,
        skill_jobs_col,
    ],
    queue=False,
)

# Selecting a job refreshes the detail panes, the learning plan and the
# paging state used by "Load More Skills".
job_selector.change(
    fn=on_select_job,
    inputs=[job_selector, skills_text],
    outputs=[
        job_details_markdown,
        duties_markdown,
        qualifications_markdown,
        description_markdown,
        learning_plan_output,
        details_accordion,
        missing_skills_state,
        skills_offset_state,
        load_more_btn,
    ],
)
load_more_btn.click(
    fn=load_more_skills,
    inputs=[missing_skills_state, skills_offset_state],
    outputs=[learning_plan_output, skills_offset_state, load_more_btn],
)
|
| 451 |
|
| 452 |
ui.launch()
|