zlf18 committed on
Commit
2873ff1
·
verified ·
1 Parent(s): 190fb0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -29
app.py CHANGED
@@ -100,15 +100,12 @@ def llm_expand_query(user_input: str) -> str:
100
  if not LLM_PIPELINE: return user_input
101
  prompt_template = (
102
  f"User's career interest: '{user_input}'\n"
103
- f"Instruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. "
104
- f"Do not include a preamble, the user input, or any list formatting in the output. Just the expanded sentence.\n"
105
- f"Expanded Intent:"
106
  )
107
  try:
108
  response = LLM_PIPELINE(prompt_template, max_new_tokens=100, do_sample=True, temperature=0.6, return_full_text=False)
109
  expanded_query = response[0]['generated_text'].strip()
110
- final_query = user_input + ". " + expanded_query.replace('\n', ' ').replace(':', '').strip()
111
- final_query = final_query.replace('..', '.').strip()
112
  return final_query
113
  except Exception:
114
  return user_input
@@ -168,16 +165,9 @@ def initialize_data_and_model():
168
  if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
169
  prompt = f"""
170
  Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
171
- [Example 1]
172
- Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus."
173
- Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
174
- [Example 2]
175
- Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum."
176
- Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
177
- [Actual Task]
178
- Text: "{text}"
179
- Extracted Skills:
180
- """
181
  try:
182
  response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
183
  skills_text = response[0]['generated_text'].strip()
@@ -240,7 +230,6 @@ Extracted Skills:
240
  print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
241
  original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
242
  original_df = original_df.drop(columns=['text_for_skills'])
243
-
244
  print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
245
  original_df.to_parquet(PROCESSED_DATA_PATH)
246
 
@@ -255,7 +244,7 @@ Extracted Skills:
255
  augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
256
  combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
257
  original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
258
-
259
  print("--- Loading Fine-Tuned Sentence Transformer Model ---")
260
  model = SentenceTransformer(FINETUNED_MODEL_ID)
261
  print("--- Encoding Embeddings ---")
@@ -280,7 +269,7 @@ def get_job_matches(dream_job: str, top_n: int):
280
  status = f"Found {len(emb_matches)} top matches for your career goal."
281
 
282
  table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
283
- table_to_show = table_to_show.rename(columns={'Similarity Score': 'Relevance Score'})
284
 
285
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
286
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
@@ -290,25 +279,34 @@ def get_job_matches(dream_job: str, top_n: int):
290
  def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
291
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
292
  if not user_skills:
293
- # If skills are cleared, just show the original table without skill scores
294
- table1_to_show = initial_matches_df.head(top_n)[['job_title', 'company', 'Similarity Score']]
 
 
295
  return "Skills cleared. Showing original relevance.", table1_to_show, pd.DataFrame(), gr.update(visible=False)
296
 
 
 
297
  # --- 1. Rerank the first table ---
298
  rerank_df = pd.DataFrame(initial_matches_df) # Make sure it's a DataFrame
299
  rerank_df['Skill Match Score'] = rerank_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
300
  rerank_df = rerank_df.sort_values(by='Skill Match Score', ascending=False)
301
 
302
  table1_to_show = rerank_df.head(top_n)[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
 
 
303
 
304
  # --- 2. Find new jobs for the second table ---
305
- status = "Analyzing skills and finding new job matches..."
306
  combined_query = dream_job + ". My current skills are: " + skills_text
307
  new_matches_df = find_job_matches(combined_query, top_k=top_n)
308
- table2_to_show = new_matches_df[['job_title', 'company', 'Similarity Score']]
309
- table2_to_show = table2_to_show.rename(columns={'Similarity Score': 'Blended Match Score'})
 
 
 
 
310
 
311
- status = f"Re-ranked initial jobs by skill match and found new jobs to consider."
312
  return status, table1_to_show, table2_to_show, gr.update(visible=True)
313
 
314
  def find_matches_and_rank_with_check(dream_job: str, top_n: int):
@@ -322,6 +320,7 @@ def find_matches_and_rank_with_check(dream_job: str, top_n: int):
322
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
323
 
324
  status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
 
325
  return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
326
 
327
  def find_matches_and_rank_anyway(dream_job: str, top_n: int):
@@ -358,7 +357,7 @@ def on_select_job(job_id, skills_text):
358
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.update(visible=True), [], 0, gr.Button(visible=False)
359
 
360
  headline = "<h4>Focus on these skills to improve your match:</h4>" if user_skills else "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
361
- skills_to_display = sorted(all_missing_skills)[:5]
362
  items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
363
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
364
  full_skill_list_for_state = sorted(all_missing_skills) if user_skills else sorted(job_skills)
@@ -378,12 +377,10 @@ def load_more_skills(full_skills_list, current_offset):
378
  def on_reset():
379
  return ("", 3, "", pd.DataFrame(), None, gr.update(visible=False), gr.update(visible=False), "Status: Ready.", "", "", "", "", gr.update(visible=False), gr.update(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.update(visible=False))
380
 
381
- # --- Main App ---
382
  try:
383
  initialization_status = initialize_data_and_model()
384
  except Exception as e:
385
- # Fallback for brevity, paste your full initialize_data_and_model function
386
- raise e
387
  print(initialization_status)
388
 
389
  with gr.Blocks(theme=gr.themes.Soft()) as ui:
@@ -409,7 +406,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
409
  retype_btn = gr.Button("Let Me Fix It", variant="stop")
410
 
411
  gr.Markdown("### Top Matches for Your Career Goal")
412
- df_output = gr.DataFrame(label="Job Matches", interactive=False, datatype=['str', 'str', 'number', 'number'])
413
 
414
  with gr.Column(visible=False) as skill_jobs_col:
415
  gr.Markdown("### Potential Jobs to Consider (Given Your Skills)")
 
100
  if not LLM_PIPELINE: return user_input
101
  prompt_template = (
102
  f"User's career interest: '{user_input}'\n"
103
+ f"Instruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. Do not include a preamble. Expanded Intent:"
 
 
104
  )
105
  try:
106
  response = LLM_PIPELINE(prompt_template, max_new_tokens=100, do_sample=True, temperature=0.6, return_full_text=False)
107
  expanded_query = response[0]['generated_text'].strip()
108
+ final_query = user_input + ". " + expanded_query.replace('\n', ' ').strip()
 
109
  return final_query
110
  except Exception:
111
  return user_input
 
165
  if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
166
  prompt = f"""
167
  Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
168
+ [Example 1] Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus." Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
169
+ [Example 2] Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum." Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
170
+ [Actual Task] Text: "{text}" Extracted Skills:"""
 
 
 
 
 
 
 
171
  try:
172
  response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
173
  skills_text = response[0]['generated_text'].strip()
 
230
  print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
231
  original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
232
  original_df = original_df.drop(columns=['text_for_skills'])
 
233
  print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
234
  original_df.to_parquet(PROCESSED_DATA_PATH)
235
 
 
244
  augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
245
  combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
246
  original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
247
+
248
  print("--- Loading Fine-Tuned Sentence Transformer Model ---")
249
  model = SentenceTransformer(FINETUNED_MODEL_ID)
250
  print("--- Encoding Embeddings ---")
 
269
  status = f"Found {len(emb_matches)} top matches for your career goal."
270
 
271
  table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
272
+ table_to_show['Similarity Score'] = table_to_show['Similarity Score'].map('{:.2%}'.format)
273
 
274
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
275
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
 
279
  def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
280
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
281
  if not user_skills:
282
+ # If skills are cleared, just show the original table without skill scores and hide the second table
283
+ table1_df = pd.DataFrame(initial_matches_df).head(top_n)
284
+ table1_to_show = table1_df[['job_title', 'company', 'Similarity Score']]
285
+ table1_to_show['Similarity Score'] = table1_to_show['Similarity Score'].map('{:.2%}'.format)
286
  return "Skills cleared. Showing original relevance.", table1_to_show, pd.DataFrame(), gr.update(visible=False)
287
 
288
+ status = "Analyzing skills and finding new job matches..."
289
+
290
  # --- 1. Rerank the first table ---
291
  rerank_df = pd.DataFrame(initial_matches_df) # Make sure it's a DataFrame
292
  rerank_df['Skill Match Score'] = rerank_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
293
  rerank_df = rerank_df.sort_values(by='Skill Match Score', ascending=False)
294
 
295
  table1_to_show = rerank_df.head(top_n)[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
296
+ table1_to_show['Similarity Score'] = table1_to_show['Similarity Score'].map('{:.2%}'.format)
297
+ table1_to_show['Skill Match Score'] = table1_to_show['Skill Match Score'].map('{:.2%}'.format)
298
 
299
  # --- 2. Find new jobs for the second table ---
 
300
  combined_query = dream_job + ". My current skills are: " + skills_text
301
  new_matches_df = find_job_matches(combined_query, top_k=top_n)
302
+ # Calculate skill match for this new table as well
303
+ new_matches_df['Skill Match Score'] = new_matches_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
304
+
305
+ table2_to_show = new_matches_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
306
+ table2_to_show['Similarity Score'] = table2_to_show['Similarity Score'].map('{:.2%}'.format)
307
+ table2_to_show['Skill Match Score'] = table2_to_show['Skill Match Score'].map('{:.2%}'.format)
308
 
309
+ status = f"Re-ranked initial jobs and found new jobs for your skills."
310
  return status, table1_to_show, table2_to_show, gr.update(visible=True)
311
 
312
  def find_matches_and_rank_with_check(dream_job: str, top_n: int):
 
320
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
321
 
322
  status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
323
+ # Hide the second table on a new search
324
  return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
325
 
326
  def find_matches_and_rank_anyway(dream_job: str, top_n: int):
 
357
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.update(visible=True), [], 0, gr.Button(visible=False)
358
 
359
  headline = "<h4>Focus on these skills to improve your match:</h4>" if user_skills else "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
360
+ skills_to_display = sorted(all_missing_skills)[:5] if user_skills else sorted(job_skills)[:5]
361
  items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
362
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
363
  full_skill_list_for_state = sorted(all_missing_skills) if user_skills else sorted(job_skills)
 
377
  def on_reset():
378
  return ("", 3, "", pd.DataFrame(), None, gr.update(visible=False), gr.update(visible=False), "Status: Ready.", "", "", "", "", gr.update(visible=False), gr.update(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.update(visible=False))
379
 
 
380
  try:
381
  initialization_status = initialize_data_and_model()
382
  except Exception as e:
383
+ initialization_status = f"ERROR during startup: {e}. Please check logs."
 
384
  print(initialization_status)
385
 
386
  with gr.Blocks(theme=gr.themes.Soft()) as ui:
 
406
  retype_btn = gr.Button("Let Me Fix It", variant="stop")
407
 
408
  gr.Markdown("### Top Matches for Your Career Goal")
409
+ df_output = gr.DataFrame(label="Job Matches", interactive=False)
410
 
411
  with gr.Column(visible=False) as skill_jobs_col:
412
  gr.Markdown("### Potential Jobs to Consider (Given Your Skills)")