Update app.py
Browse files
app.py
CHANGED
|
@@ -100,15 +100,12 @@ def llm_expand_query(user_input: str) -> str:
|
|
| 100 |
if not LLM_PIPELINE: return user_input
|
| 101 |
prompt_template = (
|
| 102 |
f"User's career interest: '{user_input}'\n"
|
| 103 |
-
f"Instruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. "
|
| 104 |
-
f"Do not include a preamble, the user input, or any list formatting in the output. Just the expanded sentence.\n"
|
| 105 |
-
f"Expanded Intent:"
|
| 106 |
)
|
| 107 |
try:
|
| 108 |
response = LLM_PIPELINE(prompt_template, max_new_tokens=100, do_sample=True, temperature=0.6, return_full_text=False)
|
| 109 |
expanded_query = response[0]['generated_text'].strip()
|
| 110 |
-
final_query = user_input + ". " + expanded_query.replace('\n', ' ').
|
| 111 |
-
final_query = final_query.replace('..', '.').strip()
|
| 112 |
return final_query
|
| 113 |
except Exception:
|
| 114 |
return user_input
|
|
@@ -168,16 +165,9 @@ def initialize_data_and_model():
|
|
| 168 |
if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
|
| 169 |
prompt = f"""
|
| 170 |
Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
|
| 171 |
-
[Example 1]
|
| 172 |
-
Text: "
|
| 173 |
-
|
| 174 |
-
[Example 2]
|
| 175 |
-
Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum."
|
| 176 |
-
Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
|
| 177 |
-
[Actual Task]
|
| 178 |
-
Text: "{text}"
|
| 179 |
-
Extracted Skills:
|
| 180 |
-
"""
|
| 181 |
try:
|
| 182 |
response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
|
| 183 |
skills_text = response[0]['generated_text'].strip()
|
|
@@ -240,7 +230,6 @@ Extracted Skills:
|
|
| 240 |
print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
|
| 241 |
original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
|
| 242 |
original_df = original_df.drop(columns=['text_for_skills'])
|
| 243 |
-
|
| 244 |
print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
|
| 245 |
original_df.to_parquet(PROCESSED_DATA_PATH)
|
| 246 |
|
|
@@ -255,7 +244,7 @@ Extracted Skills:
|
|
| 255 |
augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
|
| 256 |
combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
|
| 257 |
original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
|
| 258 |
-
|
| 259 |
print("--- Loading Fine-Tuned Sentence Transformer Model ---")
|
| 260 |
model = SentenceTransformer(FINETUNED_MODEL_ID)
|
| 261 |
print("--- Encoding Embeddings ---")
|
|
@@ -280,7 +269,7 @@ def get_job_matches(dream_job: str, top_n: int):
|
|
| 280 |
status = f"Found {len(emb_matches)} top matches for your career goal."
|
| 281 |
|
| 282 |
table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
|
| 283 |
-
table_to_show
|
| 284 |
|
| 285 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 286 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
|
@@ -290,25 +279,34 @@ def get_job_matches(dream_job: str, top_n: int):
|
|
| 290 |
def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
|
| 291 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 292 |
if not user_skills:
|
| 293 |
-
# If skills are cleared, just show the original table without skill scores
|
| 294 |
-
|
|
|
|
|
|
|
| 295 |
return "Skills cleared. Showing original relevance.", table1_to_show, pd.DataFrame(), gr.update(visible=False)
|
| 296 |
|
|
|
|
|
|
|
| 297 |
# --- 1. Rerank the first table ---
|
| 298 |
rerank_df = pd.DataFrame(initial_matches_df) # Make sure it's a DataFrame
|
| 299 |
rerank_df['Skill Match Score'] = rerank_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
|
| 300 |
rerank_df = rerank_df.sort_values(by='Skill Match Score', ascending=False)
|
| 301 |
|
| 302 |
table1_to_show = rerank_df.head(top_n)[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
|
|
|
|
|
|
|
| 303 |
|
| 304 |
# --- 2. Find new jobs for the second table ---
|
| 305 |
-
status = "Analyzing skills and finding new job matches..."
|
| 306 |
combined_query = dream_job + ". My current skills are: " + skills_text
|
| 307 |
new_matches_df = find_job_matches(combined_query, top_k=top_n)
|
| 308 |
-
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
-
status = f"Re-ranked initial jobs
|
| 312 |
return status, table1_to_show, table2_to_show, gr.update(visible=True)
|
| 313 |
|
| 314 |
def find_matches_and_rank_with_check(dream_job: str, top_n: int):
|
|
@@ -322,6 +320,7 @@ def find_matches_and_rank_with_check(dream_job: str, top_n: int):
|
|
| 322 |
return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
|
| 323 |
|
| 324 |
status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
|
|
|
|
| 325 |
return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
|
| 326 |
|
| 327 |
def find_matches_and_rank_anyway(dream_job: str, top_n: int):
|
|
@@ -358,7 +357,7 @@ def on_select_job(job_id, skills_text):
|
|
| 358 |
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.update(visible=True), [], 0, gr.Button(visible=False)
|
| 359 |
|
| 360 |
headline = "<h4>Focus on these skills to improve your match:</h4>" if user_skills else "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
|
| 361 |
-
skills_to_display = sorted(all_missing_skills)[:5]
|
| 362 |
items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
|
| 363 |
learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
|
| 364 |
full_skill_list_for_state = sorted(all_missing_skills) if user_skills else sorted(job_skills)
|
|
@@ -378,12 +377,10 @@ def load_more_skills(full_skills_list, current_offset):
|
|
| 378 |
def on_reset():
    """Return the fixed tuple of 19 default values.

    The tuple is purely positional. NOTE(review): the component list these
    values are bound to is not visible in this view -- keep the order in sync
    with the handler wiring before changing any slot.
    """
    def hidden():
        # Build a fresh update per slot rather than sharing one object.
        return gr.update(visible=False)

    reset_values = [
        "", 3, "",
        pd.DataFrame(), None,
        hidden(), hidden(),
        "Status: Ready.",
        "", "", "", "",
        hidden(), hidden(),
        [], 0,
        gr.Button(visible=False),
        pd.DataFrame(),
        hidden(),
    ]
    return tuple(reset_values)
|
| 380 |
|
| 381 |
-
# --- Main App ---
|
| 382 |
try:
|
| 383 |
initialization_status = initialize_data_and_model()
|
| 384 |
except Exception as e:
|
| 385 |
-
|
| 386 |
-
raise e
|
| 387 |
print(initialization_status)
|
| 388 |
|
| 389 |
with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
@@ -409,7 +406,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
| 409 |
retype_btn = gr.Button("Let Me Fix It", variant="stop")
|
| 410 |
|
| 411 |
gr.Markdown("### Top Matches for Your Career Goal")
|
| 412 |
-
df_output = gr.DataFrame(label="Job Matches", interactive=False
|
| 413 |
|
| 414 |
with gr.Column(visible=False) as skill_jobs_col:
|
| 415 |
gr.Markdown("### Potential Jobs to Consider (Given Your Skills)")
|
|
|
|
| 100 |
if not LLM_PIPELINE: return user_input
|
| 101 |
prompt_template = (
|
| 102 |
f"User's career interest: '{user_input}'\n"
|
| 103 |
+
f"Instruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. Do not include a preamble. Expanded Intent:"
|
|
|
|
|
|
|
| 104 |
)
|
| 105 |
try:
|
| 106 |
response = LLM_PIPELINE(prompt_template, max_new_tokens=100, do_sample=True, temperature=0.6, return_full_text=False)
|
| 107 |
expanded_query = response[0]['generated_text'].strip()
|
| 108 |
+
final_query = user_input + ". " + expanded_query.replace('\n', ' ').strip()
|
|
|
|
| 109 |
return final_query
|
| 110 |
except Exception:
|
| 111 |
return user_input
|
|
|
|
| 165 |
if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
|
| 166 |
prompt = f"""
|
| 167 |
Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
|
| 168 |
+
[Example 1] Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus." Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
|
| 169 |
+
[Example 2] Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum." Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
|
| 170 |
+
[Actual Task] Text: "{text}" Extracted Skills:"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
try:
|
| 172 |
response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
|
| 173 |
skills_text = response[0]['generated_text'].strip()
|
|
|
|
| 230 |
print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
|
| 231 |
original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
|
| 232 |
original_df = original_df.drop(columns=['text_for_skills'])
|
|
|
|
| 233 |
print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
|
| 234 |
original_df.to_parquet(PROCESSED_DATA_PATH)
|
| 235 |
|
|
|
|
| 244 |
augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
|
| 245 |
combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
|
| 246 |
original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
|
| 247 |
+
|
| 248 |
print("--- Loading Fine-Tuned Sentence Transformer Model ---")
|
| 249 |
model = SentenceTransformer(FINETUNED_MODEL_ID)
|
| 250 |
print("--- Encoding Embeddings ---")
|
|
|
|
| 269 |
status = f"Found {len(emb_matches)} top matches for your career goal."
|
| 270 |
|
| 271 |
table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
|
| 272 |
+
table_to_show['Similarity Score'] = table_to_show['Similarity Score'].map('{:.2%}'.format)
|
| 273 |
|
| 274 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 275 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
|
|
|
| 279 |
def _with_percent_scores(frame, columns):
    """Return an independent copy of ``frame[columns]`` with score columns as percent strings."""
    # Copy first: writing into a column-sliced frame is chained assignment and
    # triggers pandas' SettingWithCopyWarning.
    view = frame[columns].copy()
    for score_column in ('Similarity Score', 'Skill Match Score'):
        if score_column in view.columns:
            view[score_column] = view[score_column].map('{:.2%}'.format)
    return view


def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
    """Re-rank the initial matches by skill overlap and search again with the skills added.

    Args:
        dream_job: Career-goal text; it is prefixed to the skills to form the
            second search query.
        initial_matches_df: Earlier matches; anything ``pd.DataFrame`` accepts.
            The code reads the columns ``job_title``, ``company``,
            ``Similarity Score`` and ``Skills`` from it.
        skills_text: Comma-separated skills typed by the user. ``None`` or an
            empty value is treated as "no skills".
        top_n: Number of rows kept for the first table and requested from
            ``find_job_matches`` for the second.

    Returns:
        A 4-tuple ``(status, table1, table2, visibility_update)``. Without any
        usable skill, ``table1`` is the first ``top_n`` initial matches with
        no skill score, ``table2`` is an empty frame and the update hides its
        target. Otherwise ``table1`` is the initial matches sorted by
        ``Skill Match Score`` (descending), ``table2`` holds the matches for
        the combined query, and the update shows its target. Score columns are
        formatted as percent strings in every returned table.
        NOTE(review): the component driven by the visibility update is wired
        outside this view -- presumably the second-table container; confirm.
    """
    base_columns = ['job_title', 'company', 'Similarity Score']
    scored_columns = base_columns + ['Skill Match Score']

    # Normalize each token once and drop the ones that normalize to nothing.
    normalized = (_norm_skill_token(raw) for raw in (skills_text or "").split(','))
    user_skills = [skill for skill in normalized if skill]

    # Work on a copy so the added score column never leaks into the frame the
    # caller keeps (pd.DataFrame(df) alone may share data with ``df``).
    initial_df = pd.DataFrame(initial_matches_df).copy()

    if not user_skills:
        # If skills are cleared, just show the original table without skill
        # scores and hide the second table.
        table1_to_show = _with_percent_scores(initial_df.head(top_n), base_columns)
        return "Skills cleared. Showing original relevance.", table1_to_show, pd.DataFrame(), gr.update(visible=False)

    def skill_score(job_skills):
        return calculate_skill_match_score(user_skills, job_skills)

    # --- 1. Rerank the first table ---
    initial_df['Skill Match Score'] = initial_df['Skills'].apply(skill_score)
    rerank_df = initial_df.sort_values(by='Skill Match Score', ascending=False)
    table1_to_show = _with_percent_scores(rerank_df.head(top_n), scored_columns)

    # --- 2. Find new jobs for the second table ---
    combined_query = dream_job + ". My current skills are: " + skills_text
    # Copy before adding a column: the returned frame may be a view of data
    # owned by find_job_matches.
    new_matches_df = find_job_matches(combined_query, top_k=top_n).copy()
    new_matches_df['Skill Match Score'] = new_matches_df['Skills'].apply(skill_score)
    table2_to_show = _with_percent_scores(new_matches_df, scored_columns)

    status = "Re-ranked initial jobs and found new jobs for your skills."
    return status, table1_to_show, table2_to_show, gr.update(visible=True)
|
| 311 |
|
| 312 |
def find_matches_and_rank_with_check(dream_job: str, top_n: int):
|
|
|
|
| 320 |
return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
|
| 321 |
|
| 322 |
status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
|
| 323 |
+
# Hide the second table on a new search
|
| 324 |
return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
|
| 325 |
|
| 326 |
def find_matches_and_rank_anyway(dream_job: str, top_n: int):
|
|
|
|
| 357 |
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.update(visible=True), [], 0, gr.Button(visible=False)
|
| 358 |
|
| 359 |
headline = "<h4>Focus on these skills to improve your match:</h4>" if user_skills else "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
|
| 360 |
+
skills_to_display = sorted(all_missing_skills)[:5] if user_skills else sorted(job_skills)[:5]
|
| 361 |
items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
|
| 362 |
learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
|
| 363 |
full_skill_list_for_state = sorted(all_missing_skills) if user_skills else sorted(job_skills)
|
|
|
|
| 377 |
def on_reset():
    """Return the fixed tuple of 19 default values.

    The tuple is purely positional. NOTE(review): the component list these
    values are bound to is not visible in this view -- keep the order in sync
    with the handler wiring before changing any slot.
    """
    def hidden():
        # Build a fresh update per slot rather than sharing one object.
        return gr.update(visible=False)

    reset_values = [
        "", 3, "",
        pd.DataFrame(), None,
        hidden(), hidden(),
        "Status: Ready.",
        "", "", "", "",
        hidden(), hidden(),
        [], 0,
        gr.Button(visible=False),
        pd.DataFrame(),
        hidden(),
    ]
    return tuple(reset_values)
|
| 379 |
|
|
|
|
| 380 |
# Run the one-time data/model initialization. A failure is turned into a
# status string instead of propagating, so the print below always runs.
try:
    initialization_status = initialize_data_and_model()
except Exception as e:
    # The status text sends the reader to the logs, so write the full
    # traceback there; the message alone only carries str(e).
    import traceback
    traceback.print_exc()
    initialization_status = f"ERROR during startup: {e}. Please check logs."
print(initialization_status)
|
| 385 |
|
| 386 |
with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
|
|
| 406 |
retype_btn = gr.Button("Let Me Fix It", variant="stop")
|
| 407 |
|
| 408 |
gr.Markdown("### Top Matches for Your Career Goal")
|
| 409 |
+
df_output = gr.DataFrame(label="Job Matches", interactive=False)
|
| 410 |
|
| 411 |
with gr.Column(visible=False) as skill_jobs_col:
|
| 412 |
gr.Markdown("### Potential Jobs to Consider (Given Your Skills)")
|