Spaces:

zlf18
/

test2

Sleeping

App Files Community

zlf18 committed on Oct 12, 2025

Commit

2873ff1

verified ·

1 Parent(s): 190fb0d

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -29

app.py CHANGED Viewed

@@ -100,15 +100,12 @@ def llm_expand_query(user_input: str) -> str:
     if not LLM_PIPELINE: return user_input
     prompt_template = (
         f"User's career interest: '{user_input}'\n"
-        f"Instruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. "
-        f"Do not include a preamble, the user input, or any list formatting in the output. Just the expanded sentence.\n"
-        f"Expanded Intent:"
     )
     try:
         response = LLM_PIPELINE(prompt_template, max_new_tokens=100, do_sample=True, temperature=0.6, return_full_text=False)
         expanded_query = response[0]['generated_text'].strip()
-        final_query = user_input + ". " + expanded_query.replace('\n', ' ').replace(':', '').strip()
-        final_query = final_query.replace('..', '.').strip()
         return final_query
     except Exception:
         return user_input
@@ -168,16 +165,9 @@ def initialize_data_and_model():
             if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
             prompt = f"""
 Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
-[Example 1]
-Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus."
-Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
-[Example 2]
-Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum."
-Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
-[Actual Task]
-Text: "{text}"
-Extracted Skills:
-"""
             try:
                 response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
                 skills_text = response[0]['generated_text'].strip()
@@ -240,7 +230,6 @@ Extracted Skills:
         print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
         original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
         original_df = original_df.drop(columns=['text_for_skills'])
         print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
         original_df.to_parquet(PROCESSED_DATA_PATH)
@@ -255,7 +244,7 @@ Extracted Skills:
     augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
     combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
     original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
     print("--- Loading Fine-Tuned Sentence Transformer Model ---")
     model = SentenceTransformer(FINETUNED_MODEL_ID)
     print("--- Encoding Embeddings ---")
@@ -280,7 +269,7 @@ def get_job_matches(dream_job: str, top_n: int):
     status = f"Found {len(emb_matches)} top matches for your career goal."
     table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
-    table_to_show = table_to_show.rename(columns={'Similarity Score': 'Relevance Score'})
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
@@ -290,25 +279,34 @@ def get_job_matches(dream_job: str, top_n: int):
 def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     if not user_skills:
-        # If skills are cleared, just show the original table without skill scores
-        table1_to_show = initial_matches_df.head(top_n)[['job_title', 'company', 'Similarity Score']]
         return "Skills cleared. Showing original relevance.", table1_to_show, pd.DataFrame(), gr.update(visible=False)
     # --- 1. Rerank the first table ---
     rerank_df = pd.DataFrame(initial_matches_df) # Make sure it's a DataFrame
     rerank_df['Skill Match Score'] = rerank_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
     rerank_df = rerank_df.sort_values(by='Skill Match Score', ascending=False)
     table1_to_show = rerank_df.head(top_n)[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
     # --- 2. Find new jobs for the second table ---
-    status = "Analyzing skills and finding new job matches..."
     combined_query = dream_job + ". My current skills are: " + skills_text
     new_matches_df = find_job_matches(combined_query, top_k=top_n)
-    table2_to_show = new_matches_df[['job_title', 'company', 'Similarity Score']]
-    table2_to_show = table2_to_show.rename(columns={'Similarity Score': 'Blended Match Score'})
-    status = f"Re-ranked initial jobs by skill match and found new jobs to consider."
     return status, table1_to_show, table2_to_show, gr.update(visible=True)
 def find_matches_and_rank_with_check(dream_job: str, top_n: int):
@@ -322,6 +320,7 @@ def find_matches_and_rank_with_check(dream_job: str, top_n: int):
         return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
     status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
     return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
 def find_matches_and_rank_anyway(dream_job: str, top_n: int):
@@ -358,7 +357,7 @@ def on_select_job(job_id, skills_text):
         return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.update(visible=True), [], 0, gr.Button(visible=False)
     headline = "<h4>Focus on these skills to improve your match:</h4>" if user_skills else "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
-    skills_to_display = sorted(all_missing_skills)[:5]
     items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
     learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
     full_skill_list_for_state = sorted(all_missing_skills) if user_skills else sorted(job_skills)
@@ -378,12 +377,10 @@ def load_more_skills(full_skills_list, current_offset):
 def on_reset():
     return ("", 3, "", pd.DataFrame(), None, gr.update(visible=False), gr.update(visible=False), "Status: Ready.", "", "", "", "", gr.update(visible=False), gr.update(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.update(visible=False))
-# --- Main App ---
 try:
     initialization_status = initialize_data_and_model()
 except Exception as e:
-    # Fallback for brevity, paste your full initialize_data_and_model function
-    raise e
 print(initialization_status)
 with gr.Blocks(theme=gr.themes.Soft()) as ui:
@@ -409,7 +406,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
         retype_btn = gr.Button("Let Me Fix It", variant="stop")
     gr.Markdown("### Top Matches for Your Career Goal")
-    df_output = gr.DataFrame(label="Job Matches", interactive=False, datatype=['str', 'str', 'number', 'number'])
     with gr.Column(visible=False) as skill_jobs_col:
         gr.Markdown("### Potential Jobs to Consider (Given Your Skills)")

     if not LLM_PIPELINE: return user_input
     prompt_template = (
         f"User's career interest: '{user_input}'\n"
+        f"Instruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. Do not include a preamble. Expanded Intent:"
     )
     try:
         response = LLM_PIPELINE(prompt_template, max_new_tokens=100, do_sample=True, temperature=0.6, return_full_text=False)
         expanded_query = response[0]['generated_text'].strip()
+        final_query = user_input + ". " + expanded_query.replace('\n', ' ').strip()
         return final_query
     except Exception:
         return user_input
             if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
             prompt = f"""
 Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
+[Example 1] Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus." Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
+[Example 2] Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum." Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
+[Actual Task] Text: "{text}" Extracted Skills:"""
             try:
                 response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1, return_full_text=False)
                 skills_text = response[0]['generated_text'].strip()
         print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
         original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
         original_df = original_df.drop(columns=['text_for_skills'])
         print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
         original_df.to_parquet(PROCESSED_DATA_PATH)
     augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
     combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
     original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
     print("--- Loading Fine-Tuned Sentence Transformer Model ---")
     model = SentenceTransformer(FINETUNED_MODEL_ID)
     print("--- Encoding Embeddings ---")
     status = f"Found {len(emb_matches)} top matches for your career goal."
     table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
+    table_to_show['Similarity Score'] = table_to_show['Similarity Score'].map('{:.2%}'.format)
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
 def analyze_skills(dream_job, initial_matches_df, skills_text, top_n):
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     if not user_skills:
+        # If skills are cleared, just show the original table without skill scores and hide the second table
+        table1_df = pd.DataFrame(initial_matches_df).head(top_n)
+        table1_to_show = table1_df[['job_title', 'company', 'Similarity Score']]
+        table1_to_show['Similarity Score'] = table1_to_show['Similarity Score'].map('{:.2%}'.format)
         return "Skills cleared. Showing original relevance.", table1_to_show, pd.DataFrame(), gr.update(visible=False)
+    status = "Analyzing skills and finding new job matches..."
     # --- 1. Rerank the first table ---
     rerank_df = pd.DataFrame(initial_matches_df) # Make sure it's a DataFrame
     rerank_df['Skill Match Score'] = rerank_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
     rerank_df = rerank_df.sort_values(by='Skill Match Score', ascending=False)
     table1_to_show = rerank_df.head(top_n)[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
+    table1_to_show['Similarity Score'] = table1_to_show['Similarity Score'].map('{:.2%}'.format)
+    table1_to_show['Skill Match Score'] = table1_to_show['Skill Match Score'].map('{:.2%}'.format)
     # --- 2. Find new jobs for the second table ---
     combined_query = dream_job + ". My current skills are: " + skills_text
     new_matches_df = find_job_matches(combined_query, top_k=top_n)
+    # Calculate skill match for this new table as well
+    new_matches_df['Skill Match Score'] = new_matches_df['Skills'].apply(lambda job_skills: calculate_skill_match_score(user_skills, job_skills))
+    table2_to_show = new_matches_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
+    table2_to_show['Similarity Score'] = table2_to_show['Similarity Score'].map('{:.2%}'.format)
+    table2_to_show['Skill Match Score'] = table2_to_show['Skill Match Score'].map('{:.2%}'.format)
+    status = f"Re-ranked initial jobs and found new jobs for your skills."
     return status, table1_to_show, table2_to_show, gr.update(visible=True)
 def find_matches_and_rank_with_check(dream_job: str, top_n: int):
         return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.update(visible=False), gr.update(visible=False), gr.update(value=alert_message, visible=True), gr.update(visible=True), pd.DataFrame(), gr.update(visible=False)
     status, emb_matches, table_to_show, dropdown_update, accordion_update = get_job_matches(dream_job, top_n)
+    # Hide the second table on a new search
     return status, emb_matches, table_to_show, dropdown_update, accordion_update, gr.update(visible=False), gr.update(visible=False), pd.DataFrame(), gr.update(visible=False)
 def find_matches_and_rank_anyway(dream_job: str, top_n: int):
         return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.update(visible=True), [], 0, gr.Button(visible=False)
     headline = "<h4>Focus on these skills to improve your match:</h4>" if user_skills else "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
+    skills_to_display = sorted(all_missing_skills)[:5] if user_skills else sorted(job_skills)[:5]
     items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
     learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
     full_skill_list_for_state = sorted(all_missing_skills) if user_skills else sorted(job_skills)
 def on_reset():
     return ("", 3, "", pd.DataFrame(), None, gr.update(visible=False), gr.update(visible=False), "Status: Ready.", "", "", "", "", gr.update(visible=False), gr.update(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.update(visible=False))
 try:
     initialization_status = initialize_data_and_model()
 except Exception as e:
+    initialization_status = f"ERROR during startup: {e}. Please check logs."
 print(initialization_status)
 with gr.Blocks(theme=gr.themes.Soft()) as ui:
         retype_btn = gr.Button("Let Me Fix It", variant="stop")
     gr.Markdown("### Top Matches for Your Career Goal")
+    df_output = gr.DataFrame(label="Job Matches", interactive=False)
     with gr.Column(visible=False) as skill_jobs_col:
         gr.Markdown("### Potential Jobs to Consider (Given Your Skills)")