Spaces:

zlf18
/

test2

Sleeping

App Files Files Community

zlf18 committed on Oct 12, 2025

Commit

638f98a

verified ·

1 Parent(s): b6481eb

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -48

app.py CHANGED Viewed

@@ -61,31 +61,6 @@ FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
 KNOWN_WORDS = set()
 # --- CORE NLP & HELPER FUNCTIONS ---
-def expand_skills_with_llm(job_title: str, existing_skills: list, num_skills_to_add: int) -> list:
-    """
-    Uses the LLM to suggest additional skills based on a job title and existing skills.
-    """
-    if not LLM_PIPELINE or not job_title or num_skills_to_add <= 0:
-        return []
-    existing_skills_str = ', '.join(existing_skills)
-    prompt = f"""
-Instruct: A job has the title "{job_title}" and already lists these skills: {existing_skills_str}.
-Based on this, what are {num_skills_to_add} additional, closely related skills typically required for such a role?
-List only the new skills, separated by commas. Do not repeat skills from the original list. Do not include any preamble.
-Additional Skills:
-"""
-    try:
-        response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
-        generated_text = response[0]['generated_text']
-        skills_part = generated_text.split("Additional Skills:")[-1].strip()
-        new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
-        return list(dict.fromkeys(new_skills)) # Ensure unique skills are returned
-    except Exception as e:
-        print(f"🚨 ERROR expanding skills with LLM: {e}")
-        return []
 def _norm_skill_token(s: str) -> str:
     s = s.lower().strip()
     s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
@@ -267,6 +242,26 @@ Extracted Skills:
                     found_skills.add(skill)
             return list(found_skills)
         def extract_skills_hybrid(row) -> list[str]:
             text = row['text_for_skills']
             job_title = row.get('Job title', '') # Use original Job title for context
@@ -278,8 +273,7 @@ Extracted Skills:
             # If the combined list is still too short, expand it
             if len(combined_skills) < 6:
-                # MODIFIED: Call the global expand_skills_with_llm function
-                expanded_skills = expand_skills_with_llm(job_title, list(combined_skills), num_skills_to_add=6-len(combined_skills))
                 combined_skills.update(expanded_skills)
             return sorted(list(combined_skills))
@@ -329,12 +323,14 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
     emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     recommendations_table = pd.DataFrame()
     recommendations_visible = False
     if user_skills:
         scored_df = score_jobs_by_skills(user_skills, emb_matches)
         skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
         if not skill_sorted_df.empty:
             recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
@@ -343,6 +339,7 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
             recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
             recommendations_table = recs
             recommendations_visible = True
         display_df = scored_df.head(top_n)
         status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
@@ -363,6 +360,7 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
     return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
 def rerank_current_results(initial_matches_df, skills_text, top_n):
@@ -371,6 +369,7 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
     initial_matches_df = pd.DataFrame(initial_matches_df)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     recommendations_table = pd.DataFrame()
     recommendations_visible = False
@@ -385,6 +384,7 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
         status = f"Results **re-ranked** based on your {len(user_skills)} skills."
         display_df = ranked_df.head(top_n)
         skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
         if not skill_sorted_df.empty:
             recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
@@ -393,6 +393,7 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
             recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
             recommendations_table = recs
             recommendations_visible = True
         table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
         table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
@@ -402,15 +403,18 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
     return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
 def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
     if not dream_job:
         return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
     unrecognized_words = check_spelling_in_query(dream_job)
     if unrecognized_words:
         word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
         alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
         return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
     status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
@@ -448,11 +452,12 @@ def on_select_job(job_id, skills_text):
         matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
         all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
-        if score_val >= 0.98:
-            learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
-            job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
-            return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
         job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
         headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
         learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
@@ -461,25 +466,11 @@ def on_select_job(job_id, skills_text):
         learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
         return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
     else:
-        # --- MODIFIED LOGIC TO ENSURE AT LEAST 5 SKILLS ---
-        current_job_skills = list(job_skills)
-        job_title = str(row.get("job_title", ""))
-        if len(current_job_skills) < 5 and job_title and LLM_PIPELINE:
-            additional_skills_needed = 5 - len(current_job_skills)
-            newly_expanded_skills = expand_skills_with_llm(job_title, current_job_skills, num_skills_to_add=additional_skills_needed)
-            for skill in newly_expanded_skills:
-                if skill not in current_job_skills:
-                    current_job_skills.append(skill)
-        # --- END MODIFICATION ---
         headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
-        skills_to_display = sorted(current_job_skills)[:5]
         items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
         learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
-        full_skill_list_for_state = sorted(current_job_skills)
         new_offset = len(skills_to_display)
         should_button_be_visible = len(full_skill_list_for_state) > 5
         return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
@@ -494,6 +485,7 @@ def load_more_skills(full_skills_list, current_offset):
     return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
 def on_reset():
     return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
 print("Starting application initialization...")
@@ -524,6 +516,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
     df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
     with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
         recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
@@ -537,6 +530,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
         learning_plan_output = gr.HTML(label="Learning Plan")
         load_more_btn = gr.Button("Load More Skills", visible=False)
     search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
     search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
     retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])

 KNOWN_WORDS = set()
 # --- CORE NLP & HELPER FUNCTIONS ---
 def _norm_skill_token(s: str) -> str:
     s = s.lower().strip()
     s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
                     found_skills.add(skill)
             return list(found_skills)
+        def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
+            if not LLM_PIPELINE or not job_title: return []
+            skills_to_add = 6 - len(existing_skills)
+            prompt = f"""
+Instruct: A job has the title "{job_title}" and requires the skills: {', '.join(existing_skills)}.
+Based on this, what are {skills_to_add} additional, closely related skills typically required for such a role?
+List only the new skills, separated by commas. Do not repeat skills from the original list.
+Additional Skills:
+"""
+            try:
+                response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
+                generated_text = response[0]['generated_text']
+                skills_part = generated_text.split("Additional Skills:")[-1].strip()
+                new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
+                return new_skills
+            except Exception:
+                return []
         def extract_skills_hybrid(row) -> list[str]:
             text = row['text_for_skills']
             job_title = row.get('Job title', '') # Use original Job title for context
             # If the combined list is still too short, expand it
             if len(combined_skills) < 6:
+                expanded_skills = expand_skills_with_llm(job_title, list(combined_skills))
                 combined_skills.update(expanded_skills)
             return sorted(list(combined_skills))
     emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
+    # --- NEW: Initialize variables for the recommendations section ---
     recommendations_table = pd.DataFrame()
     recommendations_visible = False
     if user_skills:
         scored_df = score_jobs_by_skills(user_skills, emb_matches)
+        # --- NEW: Logic to get top 5 jobs based purely on skill match score ---
         skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
         if not skill_sorted_df.empty:
             recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
             recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
             recommendations_table = recs
             recommendations_visible = True
+        # --- END NEW ---
         display_df = scored_df.head(top_n)
         status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
+    # --- MODIFIED: Added new outputs for recommendations ---
     return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
 def rerank_current_results(initial_matches_df, skills_text, top_n):
     initial_matches_df = pd.DataFrame(initial_matches_df)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
+    # --- NEW: Initialize variables for the recommendations section ---
     recommendations_table = pd.DataFrame()
     recommendations_visible = False
         status = f"Results **re-ranked** based on your {len(user_skills)} skills."
         display_df = ranked_df.head(top_n)
+        # --- NEW: Logic to get top 5 jobs based purely on skill match score ---
         skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
         if not skill_sorted_df.empty:
             recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
             recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
             recommendations_table = recs
             recommendations_visible = True
+        # --- END NEW ---
         table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
         table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
+    # --- MODIFIED: Added new outputs for recommendations ---
     return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
 def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
     if not dream_job:
+        # --- MODIFIED: Added new default outputs ---
         return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
     unrecognized_words = check_spelling_in_query(dream_job)
     if unrecognized_words:
         word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
         alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
+        # --- MODIFIED: Added new default outputs ---
         return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
     status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
         matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
         all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
+    if user_skills and score_val >= 0.98:
+        learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
+        job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
+        return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
+    if user_skills:
         job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
         headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
         learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
         learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
         return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
     else:
         headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
+        skills_to_display = sorted(job_skills)[:5]
         items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
         learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
+        full_skill_list_for_state = sorted(job_skills)
         new_offset = len(skills_to_display)
         should_button_be_visible = len(full_skill_list_for_state) > 5
         return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
     return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
 def on_reset():
+    # --- MODIFIED: Added new default outputs for reset ---
     return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
 print("Starting application initialization...")
     df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
+    # --- NEW: Added the recommendations section ---
     with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
         recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
         learning_plan_output = gr.HTML(label="Learning Plan")
         load_more_btn = gr.Button("Load More Skills", visible=False)
+    # --- MODIFIED: Added new outputs to the click events ---
     search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
     search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
     retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])