Update app.py
Browse files
app.py
CHANGED
|
@@ -61,6 +61,31 @@ FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
|
|
| 61 |
KNOWN_WORDS = set()
|
| 62 |
|
| 63 |
# --- CORE NLP & HELPER FUNCTIONS ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def _norm_skill_token(s: str) -> str:
|
| 65 |
s = s.lower().strip()
|
| 66 |
s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
|
|
@@ -196,41 +221,44 @@ def initialize_data_and_model():
|
|
| 196 |
ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
|
| 197 |
original_df = ds["original"].to_pandas()
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
def extract_skills_llm_advanced(job_title: str, duties: str, qualifications: str) -> list[str]:
|
| 202 |
-
if not LLM_PIPELINE: return []
|
| 203 |
-
|
| 204 |
-
# We combine the most important fields to give the LLM full context.
|
| 205 |
-
full_context = f"Job Title: {job_title}\n\nDuties: {duties}\n\nQualifications: {qualifications}"
|
| 206 |
-
|
| 207 |
-
# This prompt is highly specific to encourage better, more niche results.
|
| 208 |
prompt = f"""
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
[Extracted Skills]
|
| 221 |
-
"""
|
| 222 |
try:
|
| 223 |
response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1)
|
| 224 |
generated_text = response[0]['generated_text']
|
| 225 |
-
|
| 226 |
-
skills_part = generated_text.split("[Extracted Skills]")[-1].strip()
|
| 227 |
skills = [skill.strip() for skill in skills_part.split(',') if skill.strip()]
|
| 228 |
-
# Return a de-duplicated list, preserving order as much as possible
|
| 229 |
return list(dict.fromkeys(s.lower() for s in skills))
|
| 230 |
-
except Exception
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
def extract_skills_direct_scan(text: str) -> list[str]:
|
| 235 |
if not isinstance(text, str): return []
|
| 236 |
found_skills = set()
|
|
@@ -239,58 +267,32 @@ def initialize_data_and_model():
|
|
| 239 |
found_skills.add(skill)
|
| 240 |
return list(found_skills)
|
| 241 |
|
| 242 |
-
def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
|
| 243 |
-
if not LLM_PIPELINE or not job_title: return []
|
| 244 |
-
|
| 245 |
-
skills_to_add = 6 - len(existing_skills)
|
| 246 |
-
prompt = f"""
|
| 247 |
-
Instruct: A job has the title "{job_title}" and requires the skills: {', '.join(existing_skills)}.
|
| 248 |
-
Based on this, what are {skills_to_add} additional, closely related skills typically required for such a role?
|
| 249 |
-
List only the new skills, separated by commas. Do not repeat skills from the original list.
|
| 250 |
-
|
| 251 |
-
Additional Skills:
|
| 252 |
-
"""
|
| 253 |
-
try:
|
| 254 |
-
response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
|
| 255 |
-
generated_text = response[0]['generated_text']
|
| 256 |
-
skills_part = generated_text.split("Additional Skills:")[-1].strip()
|
| 257 |
-
new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
|
| 258 |
-
return new_skills
|
| 259 |
-
except Exception:
|
| 260 |
-
return []
|
| 261 |
-
|
| 262 |
-
# --- MODIFIED: Hybrid Skill Extraction Logic ---
|
| 263 |
-
# This function is now simpler and more powerful. It prioritizes the advanced LLM extractor.
|
| 264 |
def extract_skills_hybrid(row) -> list[str]:
|
| 265 |
-
|
| 266 |
-
job_title =
|
| 267 |
-
duties = str(row.get('Duties', ''))
|
| 268 |
-
qualifications = str(row.get('qualifications', ''))
|
| 269 |
-
description = str(row.get('Description', ''))
|
| 270 |
-
|
| 271 |
-
# The full text is used for the direct scan as a fallback
|
| 272 |
-
full_text_for_scan = " ".join([job_title, duties, qualifications, description])
|
| 273 |
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
direct_skills = extract_skills_direct_scan(full_text_for_scan)
|
| 279 |
-
|
| 280 |
-
# Combine the results, giving priority to the LLM's findings
|
| 281 |
-
combined_skills = set(advanced_llm_skills) | set(direct_skills)
|
| 282 |
|
| 283 |
-
# If the combined list is still too short,
|
| 284 |
if len(combined_skills) < 6:
|
| 285 |
-
|
|
|
|
| 286 |
combined_skills.update(expanded_skills)
|
| 287 |
|
| 288 |
return sorted(list(combined_skills))
|
| 289 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
|
| 291 |
# Apply the hybrid function row-wise to include job title context
|
| 292 |
original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
|
| 293 |
-
|
|
|
|
| 294 |
print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
|
| 295 |
original_df.to_parquet(PROCESSED_DATA_PATH)
|
| 296 |
|
|
@@ -327,14 +329,12 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
|
|
| 327 |
emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
|
| 328 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 329 |
|
| 330 |
-
# --- NEW: Initialize variables for the recommendations section ---
|
| 331 |
recommendations_table = pd.DataFrame()
|
| 332 |
recommendations_visible = False
|
| 333 |
|
| 334 |
if user_skills:
|
| 335 |
scored_df = score_jobs_by_skills(user_skills, emb_matches)
|
| 336 |
|
| 337 |
-
# --- NEW: Logic to get top 5 jobs based purely on skill match score ---
|
| 338 |
skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
|
| 339 |
if not skill_sorted_df.empty:
|
| 340 |
recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
|
|
@@ -343,7 +343,6 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
|
|
| 343 |
recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
|
| 344 |
recommendations_table = recs
|
| 345 |
recommendations_visible = True
|
| 346 |
-
# --- END NEW ---
|
| 347 |
|
| 348 |
display_df = scored_df.head(top_n)
|
| 349 |
status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
|
|
@@ -364,7 +363,6 @@ def get_job_matches(dream_job: str, top_n: int, skills_text: str):
|
|
| 364 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 365 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
| 366 |
|
| 367 |
-
# --- MODIFIED: Added new outputs for recommendations ---
|
| 368 |
return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
|
| 369 |
|
| 370 |
def rerank_current_results(initial_matches_df, skills_text, top_n):
|
|
@@ -373,7 +371,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
|
|
| 373 |
initial_matches_df = pd.DataFrame(initial_matches_df)
|
| 374 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 375 |
|
| 376 |
-
# --- NEW: Initialize variables for the recommendations section ---
|
| 377 |
recommendations_table = pd.DataFrame()
|
| 378 |
recommendations_visible = False
|
| 379 |
|
|
@@ -388,7 +385,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
|
|
| 388 |
status = f"Results **re-ranked** based on your {len(user_skills)} skills."
|
| 389 |
display_df = ranked_df.head(top_n)
|
| 390 |
|
| 391 |
-
# --- NEW: Logic to get top 5 jobs based purely on skill match score ---
|
| 392 |
skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
|
| 393 |
if not skill_sorted_df.empty:
|
| 394 |
recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
|
|
@@ -397,7 +393,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
|
|
| 397 |
recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
|
| 398 |
recommendations_table = recs
|
| 399 |
recommendations_visible = True
|
| 400 |
-
# --- END NEW ---
|
| 401 |
|
| 402 |
table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
|
| 403 |
table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
|
|
@@ -407,18 +402,15 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
|
|
| 407 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 408 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
| 409 |
|
| 410 |
-
# --- MODIFIED: Added new outputs for recommendations ---
|
| 411 |
return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
|
| 412 |
|
| 413 |
def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
|
| 414 |
if not dream_job:
|
| 415 |
-
# --- MODIFIED: Added new default outputs ---
|
| 416 |
return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
|
| 417 |
unrecognized_words = check_spelling_in_query(dream_job)
|
| 418 |
if unrecognized_words:
|
| 419 |
word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
|
| 420 |
alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
|
| 421 |
-
# --- MODIFIED: Added new default outputs ---
|
| 422 |
return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
|
| 423 |
|
| 424 |
status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
|
|
@@ -456,12 +448,11 @@ def on_select_job(job_id, skills_text):
|
|
| 456 |
matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
|
| 457 |
all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
|
| 458 |
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
if user_skills:
|
| 465 |
job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
|
| 466 |
headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
|
| 467 |
learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
|
|
@@ -470,11 +461,25 @@ def on_select_job(job_id, skills_text):
|
|
| 470 |
learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
|
| 471 |
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
|
| 472 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
|
| 474 |
-
skills_to_display = sorted(
|
| 475 |
items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
|
| 476 |
learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
|
| 477 |
-
|
|
|
|
| 478 |
new_offset = len(skills_to_display)
|
| 479 |
should_button_be_visible = len(full_skill_list_for_state) > 5
|
| 480 |
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
|
|
@@ -489,7 +494,6 @@ def load_more_skills(full_skills_list, current_offset):
|
|
| 489 |
return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
|
| 490 |
|
| 491 |
def on_reset():
|
| 492 |
-
# --- MODIFIED: Added new default outputs for reset ---
|
| 493 |
return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
|
| 494 |
|
| 495 |
print("Starting application initialization...")
|
|
@@ -520,7 +524,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
| 520 |
|
| 521 |
df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
|
| 522 |
|
| 523 |
-
# --- NEW: Added the recommendations section ---
|
| 524 |
with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
|
| 525 |
recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
|
| 526 |
|
|
@@ -534,7 +537,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
| 534 |
learning_plan_output = gr.HTML(label="Learning Plan")
|
| 535 |
load_more_btn = gr.Button("Load More Skills", visible=False)
|
| 536 |
|
| 537 |
-
# --- MODIFIED: Added new outputs to the click events ---
|
| 538 |
search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
|
| 539 |
search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
|
| 540 |
retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
|
|
|
|
| 61 |
KNOWN_WORDS = set()
|
| 62 |
|
| 63 |
# --- CORE NLP & HELPER FUNCTIONS ---
|
| 64 |
+
def expand_skills_with_llm(job_title: str, existing_skills: list, num_skills_to_add: int) -> list:
    """Ask the LLM to suggest additional skills for a role.

    Given a job title and the skills already identified, prompts the LLM for
    `num_skills_to_add` closely related skills and returns them lower-cased
    and de-duplicated (first-seen order preserved). Returns an empty list
    when the pipeline is unavailable, the title is empty, no skills are
    requested, or generation fails.
    """
    if not LLM_PIPELINE or not job_title or num_skills_to_add <= 0:
        return []

    existing_skills_str = ', '.join(existing_skills)
    prompt = f"""
Instruct: A job has the title "{job_title}" and already lists these skills: {existing_skills_str}.
Based on this, what are {num_skills_to_add} additional, closely related skills typically required for such a role?
List only the new skills, separated by commas. Do not repeat skills from the original list. Do not include any preamble.

Additional Skills:
"""
    try:
        output = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
        raw_text = output[0]['generated_text']
        # Everything after the final "Additional Skills:" marker is the answer.
        answer = raw_text.split("Additional Skills:")[-1].strip()
        cleaned = (token.strip().lower() for token in answer.split(','))
        unique = dict.fromkeys(token for token in cleaned if token)
        return list(unique)  # Ensure unique skills are returned
    except Exception as exc:
        print(f"🚨 ERROR expanding skills with LLM: {exc}")
        return []
|
| 88 |
+
|
| 89 |
def _norm_skill_token(s: str) -> str:
|
| 90 |
s = s.lower().strip()
|
| 91 |
s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
|
|
|
|
| 221 |
ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
|
| 222 |
original_df = ds["original"].to_pandas()
|
| 223 |
|
| 224 |
+
def extract_skills_llm(text: str) -> list[str]:
    """Extract key skills from a job-description text via a few-shot LLM prompt.

    Args:
        text: Raw job-description text. Non-strings and strings shorter than
            20 characters (after stripping) are rejected up front.

    Returns:
        De-duplicated, lower-cased skill names in first-seen order, or an
        empty list when the input is unusable, the LLM pipeline is not
        loaded, or generation fails.
    """
    # Guard cheap conditions first; short texts rarely yield useful skills.
    if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
    prompt = f"""
Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
[Example 1]
Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus."
Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
[Example 2]
Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum."
Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
[Actual Task]
Text: "{text}"
Extracted Skills:
"""
    try:
        # Deterministic decoding (no sampling, near-zero temperature) for
        # reproducible extraction results.
        response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1)
        generated_text = response[0]['generated_text']
        # The model echoes the prompt; keep only what follows the last marker.
        skills_part = generated_text.split("Extracted Skills:")[-1].strip()
        skills = [skill.strip() for skill in skills_part.split(',') if skill.strip()]
        # dict.fromkeys de-duplicates while preserving first-seen order.
        return list(dict.fromkeys(s.lower() for s in skills))
    except Exception as e:
        # FIX: was a silent `except Exception: return []`; log like the
        # sibling expand_skills_with_llm so failures are visible.
        print(f"🚨 ERROR extracting skills with LLM: {e}")
        return []
|
| 245 |
+
|
| 246 |
+
def extract_skills_nltk(text: str) -> list[str]:
    """Extract whitelisted skills from text using NLTK noun-phrase chunking.

    Tokenizes and POS-tags the lower-cased text, chunks noun phrases with the
    grammar ``NP: {<JJ.*>*<NN.*>+}`` (optional adjectives followed by nouns),
    and keeps only phrases whose normalized form appears in SKILL_WHITELIST.

    Args:
        text: Free text to scan; non-strings yield an empty list.

    Returns:
        Sorted list of unique, normalized skill phrases.
    """
    if not isinstance(text, str): return []
    text_lower = text.lower()
    grammar = "NP: {<JJ.*>*<NN.*>+}"
    chunk_parser = nltk.RegexpParser(grammar)
    tokens = nltk.word_tokenize(text_lower)
    tagged_tokens = nltk.pos_tag(tokens)
    chunked_text = chunk_parser.parse(tagged_tokens)
    potential_skills = set()
    for subtree in chunked_text.subtrees():
        if subtree.label() == 'NP':
            phrase = " ".join(word for word, tag in subtree.leaves())
            # FIX: normalize once instead of calling _norm_skill_token twice
            # (once for the membership test, once for the add).
            normalized = _norm_skill_token(phrase)
            if normalized in SKILL_WHITELIST:
                potential_skills.add(normalized)
    return sorted(potential_skills)
|
| 261 |
+
|
| 262 |
def extract_skills_direct_scan(text: str) -> list[str]:
|
| 263 |
if not isinstance(text, str): return []
|
| 264 |
found_skills = set()
|
|
|
|
| 267 |
found_skills.add(skill)
|
| 268 |
return list(found_skills)
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
def extract_skills_hybrid(row) -> list[str]:
    """Combine LLM, NLTK and direct-scan extraction for one dataframe row.

    Runs all three extractors over the row's precomputed ``text_for_skills``
    column, unions their results, and — when fewer than six skills were
    found — tops the set up via LLM expansion using the job title for
    context. Returns the skills sorted alphabetically.
    """
    text = row['text_for_skills']
    job_title = row.get('Job title', '')  # Use original Job title for context

    combined = set()
    for extractor in (extract_skills_llm, extract_skills_nltk, extract_skills_direct_scan):
        combined.update(extractor(text))

    # If the combined list is still too short, expand it via the LLM.
    shortfall = 6 - len(combined)
    if shortfall > 0:
        extra = expand_skills_with_llm(job_title, list(combined), num_skills_to_add=shortfall)
        combined.update(extra)

    return sorted(combined)
|
| 286 |
|
| 287 |
+
def create_text_for_skills(row):
    """Join a row's title, duties, qualifications and description into one string.

    Missing values (None/NaN per ``pd.notna``) are skipped; the remaining
    fields are stringified and joined with single spaces.
    """
    candidates = (
        row.get("Job title"),
        row.get("Duties"),
        row.get("qualifications"),
        row.get("Description"),
    )
    present = [str(value) for value in candidates if pd.notna(value)]
    return " ".join(present)
|
| 289 |
+
|
| 290 |
+
original_df["text_for_skills"] = original_df.apply(create_text_for_skills, axis=1)
|
| 291 |
print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
|
| 292 |
# Apply the hybrid function row-wise to include job title context
|
| 293 |
original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
|
| 294 |
+
original_df = original_df.drop(columns=['text_for_skills'])
|
| 295 |
+
|
| 296 |
print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
|
| 297 |
original_df.to_parquet(PROCESSED_DATA_PATH)
|
| 298 |
|
|
|
|
| 329 |
emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
|
| 330 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 331 |
|
|
|
|
| 332 |
recommendations_table = pd.DataFrame()
|
| 333 |
recommendations_visible = False
|
| 334 |
|
| 335 |
if user_skills:
|
| 336 |
scored_df = score_jobs_by_skills(user_skills, emb_matches)
|
| 337 |
|
|
|
|
| 338 |
skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
|
| 339 |
if not skill_sorted_df.empty:
|
| 340 |
recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
|
|
|
|
| 343 |
recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
|
| 344 |
recommendations_table = recs
|
| 345 |
recommendations_visible = True
|
|
|
|
| 346 |
|
| 347 |
display_df = scored_df.head(top_n)
|
| 348 |
status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
|
|
|
|
| 363 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 364 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
| 365 |
|
|
|
|
| 366 |
return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
|
| 367 |
|
| 368 |
def rerank_current_results(initial_matches_df, skills_text, top_n):
|
|
|
|
| 371 |
initial_matches_df = pd.DataFrame(initial_matches_df)
|
| 372 |
user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
|
| 373 |
|
|
|
|
| 374 |
recommendations_table = pd.DataFrame()
|
| 375 |
recommendations_visible = False
|
| 376 |
|
|
|
|
| 385 |
status = f"Results **re-ranked** based on your {len(user_skills)} skills."
|
| 386 |
display_df = ranked_df.head(top_n)
|
| 387 |
|
|
|
|
| 388 |
skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
|
| 389 |
if not skill_sorted_df.empty:
|
| 390 |
recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
|
|
|
|
| 393 |
recs['Overall Score'] = recs['Overall Score'].map('{:.2%}'.format)
|
| 394 |
recommendations_table = recs
|
| 395 |
recommendations_visible = True
|
|
|
|
| 396 |
|
| 397 |
table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
|
| 398 |
table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
|
|
|
|
| 402 |
dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
|
| 403 |
dropdown_value = dropdown_options[0][1] if dropdown_options else None
|
| 404 |
|
|
|
|
| 405 |
return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), recommendations_table, gr.Accordion(visible=recommendations_visible)
|
| 406 |
|
| 407 |
def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
|
| 408 |
if not dream_job:
|
|
|
|
| 409 |
return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)
|
| 410 |
unrecognized_words = check_spelling_in_query(dream_job)
|
| 411 |
if unrecognized_words:
|
| 412 |
word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
|
| 413 |
alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
|
|
|
|
| 414 |
return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True), pd.DataFrame(), gr.Accordion(visible=False)
|
| 415 |
|
| 416 |
status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
|
|
|
|
| 448 |
matched_job_skills_mask = torch.any(similarity_matrix > 0.58, dim=0)
|
| 449 |
all_missing_skills = [skill for i, skill in enumerate(job_skills) if not matched_job_skills_mask[i]]
|
| 450 |
|
| 451 |
+
if score_val >= 0.98:
|
| 452 |
+
learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
|
| 453 |
+
job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
|
| 454 |
+
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
|
| 455 |
+
|
|
|
|
| 456 |
job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
|
| 457 |
headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
|
| 458 |
learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
|
|
|
|
| 461 |
learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
|
| 462 |
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
|
| 463 |
else:
|
| 464 |
+
# --- MODIFIED LOGIC TO ENSURE AT LEAST 5 SKILLS ---
|
| 465 |
+
current_job_skills = list(job_skills)
|
| 466 |
+
job_title = str(row.get("job_title", ""))
|
| 467 |
+
|
| 468 |
+
if len(current_job_skills) < 5 and job_title and LLM_PIPELINE:
|
| 469 |
+
additional_skills_needed = 5 - len(current_job_skills)
|
| 470 |
+
newly_expanded_skills = expand_skills_with_llm(job_title, current_job_skills, num_skills_to_add=additional_skills_needed)
|
| 471 |
+
|
| 472 |
+
for skill in newly_expanded_skills:
|
| 473 |
+
if skill not in current_job_skills:
|
| 474 |
+
current_job_skills.append(skill)
|
| 475 |
+
# --- END MODIFICATION ---
|
| 476 |
+
|
| 477 |
headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
|
| 478 |
+
skills_to_display = sorted(current_job_skills)[:5]
|
| 479 |
items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
|
| 480 |
learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
|
| 481 |
+
|
| 482 |
+
full_skill_list_for_state = sorted(current_job_skills)
|
| 483 |
new_offset = len(skills_to_display)
|
| 484 |
should_button_be_visible = len(full_skill_list_for_state) > 5
|
| 485 |
return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
|
|
|
|
| 494 |
return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
|
| 495 |
|
| 496 |
def on_reset():
|
|
|
|
| 497 |
return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False), pd.DataFrame(), gr.Accordion(visible=False))
|
| 498 |
|
| 499 |
print("Starting application initialization...")
|
|
|
|
| 524 |
|
| 525 |
df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
|
| 526 |
|
|
|
|
| 527 |
with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
|
| 528 |
recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
|
| 529 |
|
|
|
|
| 537 |
learning_plan_output = gr.HTML(label="Learning Plan")
|
| 538 |
load_more_btn = gr.Button("Load More Skills", visible=False)
|
| 539 |
|
|
|
|
| 540 |
search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
|
| 541 |
search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
|
| 542 |
retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
|