File size: 23,142 Bytes
bef519c
6baecdf
 
 
 
 
 
 
 
 
 
f33a827
 
ae49263
dd9737e
6baecdf
ae49263
f33a827
 
ae49263
 
 
f33a827
 
6baecdf
 
 
 
ae49263
 
6baecdf
 
bbf6af8
6baecdf
ae49263
6baecdf
ae49263
f33a827
 
 
 
ae49263
 
f33a827
 
 
 
 
ae49263
 
f33a827
6baecdf
 
 
 
 
 
 
 
ae49263
 
 
6baecdf
 
 
 
 
ae49263
6baecdf
 
 
 
ae49263
6baecdf
 
 
 
 
ae49263
 
6baecdf
 
 
 
 
 
 
ae49263
6baecdf
ae49263
6baecdf
 
ae49263
6baecdf
 
 
0522165
 
 
dd9737e
0522165
 
 
 
 
 
dd9737e
0522165
 
 
 
 
dd9737e
0522165
 
 
 
 
dd9737e
0522165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f33a827
dd9737e
 
 
 
 
 
 
 
0522165
f33a827
6baecdf
bbf6af8
6baecdf
bbf6af8
ae49263
 
 
 
 
 
0522165
dd9737e
ae49263
6baecdf
bbf6af8
6baecdf
 
bbf6af8
0522165
 
bbf6af8
6baecdf
 
 
bbf6af8
6baecdf
 
bbf6af8
6baecdf
 
 
 
f33a827
6baecdf
 
bbf6af8
6baecdf
 
 
bbf6af8
6baecdf
 
 
dd9737e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0522165
 
 
 
dd9737e
 
 
 
 
 
 
 
 
 
6baecdf
 
 
 
 
 
 
 
f33a827
6baecdf
ae49263
6baecdf
ae49263
 
 
 
6baecdf
 
 
ae49263
6baecdf
 
 
 
 
ae49263
6baecdf
 
 
 
ae49263
 
 
6baecdf
ae49263
6baecdf
ae49263
 
 
79dbc6d
 
ae49263
 
 
 
 
 
 
6baecdf
 
f33a827
 
0522165
6baecdf
ae49263
6baecdf
0522165
 
 
 
f33a827
0522165
ae49263
f33a827
ae49263
 
 
 
f33a827
 
ae49263
 
 
 
 
 
 
 
f33a827
 
ae49263
 
 
 
 
 
 
 
 
 
 
 
 
 
6baecdf
 
f33a827
6baecdf
 
 
 
 
 
f33a827
6baecdf
 
ae49263
6baecdf
 
 
 
 
f33a827
 
 
6baecdf
 
 
 
 
 
 
 
ae49263
6baecdf
 
 
 
 
 
 
ae49263
 
 
6baecdf
f33a827
6baecdf
 
ae49263
 
 
 
79dbc6d
ae49263
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File: app.py
import pandas as pd
import datasets
from sentence_transformers import SentenceTransformer, util
import torch
import re
import nltk
from nltk.corpus import words, stopwords
import urllib.parse as _url
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
import spacy
from spacy.matcher import Matcher
import json
from collections import Counter

# --- Download NLTK data ---
# Each resource is looked up at its nltk.data path and only downloaded if missing.
_NLTK_RESOURCES = {
    'words': 'corpora/words',
    'stopwords': 'corpora/stopwords',
    'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
    'punkt': 'tokenizers/punkt',
}
for _pkg, _path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_path)
    except LookupError:
        nltk.download(_pkg)

STOPWORDS = set(stopwords.words('english'))

# --- GLOBAL STATE & DATA ---
# Populated by initialize_data_and_model(); None/empty until initialization runs.
original_df, combined_df, model = None, None, None
combined_job_embeddings, original_job_title_embeddings = None, None
# transformers text-generation pipeline for query expansion; stays None if init fails.
LLM_PIPELINE = None
LLM_MODEL_NAME = "microsoft/phi-2"
FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
# Vocabulary used by the spell-check heuristic (English lexicon + dataset words).
KNOWN_WORDS = set()
# Skill phrases loaded from validated_skills.json at startup.
AI_VALIDATED_SKILLS = set()

# --- Initialize spaCy ---
print("--- Initializing spaCy ---")
try:
    nlp = spacy.load("en_core_web_sm")
    # Shallow POS patterns that tend to capture skill-like phrases.
    matcher = Matcher(nlp.vocab)
    matcher.add("SKILL", [
        [{"POS": "PROPN"}],
        [{"POS": "NOUN"}, {"POS": "NOUN"}],
        [{"POS": "ADJ"}, {"POS": "NOUN"}],
        [{"POS": "PROPN"}, {"POS": "NOUN"}],
    ])
    print("--- spaCy Initialized Successfully ---")
except Exception as e:
    print(f"🚨 ERROR initializing spaCy: {e}")
    nlp, matcher = None, None

# --- CORE NLP & HELPER FUNCTIONS ---
def _norm_skill_token(s: str) -> str:
    s = s.lower().strip()
    s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
    s = re.sub(r'^\W+|\W+$', '', s)
    s = re.sub(r'\s+', ' ', s)
    return s

def _skill_match(token1: str, token2: str) -> bool:
    """True when the normalized tokens are equal or one contains the other."""
    a = _norm_skill_token(token1)
    b = _norm_skill_token(token2)
    if a == b:
        return True
    return a in b or b in a

def build_known_vocabulary(df: pd.DataFrame):
    """Populate KNOWN_WORDS with the English lexicon plus words from job texts.

    Dataset words must be alphabetic and longer than 2 characters.
    """
    global KNOWN_WORDS
    corpus = " ".join(df['full_text'].astype(str).tolist()).lower()
    dataset_words = {w for w in re.findall(r'\b\w+\b', corpus) if w.isalpha() and len(w) > 2}
    english = {w.lower() for w in words.words()}
    KNOWN_WORDS = english | dataset_words
    return "Known vocabulary built."

def check_spelling_in_query(query: str) -> list[str]:
    """Return unique alphabetic query words (len > 1) not found in KNOWN_WORDS."""
    unknown = set()
    for w in query.lower().split():
        if w.isalpha() and len(w) > 1 and w not in KNOWN_WORDS:
            unknown.add(w)
    return list(unknown)

def initialize_llm_client():
    """Load the local phi-2 model into the LLM_PIPELINE global.

    Returns True on success, False (after logging) on any failure.
    """
    global LLM_PIPELINE
    try:
        tok = AutoTokenizer.from_pretrained(LLM_MODEL_NAME, trust_remote_code=True)
        lm = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL_NAME, torch_dtype="auto", device_map="auto", trust_remote_code=True
        )
        LLM_PIPELINE = pipeline(
            "text-generation", model=lm, tokenizer=tok,
            max_new_tokens=100, do_sample=True, temperature=0.7
        )
    except Exception as e:
        print(f"🚨 ERROR initializing local LLM: {e}")
        return False
    return True

def llm_expand_query(user_input: str) -> str:
    """Expand the user's query with one LLM-generated sentence.

    Falls back to the raw input when the pipeline is unavailable or errors.
    """
    if not LLM_PIPELINE:
        return user_input
    prompt = f"User's career interest: '{user_input}'\nInstruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. Do not include a preamble, the user input, or any list formatting in the output. Just the expanded sentence.\nExpanded Intent:"
    try:
        outputs = LLM_PIPELINE(prompt, max_new_tokens=100, do_sample=True, temperature=0.6)
        # Keep only the text after the prompt's trailing marker.
        expansion = outputs[0]['generated_text'].strip().split("Expanded Intent:")[-1].strip()
        expansion = expansion.replace('\n', ' ').replace(':', '').strip()
        combined = user_input + ". " + expansion
        return combined.replace('..', '.').strip()
    except Exception:
        return user_input

def extract_fallback_keywords(text: str, user_skills: list[str], top_n=7) -> list[str]:
    """Smarter fallback that prioritizes keywords semantically similar to the user's input."""
    if not isinstance(text, str) or not nlp:
        return []

    # Generic job-posting vocabulary that should never count as a skill.
    noise = STOPWORDS.union({
        'experience', 'ability', 'knowledge', 'skill', 'skills', 'degree', 'education', 'work', 'year', 'years', 'job', 'role', 'team',
        'company', 'duties', 'responsibilities', 'requirements', 'qualifications', 'description', 'position', 'opportunity', 'candidate',
        'application', 'applications', 'university', 'college', 'school', 'department', 'program', 'field', 'service', 'level'
    })

    doc = nlp(text.lower())

    # Named entities (places, orgs, dates, ...) are treated as noise too.
    for ent in doc.ents:
        if ent.label_ in ['GPE', 'ORG', 'DATE', 'PERSON', 'MONEY', 'CARDINAL', 'TIME']:
            noise.add(ent.text)

    keywords = set()
    for chunk in doc.noun_chunks:
        phrase = chunk.text.strip()
        if len(phrase) <= 3 or phrase.isnumeric():
            continue
        if any(junk in phrase.split() for junk in noise):
            continue
        keywords.add(phrase)

    if not keywords:
        return []

    keywords = list(keywords)

    if user_skills and model:
        # Rank each candidate by its best cosine similarity to any user skill,
        # keeping only candidates above a 0.2 floor.
        skill_vecs = model.encode(user_skills, convert_to_tensor=True)
        cand_vecs = model.encode(keywords, convert_to_tensor=True)
        best_scores, _ = torch.max(util.cos_sim(cand_vecs, skill_vecs), dim=1)
        ranked = sorted(zip(keywords, best_scores.tolist()), key=lambda kv: kv[1], reverse=True)
        return [kw for kw, score in ranked if score > 0.2][:top_n]

    # No user context: return an alphabetical sample.
    return sorted(keywords)[:top_n]

def get_skills_from_text(row: pd.Series, user_skills: list[str]) -> list[str]:
    """Primary skill extraction: uses AI-validated list first, then a smart fallback."""
    full_text = " ".join(str(row.get(col, '')) for col in ['qualifications', 'Duties', 'Description'])
    if not full_text.strip():
        return []

    # Preferred path: POS-pattern matches filtered to the pre-validated skill list.
    if nlp and matcher:
        doc = nlp(full_text.lower())
        found = {doc[start:end].text.strip() for _, start, end in matcher(doc)}
        approved = sorted(skill for skill in found if skill in AI_VALIDATED_SKILLS)
        if approved:
            return approved

    # Nothing validated — fall back to similarity-ranked noun chunks.
    return extract_fallback_keywords(full_text, user_skills)

def initialize_data_and_model():
    """Load validated skills, datasets, and the embedding model; precompute embeddings.

    Mutates module-level globals. Returns a status string that the launcher
    compares against "--- Initialization Complete ---" before starting the UI.
    """
    global original_df, combined_df, model, combined_job_embeddings, original_job_title_embeddings, AI_VALIDATED_SKILLS
    if not initialize_llm_client(): print("Warning: LLM Client failed to initialize.")
    
    print("--- Loading pre-computed skills from validated_skills.json ---")
    try:
        with open("validated_skills.json", "r") as f:
            AI_VALIDATED_SKILLS = set(json.load(f))
        print(f"--- Loaded {len(AI_VALIDATED_SKILLS)} AI-validated skills ---")
    except FileNotFoundError:
        print("🚨 WARNING: validated_skills.json not found. Skill extraction will rely on fallback method.")
        AI_VALIDATED_SKILLS = set()

    print("--- Loading Datasets ---")
    ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
    original_df = ds["original"].to_pandas()
    augmented_df = ds["augmented"].to_pandas()
    
    print("--- Mapping skills to each job description (initial pass) ---")
    # No user context yet, so extraction runs with an empty skill list.
    original_df['Skills'] = original_df.apply(lambda row: get_skills_from_text(row, user_skills=[]), axis=1)
    
    original_df['job_id'] = original_df.index
    max_id = len(original_df) - 1
    # Maps augmented rows back to a source job_id assuming 20 variants per job,
    # clamped into range — TODO confirm the 20x ratio against the dataset.
    augmented_df['job_id'] = augmented_df.index.map(lambda i: min(i // 20, max_id))
    
    def create_full_text(row):
        # Concatenate all searchable text fields into one string for embedding.
        return " ".join([str(s) for s in [row.get("Job title"), row.get("Company"), row.get("Duties"), row.get("qualifications"), row.get("Description")]])
    
    original_df["full_text"] = original_df.apply(create_full_text, axis=1)
    augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
    combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
    # Only original_df is renamed; combined_df keeps the raw column names.
    original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})

    print("--- Loading Fine-Tuned Sentence Transformer Model ---")
    model = SentenceTransformer(FINETUNED_MODEL_ID)

    print("--- Encoding Embeddings ---")
    combined_job_embeddings = model.encode(combined_df["full_text"].tolist(), convert_to_tensor=True, show_progress_bar=True)
    original_job_title_embeddings = model.encode(original_df["job_title"].tolist(), convert_to_tensor=True, show_progress_bar=True)
    
    build_known_vocabulary(combined_df)
    return "--- Initialization Complete ---"

def find_job_matches(original_user_query: str, expanded_user_query: str, top_k: int = 50) -> pd.DataFrame:
    """Hybrid semantic search blending full-text similarity with a title boost.

    Scores every row of combined_df against the expanded query, dedupes the
    augmented variants to one row per job_id (keeping the best-scoring one),
    then blends in a title-only similarity against the original query.
    Returns the top_k rows of original_df with a 'Similarity Score' column,
    indexed by job_id and also carrying a 'Job ID' column.
    """
    expanded_user_embedding = model.encode(expanded_user_query, convert_to_tensor=True)
    general_similarity_scores = util.cos_sim(expanded_user_embedding, combined_job_embeddings)[0]
    # topk with k == full length is just a similarity-sorted ordering of all rows.
    top_indices = torch.topk(general_similarity_scores, k=len(combined_df))
    sorted_combined_df = combined_df.iloc[top_indices.indices.cpu()].copy()
    sorted_combined_df['general_score'] = top_indices.values.cpu().numpy()
    # Rows are sorted by score, so keep='first' keeps each job's best variant.
    unique_matches = sorted_combined_df.drop_duplicates(subset=['job_id'], keep='first').set_index('job_id')
    original_user_embedding = model.encode(original_user_query, convert_to_tensor=True)
    title_boost_scores = util.cos_sim(original_user_embedding, original_job_title_embeddings)[0].cpu().numpy()
    title_boost_map = pd.Series(title_boost_scores, index=original_df['job_id'])
    unique_matches['title_boost_score'] = unique_matches.index.map(title_boost_map).fillna(0)
    # Weighted blend: 70% whole-posting similarity, 30% title-only similarity.
    unique_matches['Similarity Score'] = (0.70 * unique_matches['general_score'] + 0.30 * unique_matches['title_boost_score'])
    final_job_ids = unique_matches.sort_values(by='Similarity Score', ascending=False).head(top_k).index.tolist()
    final_results_df = original_df[original_df['job_id'].isin(final_job_ids)].copy()
    scores_df = unique_matches.reset_index()[['job_id', 'Similarity Score']].copy()
    final_results_df = pd.merge(final_results_df, scores_df, on='job_id', how='left')
    return final_results_df.sort_values(by='Similarity Score', ascending=False).reset_index(drop=True).set_index('job_id', drop=False).rename(columns={'job_id': 'Job ID'})

def score_jobs_by_skills(user_tokens: list[str], df_to_rank: pd.DataFrame) -> pd.DataFrame:
    """Re-rank candidate jobs by overlap between user skills and job skills.

    Adds 'Skill Matches' (list), 'Skill Match Count' (int) and
    'Skill Match Score' (matched / total job skills) columns, then sorts by
    skill score first and embedding similarity second. Returns a new frame
    indexed by Job ID (axis name cleared); empty frame for empty input.
    """
    if df_to_rank is None or df_to_rank.empty: return pd.DataFrame()
    ranked_df = df_to_rank.copy()

    # Re-extract skills for the ranked DF using the user's context for better fallback results
    ranked_df['Skills'] = ranked_df.apply(lambda row: get_skills_from_text(row, user_skills=user_tokens), axis=1)
    # NOTE: the previous `if 'Skills' not in ranked_df.columns` guard was
    # unreachable — the column is always assigned just above — so it was removed.

    def calculate_match(row, user_tokens):
        # Returns (matched skills, match count, fraction of job skills matched).
        job_skills = row.get('Skills', [])
        if not isinstance(job_skills, list): return [], 0, 0.0
        matched_skills = [s for s in job_skills if any(_skill_match(ut, s) for ut in user_tokens)]
        return matched_skills, len(matched_skills), len(matched_skills) / len(job_skills) if job_skills else 0.0

    results = ranked_df.apply(lambda row: calculate_match(row, user_tokens), axis=1, result_type='expand')
    ranked_df[['Skill Matches', 'Skill Match Count', 'Skill Match Score']] = results
    return ranked_df.sort_values(by=['Skill Match Score', 'Similarity Score'], ascending=[False, False]).reset_index(drop=True).set_index('Job ID', drop=False).rename_axis(None)

def _course_links_for(skill: str) -> str:
    q = _url.quote(skill)
    links = [("Coursera", f"https://www.coursera.org/search?query={q}"), ("edX", f"https://www.edx.org/search?q={q}"), ("Udemy", f"https://www.udemy.com/courses/search/?q={q}"), ("YouTube", f"https://www.youtube.com/results?search_query={q}+tutorial")]
    return " • ".join([f'<a href="{u}" target="_blank" style="color: #007bff;">{name}</a>' for name, u in links])

# --- GRADIO INTERFACE FUNCTIONS ---
def get_job_matches(dream_job: str, top_n: int, skills_text: str):
    """Run the full search pipeline and build the Gradio outputs.

    Returns (status markdown, full match df for state, display table slice,
    job-selector Dropdown update, details Accordion update).
    """
    expanded_desc = llm_expand_query(dream_job)
    emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
    user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
    display_df = score_jobs_by_skills(user_skills, emb_matches) if user_skills else emb_matches
    display_df = display_df.head(top_n)
    status = f"Found and **re-ranked** results by your {len(user_skills)} skills." if user_skills else f"Found {len(display_df)} top matches."
    table_to_show = display_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']] if 'Skill Match Score' in display_df.columns else display_df[['job_title', 'company', 'Similarity Score']]
    # BUG FIX: iterrows() yields the index *label* (the job_id, since the frame
    # is indexed by Job ID), so the old f"{i+1}. ..." labels displayed job IDs
    # instead of ranks. enumerate() gives the true 1-based display rank;
    # row.name (the job_id) remains the dropdown value.
    options = [(f"{rank}. {row['job_title']} - {row['company']}", row.name)
               for rank, (_, row) in enumerate(display_df.iterrows(), start=1)]
    return status, emb_matches, table_to_show, gr.Dropdown(choices=options, value=options[0][1] if options else None, visible=True), gr.Accordion(visible=True)

def rerank_current_results(initial_matches_df, skills_text, top_n):
    """Re-rank the already-found matches by the user's skills, without a new search.

    Returns (status markdown, display table, job-selector Dropdown update).
    """
    if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
        return "Please find matches first.", pd.DataFrame(), gr.Dropdown(visible=False)
    initial_matches_df = pd.DataFrame(initial_matches_df)
    user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
    if not user_skills:
        # Skills box cleared: show the original similarity-only ordering.
        display_df = initial_matches_df.head(top_n)
        table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
        status = "Skills cleared. Showing original results."
    else:
        ranked_df = score_jobs_by_skills(user_skills, initial_matches_df)
        display_df = ranked_df.head(top_n)
        table_to_show = display_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
        status = f"Results **re-ranked** based on {len(user_skills)} skills."
    # BUG FIX: the frame's index holds job IDs, so the previous f"{i+1}. ..."
    # labels showed job IDs rather than ranks; enumerate gives the 1-based rank.
    options = [(f"{rank}. {row['job_title']} - {row['company']}", row.name)
               for rank, (_, row) in enumerate(display_df.iterrows(), start=1)]
    return status, table_to_show, gr.Dropdown(choices=options, value=options[0][1] if options else None, visible=True)

def find_matches_and_rank_with_check(dream_job, top_n, skills_text):
    """Search-button handler: flag unrecognized words before running the search."""
    if not dream_job:
        return ("Please describe your dream job first.", None, pd.DataFrame(),
                gr.Dropdown(visible=False), gr.Accordion(visible=False),
                gr.Markdown(), gr.Row(visible=False))
    unrecognized = check_spelling_in_query(dream_job)
    if unrecognized:
        # Highlight the suspect words and wait for the user to confirm or fix.
        word_list_html = ", ".join(f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized)
        alert = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
        return ("Status: Awaiting confirmation.", None, pd.DataFrame(),
                gr.Dropdown(visible=False), gr.Accordion(visible=False),
                gr.Markdown(alert, visible=True), gr.Row(visible=True))
    # Query looks clean — run the normal pipeline and keep the alert hidden.
    status, matches, table, dropdown, accordion = get_job_matches(dream_job, top_n, skills_text)
    return status, matches, table, dropdown, accordion, gr.Markdown(visible=False), gr.Row(visible=False)

def find_matches_and_rank_anyway(dream_job, top_n, skills_text):
    """'Search Anyway' handler: skip the spell-check and hide the alert row."""
    results = get_job_matches(dream_job, top_n, skills_text)
    return (*results, gr.Markdown(visible=False), gr.Row(visible=False))

def on_select_job(job_id, skills_text):
    """Show details for the selected job and build a personalized learning plan.

    Returns values for: details md, duties, qualifications, description,
    plan HTML, details Accordion update, missing-skills state, offset state,
    load-more Button update.
    """
    if job_id is None:
        return "", "", "", "", "", gr.Accordion(visible=False), [], 0, gr.Button(visible=False)

    row = original_df.loc[job_id]
    # BUG FIX: title and company were concatenated with no separator
    # ("TitleCompany"); use the same "Title - Company" format as the dropdown.
    details = f"### {row.get('job_title', '')} - {row.get('company', '')}"
    user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]

    # Re-run skill extraction with user context to ensure the learning plan is relevant
    job_skills = get_skills_from_text(row, user_skills)

    if not job_skills:
        plan = "<p><i>No specific skills were extracted for this job.</i></p>"
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    # With no user skills every job skill counts as missing (any() over empty is False).
    missing = sorted([s for s in job_skills if not any(_skill_match(ut, s) for ut in user_skills)], key=str.lower)
    if not missing:
        plan = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    if user_skills:
        score = (len(job_skills) - len(missing)) / len(job_skills) if job_skills else 0
        details += f"\n**Your skill match:** {score:.1%}"
        headline = "<b>Great fit!</b>" if score >= 0.8 else "<b>Good progress!</b>" if score >= 0.5 else "<b>Solid starting point.</b>"
        plan = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
        skills_to_show = missing[:5]
        items = [f"<li><b>{s}</b><br>• Learn: {_course_links_for(s)}</li>" for s in skills_to_show]
        plan += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items)}</ul>"
        # NOTE(review): the load-more pagination is only wired up in the
        # no-user-skills branch below; here the plan is capped at 5 skills.
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
    else:
        headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
        skills_to_show = missing[:5]
        items = [f"<li><b>{s}</b><br>• Learn: {_course_links_for(s)}</li>" for s in skills_to_show]
        plan = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items)}</ul>"
        offset = len(skills_to_show)
        show_btn = len(missing) > 5
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), missing, offset, gr.Button(visible=show_btn)

def load_more_skills(full_list, offset):
    """Reveal five more missing skills in the learning plan HTML."""
    new_offset = offset + 5
    visible = full_list[:new_offset]
    bullet_items = "".join(
        f"<li><b>{skill}</b><br>• Learn: {_course_links_for(skill)}</li>" for skill in visible
    )
    plan = (
        "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
        f"<ul style='list-style-type: none; padding-left: 0;'>{bullet_items}</ul>"
    )
    more_remaining = new_offset < len(full_list)
    return plan, new_offset, gr.Button(visible=more_remaining)

def on_reset():
    """Restore every UI component and piece of state to its initial value."""
    return (
        "",                           # dream_text
        3,                            # topk_slider
        "",                           # skills_text
        pd.DataFrame(),               # df_output
        None,                         # initial_matches_state
        gr.Dropdown(visible=False),   # job_selector
        gr.Accordion(visible=False),  # details_accordion
        "Status: Ready.",             # status_text
        "", "", "", "",               # details / duties / qualifications / description
        gr.Markdown(visible=False),   # spelling_alert
        gr.Row(visible=False),        # spelling_row
        [],                           # missing_skills_state
        0,                            # skills_offset_state
        gr.Button(visible=False),     # load_more_btn
    )

# --- Run Initialization ---
# Runs at import time; the status string is checked again in the __main__
# guard before the UI is launched.
print("Starting application initialization...")
initialization_status = initialize_data_and_model()
print(initialization_status)

# --- Gradio Interface Definition ---
with gr.Blocks(theme=gr.themes.Soft()) as ui:
    gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
    # Per-session state: full match df, missing-skill list, pagination offset.
    initial_matches_state, missing_skills_state, skills_offset_state = gr.State(), gr.State([]), gr.State(0)

    with gr.Row():
        with gr.Column(scale=3):
            dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
            with gr.Accordion("Optional: Add Your Skills to Re-rank Results", open=False):
                with gr.Row():
                    skills_text = gr.Textbox(label='Your Skills (comma-separated)', placeholder="e.g., Python, data analysis", scale=3)
                    rerank_btn = gr.Button("Re-rank", variant="secondary", scale=1)
        with gr.Column(scale=1):
            topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
            search_btn = gr.Button("Find Matches", variant="primary")
            reset_btn = gr.Button("Reset All")
            
    status_text = gr.Markdown("Status: Ready.")
    # Spell-check confirmation widgets start hidden; toggled by the search handlers.
    spelling_alert = gr.Markdown(visible=False)
    with gr.Row(visible=False) as spelling_row:
        search_anyway_btn, retype_btn = gr.Button("Search Anyway", variant="secondary"), gr.Button("Let Me Fix It", variant="stop")
        
    df_output = gr.DataFrame(label="Job Matches", interactive=False)
    job_selector = gr.Dropdown(label="Select a job to see more details & learning plan:", visible=False)
    
    with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
        job_details_markdown = gr.Markdown()
        with gr.Tabs():
            with gr.TabItem("Duties"): duties_markdown = gr.Markdown()
            with gr.TabItem("Qualifications"): qualifications_markdown = gr.Markdown()
            with gr.TabItem("Full Description"): description_markdown = gr.Markdown()
        learning_plan_output = gr.HTML(label="Learning Plan")
        load_more_btn = gr.Button("Load More Skills", visible=False)

    # --- Event Handlers ---
    # Both search buttons share the same output set; the "anyway" variant skips spell-check.
    search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
    search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
    retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
    reset_btn.click(fn=on_reset, outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn], queue=False)
    rerank_btn.click(fn=rerank_current_results, inputs=[initial_matches_state, skills_text, topk_slider], outputs=[status_text, df_output, job_selector])
    job_selector.change(fn=on_select_job, inputs=[job_selector, skills_text], outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn])
    load_more_btn.click(fn=load_more_skills, inputs=[missing_skills_state, skills_offset_state], outputs=[learning_plan_output, skills_offset_state, load_more_btn])

# Only launch the UI if the initialization was successful
if __name__ == '__main__':
    # initialize_data_and_model() returns this exact sentinel string on success.
    if initialization_status == "--- Initialization Complete ---":
        ui.launch()
    else:
        print("Gradio UI will not launch due to initialization failure.")