File size: 23,142 Bytes
bef519c
6baecdf
 
 
 
 
 
 
 
 
 
f33a827
 
ae49263
dd9737e
6baecdf
ae49263
f33a827
 
ae49263
 
 
f33a827
 
6baecdf
 
 
 
ae49263
 
6baecdf
 
bbf6af8
6baecdf
ae49263
6baecdf
ae49263
f33a827
 
 
 
ae49263
 
f33a827
 
 
 
 
ae49263
 
f33a827
6baecdf
 
 
 
 
 
 
 
ae49263
 
 
6baecdf
 
 
 
 
ae49263
6baecdf
 
 
 
ae49263
6baecdf
 
 
 
 
ae49263
 
6baecdf
 
 
 
 
 
 
ae49263
6baecdf
ae49263
6baecdf
 
ae49263
6baecdf
 
 
0522165
 
 
dd9737e
0522165
 
 
 
 
 
dd9737e
0522165
 
 
 
 
dd9737e
0522165
 
 
 
 
dd9737e
0522165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f33a827
dd9737e
 
 
 
 
 
 
 
0522165
f33a827
6baecdf
bbf6af8
6baecdf
bbf6af8
ae49263
 
 
 
 
 
0522165
dd9737e
ae49263
6baecdf
bbf6af8
6baecdf
 
bbf6af8
0522165
 
bbf6af8
6baecdf
 
 
bbf6af8
6baecdf
 
bbf6af8
6baecdf
 
 
 
f33a827
6baecdf
 
bbf6af8
6baecdf
 
 
bbf6af8
6baecdf
 
 
dd9737e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0522165
 
 
 
dd9737e
 
 
 
 
 
 
 
 
 
6baecdf
 
 
 
 
 
 
 
f33a827
6baecdf
ae49263
6baecdf
ae49263
 
 
 
6baecdf
 
 
ae49263
6baecdf
 
 
 
 
ae49263
6baecdf
 
 
 
ae49263
 
 
6baecdf
ae49263
6baecdf
ae49263
 
 
79dbc6d
 
ae49263
 
 
 
 
 
 
6baecdf
 
f33a827
 
0522165
6baecdf
ae49263
6baecdf
0522165
 
 
 
f33a827
0522165
ae49263
f33a827
ae49263
 
 
 
f33a827
 
ae49263
 
 
 
 
 
 
 
f33a827
 
ae49263
 
 
 
 
 
 
 
 
 
 
 
 
 
6baecdf
 
f33a827
6baecdf
 
 
 
 
 
f33a827
6baecdf
 
ae49263
6baecdf
 
 
 
 
f33a827
 
 
6baecdf
 
 
 
 
 
 
 
ae49263
6baecdf
 
 
 
 
 
 
ae49263
 
 
6baecdf
f33a827
6baecdf
 
ae49263
 
 
 
79dbc6d
ae49263
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File: app.py
import pandas as pd
import datasets
from sentence_transformers import SentenceTransformer, util
import torch
import re
import nltk
from nltk.corpus import words, stopwords
import urllib.parse as _url
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr
import spacy
from spacy.matcher import Matcher
import json
from collections import Counter

# --- Download NLTK data ---
# Each resource is looked up at its nltk.data path and only downloaded if missing.
_NLTK_RESOURCES = {
    'words': 'corpora/words',
    'stopwords': 'corpora/stopwords',
    'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
    'punkt': 'tokenizers/punkt',
}
for _pkg, _path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_path)
    except LookupError:
        nltk.download(_pkg)

STOPWORDS = set(stopwords.words('english'))

# --- GLOBAL STATE & DATA ---
# Populated by initialize_data_and_model(); None/empty until initialization runs.
original_df, combined_df, model = None, None, None
combined_job_embeddings, original_job_title_embeddings = None, None
# transformers text-generation pipeline for query expansion; stays None if init fails.
LLM_PIPELINE = None
LLM_MODEL_NAME = "microsoft/phi-2"
FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
# Vocabulary used by the spell-check heuristic (English lexicon + dataset words).
KNOWN_WORDS = set()
# Skill phrases loaded from validated_skills.json at startup.
AI_VALIDATED_SKILLS = set()

# --- Initialize spaCy ---
print("--- Initializing spaCy ---")
try:
    nlp = spacy.load("en_core_web_sm")
    # Shallow POS patterns that tend to capture skill-like phrases.
    matcher = Matcher(nlp.vocab)
    matcher.add("SKILL", [
        [{"POS": "PROPN"}],
        [{"POS": "NOUN"}, {"POS": "NOUN"}],
        [{"POS": "ADJ"}, {"POS": "NOUN"}],
        [{"POS": "PROPN"}, {"POS": "NOUN"}],
    ])
    print("--- spaCy Initialized Successfully ---")
except Exception as e:
    print(f"🚨 ERROR initializing spaCy: {e}")
    nlp, matcher = None, None

# --- CORE NLP & HELPER FUNCTIONS ---
def _norm_skill_token(s: str) -> str:
    s = s.lower().strip()
    s = re.sub(r'[\(\)\[\]\{\}\*]', '', s)
    s = re.sub(r'^\W+|\W+$', '', s)
    s = re.sub(r'\s+', ' ', s)
    return s

def _skill_match(token1: str, token2: str) -> bool:
    """True when the normalized tokens are equal or one contains the other."""
    a = _norm_skill_token(token1)
    b = _norm_skill_token(token2)
    if a == b:
        return True
    return a in b or b in a

def build_known_vocabulary(df: pd.DataFrame):
    """Populate KNOWN_WORDS with the English lexicon plus words from job texts.

    Dataset words must be alphabetic and longer than 2 characters.
    """
    global KNOWN_WORDS
    corpus = " ".join(df['full_text'].astype(str).tolist()).lower()
    dataset_words = {w for w in re.findall(r'\b\w+\b', corpus) if w.isalpha() and len(w) > 2}
    english = {w.lower() for w in words.words()}
    KNOWN_WORDS = english | dataset_words
    return "Known vocabulary built."

def check_spelling_in_query(query: str) -> list[str]:
    """Return unique alphabetic query words (len > 1) not found in KNOWN_WORDS."""
    unknown = set()
    for w in query.lower().split():
        if w.isalpha() and len(w) > 1 and w not in KNOWN_WORDS:
            unknown.add(w)
    return list(unknown)

def initialize_llm_client():
    """Load the local phi-2 model into the LLM_PIPELINE global.

    Returns True on success, False (after logging) on any failure.
    """
    global LLM_PIPELINE
    try:
        tok = AutoTokenizer.from_pretrained(LLM_MODEL_NAME, trust_remote_code=True)
        lm = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL_NAME, torch_dtype="auto", device_map="auto", trust_remote_code=True
        )
        LLM_PIPELINE = pipeline(
            "text-generation", model=lm, tokenizer=tok,
            max_new_tokens=100, do_sample=True, temperature=0.7
        )
    except Exception as e:
        print(f"🚨 ERROR initializing local LLM: {e}")
        return False
    return True

def llm_expand_query(user_input: str) -> str:
    """Expand the user's query with one LLM-generated sentence.

    Falls back to the raw input when the pipeline is unavailable or errors.
    """
    if not LLM_PIPELINE:
        return user_input
    prompt = f"User's career interest: '{user_input}'\nInstruction: Based on the user's interest, write a concise, single-sentence summary (40-60 words) that elaborates on the core intent, typical skills, and responsibilities. Do not include a preamble, the user input, or any list formatting in the output. Just the expanded sentence.\nExpanded Intent:"
    try:
        outputs = LLM_PIPELINE(prompt, max_new_tokens=100, do_sample=True, temperature=0.6)
        # Keep only the text after the prompt's trailing marker.
        expansion = outputs[0]['generated_text'].strip().split("Expanded Intent:")[-1].strip()
        expansion = expansion.replace('\n', ' ').replace(':', '').strip()
        combined = user_input + ". " + expansion
        return combined.replace('..', '.').strip()
    except Exception:
        return user_input

def extract_fallback_keywords(text: str, user_skills: list[str], top_n=7) -> list[str]:
    """Smarter fallback that prioritizes keywords semantically similar to the user's input."""
    if not isinstance(text, str) or not nlp:
        return []

    # Generic job-posting vocabulary that should never count as a skill.
    noise = STOPWORDS.union({
        'experience', 'ability', 'knowledge', 'skill', 'skills', 'degree', 'education', 'work', 'year', 'years', 'job', 'role', 'team',
        'company', 'duties', 'responsibilities', 'requirements', 'qualifications', 'description', 'position', 'opportunity', 'candidate',
        'application', 'applications', 'university', 'college', 'school', 'department', 'program', 'field', 'service', 'level'
    })

    doc = nlp(text.lower())

    # Named entities (places, orgs, dates, ...) are treated as noise too.
    for ent in doc.ents:
        if ent.label_ in ['GPE', 'ORG', 'DATE', 'PERSON', 'MONEY', 'CARDINAL', 'TIME']:
            noise.add(ent.text)

    keywords = set()
    for chunk in doc.noun_chunks:
        phrase = chunk.text.strip()
        if len(phrase) <= 3 or phrase.isnumeric():
            continue
        if any(junk in phrase.split() for junk in noise):
            continue
        keywords.add(phrase)

    if not keywords:
        return []

    keywords = list(keywords)

    if user_skills and model:
        # Rank each candidate by its best cosine similarity to any user skill,
        # keeping only candidates above a 0.2 floor.
        skill_vecs = model.encode(user_skills, convert_to_tensor=True)
        cand_vecs = model.encode(keywords, convert_to_tensor=True)
        best_scores, _ = torch.max(util.cos_sim(cand_vecs, skill_vecs), dim=1)
        ranked = sorted(zip(keywords, best_scores.tolist()), key=lambda kv: kv[1], reverse=True)
        return [kw for kw, score in ranked if score > 0.2][:top_n]

    # No user context: return an alphabetical sample.
    return sorted(keywords)[:top_n]

def get_skills_from_text(row: pd.Series, user_skills: list[str]) -> list[str]:
    """Primary skill extraction: uses AI-validated list first, then a smart fallback."""
    full_text = " ".join(str(row.get(col, '')) for col in ['qualifications', 'Duties', 'Description'])
    if not full_text.strip():
        return []

    # Preferred path: POS-pattern matches filtered to the pre-validated skill list.
    if nlp and matcher:
        doc = nlp(full_text.lower())
        found = {doc[start:end].text.strip() for _, start, end in matcher(doc)}
        approved = sorted(skill for skill in found if skill in AI_VALIDATED_SKILLS)
        if approved:
            return approved

    # Nothing validated — fall back to similarity-ranked noun chunks.
    return extract_fallback_keywords(full_text, user_skills)

def initialize_data_and_model():
    """Load validated skills, datasets, and the embedding model; precompute embeddings.

    Mutates module-level globals. Returns a status string that the launcher
    compares against "--- Initialization Complete ---" before starting the UI.
    """
    global original_df, combined_df, model, combined_job_embeddings, original_job_title_embeddings, AI_VALIDATED_SKILLS
    if not initialize_llm_client(): print("Warning: LLM Client failed to initialize.")
    
    print("--- Loading pre-computed skills from validated_skills.json ---")
    try:
        with open("validated_skills.json", "r") as f:
            AI_VALIDATED_SKILLS = set(json.load(f))
        print(f"--- Loaded {len(AI_VALIDATED_SKILLS)} AI-validated skills ---")
    except FileNotFoundError:
        print("🚨 WARNING: validated_skills.json not found. Skill extraction will rely on fallback method.")
        AI_VALIDATED_SKILLS = set()

    print("--- Loading Datasets ---")
    ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
    original_df = ds["original"].to_pandas()
    augmented_df = ds["augmented"].to_pandas()
    
    print("--- Mapping skills to each job description (initial pass) ---")
    # No user context yet, so extraction runs with an empty skill list.
    original_df['Skills'] = original_df.apply(lambda row: get_skills_from_text(row, user_skills=[]), axis=1)
    
    original_df['job_id'] = original_df.index
    max_id = len(original_df) - 1
    # Maps augmented rows back to a source job_id assuming 20 variants per job,
    # clamped into range — TODO confirm the 20x ratio against the dataset.
    augmented_df['job_id'] = augmented_df.index.map(lambda i: min(i // 20, max_id))
    
    def create_full_text(row):
        # Concatenate all searchable text fields into one string for embedding.
        return " ".join([str(s) for s in [row.get("Job title"), row.get("Company"), row.get("Duties"), row.get("qualifications"), row.get("Description")]])
    
    original_df["full_text"] = original_df.apply(create_full_text, axis=1)
    augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
    combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
    # Only original_df is renamed; combined_df keeps the raw column names.
    original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})

    print("--- Loading Fine-Tuned Sentence Transformer Model ---")
    model = SentenceTransformer(FINETUNED_MODEL_ID)

    print("--- Encoding Embeddings ---")
    combined_job_embeddings = model.encode(combined_df["full_text"].tolist(), convert_to_tensor=True, show_progress_bar=True)
    original_job_title_embeddings = model.encode(original_df["job_title"].tolist(), convert_to_tensor=True, show_progress_bar=True)
    
    build_known_vocabulary(combined_df)
    return "--- Initialization Complete ---"

def find_job_matches(original_user_query: str, expanded_user_query: str, top_k: int = 50) -> pd.DataFrame:
    """Hybrid semantic search blending full-text similarity with a title boost.

    Scores every row of combined_df against the expanded query, dedupes the
    augmented variants to one row per job_id (keeping the best-scoring one),
    then blends in a title-only similarity against the original query.
    Returns the top_k rows of original_df with a 'Similarity Score' column,
    indexed by job_id and also carrying a 'Job ID' column.
    """
    expanded_user_embedding = model.encode(expanded_user_query, convert_to_tensor=True)
    general_similarity_scores = util.cos_sim(expanded_user_embedding, combined_job_embeddings)[0]
    # topk with k == full length is just a similarity-sorted ordering of all rows.
    top_indices = torch.topk(general_similarity_scores, k=len(combined_df))
    sorted_combined_df = combined_df.iloc[top_indices.indices.cpu()].copy()
    sorted_combined_df['general_score'] = top_indices.values.cpu().numpy()
    # Rows are sorted by score, so keep='first' keeps each job's best variant.
    unique_matches = sorted_combined_df.drop_duplicates(subset=['job_id'], keep='first').set_index('job_id')
    original_user_embedding = model.encode(original_user_query, convert_to_tensor=True)
    title_boost_scores = util.cos_sim(original_user_embedding, original_job_title_embeddings)[0].cpu().numpy()
    title_boost_map = pd.Series(title_boost_scores, index=original_df['job_id'])
    unique_matches['title_boost_score'] = unique_matches.index.map(title_boost_map).fillna(0)
    # Weighted blend: 70% whole-posting similarity, 30% title-only similarity.
    unique_matches['Similarity Score'] = (0.70 * unique_matches['general_score'] + 0.30 * unique_matches['title_boost_score'])
    final_job_ids = unique_matches.sort_values(by='Similarity Score', ascending=False).head(top_k).index.tolist()
    final_results_df = original_df[original_df['job_id'].isin(final_job_ids)].copy()
    scores_df = unique_matches.reset_index()[['job_id', 'Similarity Score']].copy()
    final_results_df = pd.merge(final_results_df, scores_df, on='job_id', how='left')
    return final_results_df.sort_values(by='Similarity Score', ascending=False).reset_index(drop=True).set_index('job_id', drop=False).rename(columns={'job_id': 'Job ID'})

def score_jobs_by_skills(user_tokens: list[str], df_to_rank: pd.DataFrame) -> pd.DataFrame:
    """Re-rank candidate jobs by overlap between user skills and job skills.

    Adds 'Skill Matches' (list), 'Skill Match Count' (int) and
    'Skill Match Score' (matched / total job skills) columns, then sorts by
    skill score first and embedding similarity second. Returns a new frame
    indexed by Job ID (axis name cleared); empty frame for empty input.
    """
    if df_to_rank is None or df_to_rank.empty: return pd.DataFrame()
    ranked_df = df_to_rank.copy()

    # Re-extract skills for the ranked DF using the user's context for better fallback results
    ranked_df['Skills'] = ranked_df.apply(lambda row: get_skills_from_text(row, user_skills=user_tokens), axis=1)
    # NOTE: the previous `if 'Skills' not in ranked_df.columns` guard was
    # unreachable — the column is always assigned just above — so it was removed.

    def calculate_match(row, user_tokens):
        # Returns (matched skills, match count, fraction of job skills matched).
        job_skills = row.get('Skills', [])
        if not isinstance(job_skills, list): return [], 0, 0.0
        matched_skills = [s for s in job_skills if any(_skill_match(ut, s) for ut in user_tokens)]
        return matched_skills, len(matched_skills), len(matched_skills) / len(job_skills) if job_skills else 0.0

    results = ranked_df.apply(lambda row: calculate_match(row, user_tokens), axis=1, result_type='expand')
    ranked_df[['Skill Matches', 'Skill Match Count', 'Skill Match Score']] = results
    return ranked_df.sort_values(by=['Skill Match Score', 'Similarity Score'], ascending=[False, False]).reset_index(drop=True).set_index('Job ID', drop=False).rename_axis(None)

def _course_links_for(skill: str) -> str:
    q = _url.quote(skill)
    links = [("Coursera", f"https://www.coursera.org/search?query={q}"), ("edX", f"https://www.edx.org/search?q={q}"), ("Udemy", f"https://www.udemy.com/courses/search/?q={q}"), ("YouTube", f"https://www.youtube.com/results?search_query={q}+tutorial")]
    return " • ".join([f'<a href="{u}" target="_blank" style="color: #007bff;">{name}</a>' for name, u in links])

# --- GRADIO INTERFACE FUNCTIONS ---
def get_job_matches(dream_job: str, top_n: int, skills_text: str):
    """Run the full search pipeline and build the Gradio outputs.

    Returns (status markdown, full match df for state, display table slice,
    job-selector Dropdown update, details Accordion update).
    """
    expanded_desc = llm_expand_query(dream_job)
    emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
    user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
    display_df = score_jobs_by_skills(user_skills, emb_matches) if user_skills else emb_matches
    display_df = display_df.head(top_n)
    status = f"Found and **re-ranked** results by your {len(user_skills)} skills." if user_skills else f"Found {len(display_df)} top matches."
    table_to_show = display_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']] if 'Skill Match Score' in display_df.columns else display_df[['job_title', 'company', 'Similarity Score']]
    # BUG FIX: iterrows() yields the index *label* (the job_id, since the frame
    # is indexed by Job ID), so the old f"{i+1}. ..." labels displayed job IDs
    # instead of ranks. enumerate() gives the true 1-based display rank;
    # row.name (the job_id) remains the dropdown value.
    options = [(f"{rank}. {row['job_title']} - {row['company']}", row.name)
               for rank, (_, row) in enumerate(display_df.iterrows(), start=1)]
    return status, emb_matches, table_to_show, gr.Dropdown(choices=options, value=options[0][1] if options else None, visible=True), gr.Accordion(visible=True)

def rerank_current_results(initial_matches_df, skills_text, top_n):
    """Re-rank the already-found matches by the user's skills, without a new search.

    Returns (status markdown, display table, job-selector Dropdown update).
    """
    if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
        return "Please find matches first.", pd.DataFrame(), gr.Dropdown(visible=False)
    initial_matches_df = pd.DataFrame(initial_matches_df)
    user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
    if not user_skills:
        # Skills box cleared: show the original similarity-only ordering.
        display_df = initial_matches_df.head(top_n)
        table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
        status = "Skills cleared. Showing original results."
    else:
        ranked_df = score_jobs_by_skills(user_skills, initial_matches_df)
        display_df = ranked_df.head(top_n)
        table_to_show = display_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
        status = f"Results **re-ranked** based on {len(user_skills)} skills."
    # BUG FIX: the frame's index holds job IDs, so the previous f"{i+1}. ..."
    # labels showed job IDs rather than ranks; enumerate gives the 1-based rank.
    options = [(f"{rank}. {row['job_title']} - {row['company']}", row.name)
               for rank, (_, row) in enumerate(display_df.iterrows(), start=1)]
    return status, table_to_show, gr.Dropdown(choices=options, value=options[0][1] if options else None, visible=True)

def find_matches_and_rank_with_check(dream_job, top_n, skills_text):
    """Search-button handler: flag unrecognized words before running the search."""
    if not dream_job:
        return ("Please describe your dream job first.", None, pd.DataFrame(),
                gr.Dropdown(visible=False), gr.Accordion(visible=False),
                gr.Markdown(), gr.Row(visible=False))
    unrecognized = check_spelling_in_query(dream_job)
    if unrecognized:
        # Highlight the suspect words and wait for the user to confirm or fix.
        word_list_html = ", ".join(f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized)
        alert = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
        return ("Status: Awaiting confirmation.", None, pd.DataFrame(),
                gr.Dropdown(visible=False), gr.Accordion(visible=False),
                gr.Markdown(alert, visible=True), gr.Row(visible=True))
    # Query looks clean — run the normal pipeline and keep the alert hidden.
    status, matches, table, dropdown, accordion = get_job_matches(dream_job, top_n, skills_text)
    return status, matches, table, dropdown, accordion, gr.Markdown(visible=False), gr.Row(visible=False)

def find_matches_and_rank_anyway(dream_job, top_n, skills_text):
    """'Search Anyway' handler: skip the spell-check and hide the alert row."""
    results = get_job_matches(dream_job, top_n, skills_text)
    return (*results, gr.Markdown(visible=False), gr.Row(visible=False))

def on_select_job(job_id, skills_text):
    """Show details for the selected job and build a personalized learning plan.

    Returns values for: details md, duties, qualifications, description,
    plan HTML, details Accordion update, missing-skills state, offset state,
    load-more Button update.
    """
    if job_id is None:
        return "", "", "", "", "", gr.Accordion(visible=False), [], 0, gr.Button(visible=False)

    row = original_df.loc[job_id]
    # BUG FIX: title and company were concatenated with no separator
    # ("TitleCompany"); use the same "Title - Company" format as the dropdown.
    details = f"### {row.get('job_title', '')} - {row.get('company', '')}"
    user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]

    # Re-run skill extraction with user context to ensure the learning plan is relevant
    job_skills = get_skills_from_text(row, user_skills)

    if not job_skills:
        plan = "<p><i>No specific skills were extracted for this job.</i></p>"
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    # With no user skills every job skill counts as missing (any() over empty is False).
    missing = sorted([s for s in job_skills if not any(_skill_match(ut, s) for ut in user_skills)], key=str.lower)
    if not missing:
        plan = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    if user_skills:
        score = (len(job_skills) - len(missing)) / len(job_skills) if job_skills else 0
        details += f"\n**Your skill match:** {score:.1%}"
        headline = "<b>Great fit!</b>" if score >= 0.8 else "<b>Good progress!</b>" if score >= 0.5 else "<b>Solid starting point.</b>"
        plan = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
        skills_to_show = missing[:5]
        items = [f"<li><b>{s}</b><br>• Learn: {_course_links_for(s)}</li>" for s in skills_to_show]
        plan += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items)}</ul>"
        # NOTE(review): the load-more pagination is only wired up in the
        # no-user-skills branch below; here the plan is capped at 5 skills.
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
    else:
        headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
        skills_to_show = missing[:5]
        items = [f"<li><b>{s}</b><br>• Learn: {_course_links_for(s)}</li>" for s in skills_to_show]
        plan = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items)}</ul>"
        offset = len(skills_to_show)
        show_btn = len(missing) > 5
        return details, row.get('Duties', ''), row.get('qualifications', ''), row.get('Description', ''), plan, gr.Accordion(visible=True), missing, offset, gr.Button(visible=show_btn)

def load_more_skills(full_list, offset):
    """Reveal five more missing skills in the learning plan HTML."""
    new_offset = offset + 5
    visible = full_list[:new_offset]
    bullet_items = "".join(
        f"<li><b>{skill}</b><br>• Learn: {_course_links_for(skill)}</li>" for skill in visible
    )
    plan = (
        "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
        f"<ul style='list-style-type: none; padding-left: 0;'>{bullet_items}</ul>"
    )
    more_remaining = new_offset < len(full_list)
    return plan, new_offset, gr.Button(visible=more_remaining)

def on_reset():
    """Restore every UI component and piece of state to its initial value."""
    return (
        "",                           # dream_text
        3,                            # topk_slider
        "",                           # skills_text
        pd.DataFrame(),               # df_output
        None,                         # initial_matches_state
        gr.Dropdown(visible=False),   # job_selector
        gr.Accordion(visible=False),  # details_accordion
        "Status: Ready.",             # status_text
        "", "", "", "",               # details / duties / qualifications / description
        gr.Markdown(visible=False),   # spelling_alert
        gr.Row(visible=False),        # spelling_row
        [],                           # missing_skills_state
        0,                            # skills_offset_state
        gr.Button(visible=False),     # load_more_btn
    )

# --- Run Initialization ---
# Runs at import time; the status string is checked again in the __main__
# guard before the UI is launched.
print("Starting application initialization...")
initialization_status = initialize_data_and_model()
print(initialization_status)

# --- Gradio Interface Definition ---
with gr.Blocks(theme=gr.themes.Soft()) as ui:
    gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
    # Per-session state: full match df, missing-skill list, pagination offset.
    initial_matches_state, missing_skills_state, skills_offset_state = gr.State(), gr.State([]), gr.State(0)

    with gr.Row():
        with gr.Column(scale=3):
            dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
            with gr.Accordion("Optional: Add Your Skills to Re-rank Results", open=False):
                with gr.Row():
                    skills_text = gr.Textbox(label='Your Skills (comma-separated)', placeholder="e.g., Python, data analysis", scale=3)
                    rerank_btn = gr.Button("Re-rank", variant="secondary", scale=1)
        with gr.Column(scale=1):
            topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
            search_btn = gr.Button("Find Matches", variant="primary")
            reset_btn = gr.Button("Reset All")
            
    status_text = gr.Markdown("Status: Ready.")
    # Spell-check confirmation widgets start hidden; toggled by the search handlers.
    spelling_alert = gr.Markdown(visible=False)
    with gr.Row(visible=False) as spelling_row:
        search_anyway_btn, retype_btn = gr.Button("Search Anyway", variant="secondary"), gr.Button("Let Me Fix It", variant="stop")
        
    df_output = gr.DataFrame(label="Job Matches", interactive=False)
    job_selector = gr.Dropdown(label="Select a job to see more details & learning plan:", visible=False)
    
    with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
        job_details_markdown = gr.Markdown()
        with gr.Tabs():
            with gr.TabItem("Duties"): duties_markdown = gr.Markdown()
            with gr.TabItem("Qualifications"): qualifications_markdown = gr.Markdown()
            with gr.TabItem("Full Description"): description_markdown = gr.Markdown()
        learning_plan_output = gr.HTML(label="Learning Plan")
        load_more_btn = gr.Button("Load More Skills", visible=False)

    # --- Event Handlers ---
    # Both search buttons share the same output set; the "anyway" variant skips spell-check.
    search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
    search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
    retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
    reset_btn.click(fn=on_reset, outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn], queue=False)
    rerank_btn.click(fn=rerank_current_results, inputs=[initial_matches_state, skills_text, topk_slider], outputs=[status_text, df_output, job_selector])
    job_selector.change(fn=on_select_job, inputs=[job_selector, skills_text], outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn])
    load_more_btn.click(fn=load_more_skills, inputs=[missing_skills_state, skills_offset_state], outputs=[learning_plan_output, skills_offset_state, load_more_btn])

# Only launch the UI if the initialization was successful
if __name__ == '__main__':
    # initialize_data_and_model() returns this exact sentinel string on success.
    if initialization_status == "--- Initialization Complete ---":
        ui.launch()
    else:
        print("Gradio UI will not launch due to initialization failure.")