Update app.py
Browse files
app.py
CHANGED
|
@@ -221,7 +221,6 @@ Extracted Skills:
|
|
| 221 |
chunk_parser = nltk.RegexpParser(grammar)
|
| 222 |
tokens = nltk.word_tokenize(text_lower)
|
| 223 |
tagged_tokens = nltk.pos_tag(tokens)
|
| 224 |
-
chunked_text = chunk_parser.parse(tagged_tokens)
|
| 225 |
potential_skills = set()
|
| 226 |
for subtree in chunked_text.subtrees():
|
| 227 |
if subtree.label() == 'NP':
|
|
@@ -230,12 +229,10 @@ Extracted Skills:
|
|
| 230 |
potential_skills.add(_norm_skill_token(phrase))
|
| 231 |
return sorted(list(potential_skills))
|
| 232 |
|
| 233 |
-
# NEW: Third extraction method for maximum coverage
|
| 234 |
def extract_skills_direct_scan(text: str) -> list[str]:
|
| 235 |
if not isinstance(text, str): return []
|
| 236 |
found_skills = set()
|
| 237 |
for skill in SKILL_WHITELIST:
|
| 238 |
-
# Use word boundaries to avoid matching substrings like 'art' in 'startup'
|
| 239 |
if re.search(r'\b' + re.escape(skill) + r'\b', text, re.IGNORECASE):
|
| 240 |
found_skills.add(skill)
|
| 241 |
return list(found_skills)
|
|
@@ -244,7 +241,6 @@ Extracted Skills:
|
|
| 244 |
llm_skills = extract_skills_llm(text)
|
| 245 |
nltk_skills = extract_skills_nltk(text)
|
| 246 |
direct_skills = extract_skills_direct_scan(text)
|
| 247 |
-
# Combine all sources and return a unique, sorted list
|
| 248 |
combined_skills = set(llm_skills) | set(nltk_skills) | set(direct_skills)
|
| 249 |
return sorted(list(combined_skills))
|
| 250 |
|
|
@@ -431,4 +427,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
|
|
| 431 |
with gr.TabItem("Duties"): duties_markdown = gr.Markdown()
|
| 432 |
with gr.TabItem("Qualifications"): qualifications_markdown = gr.Markdown()
|
| 433 |
with gr.TabItem("Full Description"): description_markdown = gr.Markdown()
|
| 434 |
-
learning_plan_output = gr.HTML(label="Learning
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
chunk_parser = nltk.RegexpParser(grammar)
|
| 222 |
tokens = nltk.word_tokenize(text_lower)
|
| 223 |
tagged_tokens = nltk.pos_tag(tokens)
|
|
|
|
| 224 |
potential_skills = set()
|
| 225 |
for subtree in chunked_text.subtrees():
|
| 226 |
if subtree.label() == 'NP':
|
|
|
|
| 229 |
potential_skills.add(_norm_skill_token(phrase))
|
| 230 |
return sorted(list(potential_skills))
|
| 231 |
|
|
|
|
| 232 |
def extract_skills_direct_scan(text: str) -> list[str]:
|
| 233 |
if not isinstance(text, str): return []
|
| 234 |
found_skills = set()
|
| 235 |
for skill in SKILL_WHITELIST:
|
|
|
|
| 236 |
if re.search(r'\b' + re.escape(skill) + r'\b', text, re.IGNORECASE):
|
| 237 |
found_skills.add(skill)
|
| 238 |
return list(found_skills)
|
|
|
|
| 241 |
llm_skills = extract_skills_llm(text)
|
| 242 |
nltk_skills = extract_skills_nltk(text)
|
| 243 |
direct_skills = extract_skills_direct_scan(text)
|
|
|
|
| 244 |
combined_skills = set(llm_skills) | set(nltk_skills) | set(direct_skills)
|
| 245 |
return sorted(list(combined_skills))
|
| 246 |
|
|
|
|
| 427 |
with gr.TabItem("Duties"): duties_markdown = gr.Markdown()
|
| 428 |
with gr.TabItem("Qualifications"): qualifications_markdown = gr.Markdown()
|
| 429 |
with gr.TabItem("Full Description"): description_markdown = gr.Markdown()
|
| 430 |
+
learning_plan_output = gr.HTML(label="Learning Plan")
|
| 431 |
+
load_more_btn = gr.Button("Load More Skills", visible=False)
|
| 432 |
+
search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
|
| 433 |
+
search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
|
| 434 |
+
retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row])
|
| 435 |
+
reset_btn.click(fn=on_reset, outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn], queue=False)
|
| 436 |
+
rerank_btn.click(fn=rerank_current_results, inputs=[initial_matches_state, skills_text, topk_slider], outputs=[status_text, df_output, job_selector])
|
| 437 |
+
job_selector.change(fn=on_select_job, inputs=[job_selector, skills_text], outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn])
|
| 438 |
+
load_more_btn.click(fn=load_more_skills, inputs=[missing_skills_state, skills_offset_state], outputs=[learning_plan_output, skills_offset_state, load_more_btn])
|
| 439 |
+
|
| 440 |
+
ui.launch()
|