NotRev committed on
Commit
cf174b3
·
verified ·
1 Parent(s): dfff558

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +12 -8
src/streamlit_app.py CHANGED
@@ -2,8 +2,6 @@ import streamlit as st
2
  from transformers import pipeline
3
  import json
4
  import os
5
- # Note: You must ensure your requirements.txt still includes:
6
- # transformers, accelerate, streamlit, torch
7
 
8
  # --- Page Configuration ---
9
  st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
@@ -13,14 +11,21 @@ def load_models():
13
  # Load NER (Finds the terms) and Zero-Shot Classifier (Categorizes them)
14
  try:
15
  st.info("Loading AI Models (Hugging Face local models)... This may take a moment.")
 
16
  # Model 1: Named Entity Recognition for finding candidate terms
17
- ner_pipe = pipeline("token-classification", model="jjzha/jobbert-base-cased-v2", aggregation_strategy="simple")
 
 
 
18
 
19
  # Model 2: Zero-Shot Classification for categorizing terms
20
- classifier_pipe = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1")
 
21
 
22
  return ner_pipe, classifier_pipe
23
  except Exception as e:
 
 
24
  st.error(f"FATAL: Error loading models. Ensure 'transformers', 'accelerate', 'streamlit', and 'torch' are installed. Details: {e}")
25
  return None, None
26
 
@@ -43,9 +48,8 @@ def process_text(text, ner_pipe, classifier_pipe):
43
  # --- THESIS ENHANCEMENT: Heuristic Post-Processing Overrides ---
44
  # These lists are used to correct the known (and often variable) biases
45
  # of the zero-shot classifier for specific technical terms.
46
- # This is a justifiable heuristic in a research pipeline to improve final output quality.
47
- SKILL_OVERRIDES = ["RAG", "function calling", "LoRA", "CI/CD pipelines", "DeepEval", "RAGAS"]
48
- KNOWLEDGE_OVERRIDES = ["clean code practices", "English fluency", "async code"] # Examples of concepts often misclassified as skill
49
 
50
  skills, knowledge = [], []
51
  classification_labels = ["software tool or technology", "concept or knowledge"]
@@ -85,7 +89,7 @@ ner_pipe, classifier_pipe = load_models()
85
 
86
  if ner_pipe and classifier_pipe:
87
  st.markdown("""
88
- ***Methodology:*** *This application uses a two-stage NLP pipeline: 1) The `jjzha/jobbert-base-cased-v2` NER model to identify relevant terms, followed by 2) The `valhalla/distilbart-mnli-12-1` Zero-Shot Classifier to categorize them as 'SKILL' or 'KNOWLEDGE'.*
89
  """)
90
  job_description = st.text_area(
91
  "Job Description Text",
 
2
  from transformers import pipeline
3
  import json
4
  import os
 
 
5
 
6
  # --- Page Configuration ---
7
  st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
 
11
  # Load NER (Finds the terms) and Zero-Shot Classifier (Categorizes them)
12
  try:
13
  st.info("Loading AI Models (Hugging Face local models)... This may take a moment.")
14
+
15
  # Model 1: Named Entity Recognition for finding candidate terms
16
+ # CORRECTED MODEL ID: "jjzha/jobbert-base-cased"
17
+ ner_pipe = pipeline("token-classification",
18
+ model="jjzha/jobbert-base-cased",
19
+ aggregation_strategy="simple")
20
 
21
  # Model 2: Zero-Shot Classification for categorizing terms
22
+ classifier_pipe = pipeline("zero-shot-classification",
23
+ model="valhalla/distilbart-mnli-12-1")
24
 
25
  return ner_pipe, classifier_pipe
26
  except Exception as e:
27
+ # Note: If the error persists, check your internet connection and ensure
28
+ # your device has enough memory to download these large models.
29
  st.error(f"FATAL: Error loading models. Ensure 'transformers', 'accelerate', 'streamlit', and 'torch' are installed. Details: {e}")
30
  return None, None
31
 
 
48
  # --- THESIS ENHANCEMENT: Heuristic Post-Processing Overrides ---
49
  # These lists are used to correct the known (and often variable) biases
50
  # of the zero-shot classifier for specific technical terms.
51
+ SKILL_OVERRIDES = ["RAG", "function calling", "LoRA", "CI/CD pipelines", "DeepEval", "RAGAS", "Azure", "AWS"]
52
+ KNOWLEDGE_OVERRIDES = ["clean code practices", "English fluency", "async code", "team leadership", "agile methodologies"]
 
53
 
54
  skills, knowledge = [], []
55
  classification_labels = ["software tool or technology", "concept or knowledge"]
 
89
 
90
  if ner_pipe and classifier_pipe:
91
  st.markdown("""
92
+ ***Methodology:*** *This application uses a two-stage NLP pipeline: 1) The `jjzha/jobbert-base-cased` NER model to identify relevant terms, followed by 2) The `valhalla/distilbart-mnli-12-1` Zero-Shot Classifier to categorize them as 'SKILL' or 'KNOWLEDGE'. A heuristic post-processing layer ensures high precision for key technical terms.*
93
  """)
94
  job_description = st.text_area(
95
  "Job Description Text",