NotRev committed on
Commit
6f4dbd2
·
verified ·
1 Parent(s): 682a2d1

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +4 -8
src/streamlit_app.py CHANGED
@@ -18,14 +18,14 @@ def load_models():
18
  ner_pipe = pipeline(
19
  "token-classification",
20
  model="jjzha/jobbert-base-cased-v2",
21
- aggregation_strategy="simple" # Merges sub-word tokens
22
  )
23
 
24
  # 2. Zero-Shot Classification Model (Categorizes the terms)
25
  st.info("Loading Zero-Shot Classification Model...")
26
  classifier_pipe = pipeline(
27
  "zero-shot-classification",
28
- model="valhalla/distilbart-mnli-12-1" # Smaller, faster classification model
29
  )
30
  return ner_pipe, classifier_pipe
31
  except Exception as e:
@@ -42,11 +42,10 @@ def process_text(text, ner_pipe, classifier_pipe):
42
  # Step 1: Extract Entities (Candidates)
43
  ner_results = ner_pipe(text)
44
 
45
- # Filter and clean extracted words, removing very short, possibly meaningless terms
46
  candidates = set()
47
  for entity in ner_results:
48
  word = entity['word'].strip()
49
- if len(word.split()) > 1 or len(word) > 2: # Keep multi-word phrases or single words longer than 2 chars
50
  candidates.add(word)
51
 
52
  candidates = list(candidates)
@@ -57,21 +56,18 @@ def process_text(text, ner_pipe, classifier_pipe):
57
  skills = []
58
  knowledge = []
59
 
60
- # These are the labels the Zero-Shot model will use for classification
61
  classification_labels = ["software tool or technology", "concept or knowledge"]
62
 
63
  for candidate in candidates:
64
  try:
65
- # Classify the term
66
  result = classifier_pipe(candidate, candidate_labels=classification_labels)
67
  top_label = result['labels'][0]
68
 
69
- # Append to the correct list
70
  if top_label == "software tool or technology":
71
  skills.append(candidate)
72
  else:
73
  knowledge.append(candidate)
74
- except Exception as e:
75
  # Fallback for classification errors
76
  knowledge.append(candidate)
77
 
 
18
  ner_pipe = pipeline(
19
  "token-classification",
20
  model="jjzha/jobbert-base-cased-v2",
21
+ aggregation_strategy="simple"
22
  )
23
 
24
  # 2. Zero-Shot Classification Model (Categorizes the terms)
25
  st.info("Loading Zero-Shot Classification Model...")
26
  classifier_pipe = pipeline(
27
  "zero-shot-classification",
28
+ model="valhalla/distilbart-mnli-12-1"
29
  )
30
  return ner_pipe, classifier_pipe
31
  except Exception as e:
 
42
  # Step 1: Extract Entities (Candidates)
43
  ner_results = ner_pipe(text)
44
 
 
45
  candidates = set()
46
  for entity in ner_results:
47
  word = entity['word'].strip()
48
+ if len(word.split()) > 1 or len(word) > 2:
49
  candidates.add(word)
50
 
51
  candidates = list(candidates)
 
56
  skills = []
57
  knowledge = []
58
 
 
59
  classification_labels = ["software tool or technology", "concept or knowledge"]
60
 
61
  for candidate in candidates:
62
  try:
 
63
  result = classifier_pipe(candidate, candidate_labels=classification_labels)
64
  top_label = result['labels'][0]
65
 
 
66
  if top_label == "software tool or technology":
67
  skills.append(candidate)
68
  else:
69
  knowledge.append(candidate)
70
+ except Exception:
71
  # Fallback for classification errors
72
  knowledge.append(candidate)
73