Spaces:

NotRev
/

ThesisLast

Sleeping

App Files Files Community

NotRev committed on Dec 11, 2025

Commit

6f4dbd2

verified ·

1 Parent(s): 682a2d1

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +4 -8

src/streamlit_app.py CHANGED Viewed

@@ -18,14 +18,14 @@ def load_models():
         ner_pipe = pipeline(
             "token-classification",
             model="jjzha/jobbert-base-cased-v2",
-            aggregation_strategy="simple" # Merges sub-word tokens
         )
         # 2. Zero-Shot Classification Model (Categorizes the terms)
         st.info("Loading Zero-Shot Classification Model...")
         classifier_pipe = pipeline(
             "zero-shot-classification",
-            model="valhalla/distilbart-mnli-12-1" # Smaller, faster classification model
         )
         return ner_pipe, classifier_pipe
     except Exception as e:
@@ -42,11 +42,10 @@ def process_text(text, ner_pipe, classifier_pipe):
     # Step 1: Extract Entities (Candidates)
     ner_results = ner_pipe(text)
-    # Filter and clean extracted words, removing very short, possibly meaningless terms
     candidates = set()
     for entity in ner_results:
         word = entity['word'].strip()
-        if len(word.split()) > 1 or len(word) > 2: # Keep multi-word phrases or single words longer than 2 chars
             candidates.add(word)
     candidates = list(candidates)
@@ -57,21 +56,18 @@ def process_text(text, ner_pipe, classifier_pipe):
     skills = []
     knowledge = []
-    # These are the labels the Zero-Shot model will use for classification
     classification_labels = ["software tool or technology", "concept or knowledge"]
     for candidate in candidates:
         try:
-            # Classify the term
             result = classifier_pipe(candidate, candidate_labels=classification_labels)
             top_label = result['labels'][0]
-            # Append to the correct list
             if top_label == "software tool or technology":
                 skills.append(candidate)
             else:
                 knowledge.append(candidate)
-        except Exception as e:
             # Fallback for classification errors
             knowledge.append(candidate)

         ner_pipe = pipeline(
             "token-classification",
             model="jjzha/jobbert-base-cased-v2",
+            aggregation_strategy="simple"
         )
         # 2. Zero-Shot Classification Model (Categorizes the terms)
         st.info("Loading Zero-Shot Classification Model...")
         classifier_pipe = pipeline(
             "zero-shot-classification",
+            model="valhalla/distilbart-mnli-12-1"
         )
         return ner_pipe, classifier_pipe
     except Exception as e:
     # Step 1: Extract Entities (Candidates)
     ner_results = ner_pipe(text)
     candidates = set()
     for entity in ner_results:
         word = entity['word'].strip()
+        if len(word.split()) > 1 or len(word) > 2:
             candidates.add(word)
     candidates = list(candidates)
     skills = []
     knowledge = []
     classification_labels = ["software tool or technology", "concept or knowledge"]
     for candidate in candidates:
         try:
             result = classifier_pipe(candidate, candidate_labels=classification_labels)
             top_label = result['labels'][0]
             if top_label == "software tool or technology":
                 skills.append(candidate)
             else:
                 knowledge.append(candidate)
+        except Exception:
             # Fallback for classification errors
             knowledge.append(candidate)