Spaces:

NotRev
/

ThesisLast

Sleeping

App Files Files Community

NotRev committed on Dec 11, 2025

Commit

cf174b3

verified ·

1 Parent(s): dfff558

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +12 -8

src/streamlit_app.py CHANGED Viewed

@@ -2,8 +2,6 @@ import streamlit as st
 from transformers import pipeline
 import json
 import os
-# Note: You must ensure your requirements.txt still includes:
-# transformers, accelerate, streamlit, torch
 # --- Page Configuration ---
 st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
@@ -13,14 +11,21 @@ def load_models():
     # Load NER (Finds the terms) and Zero-Shot Classifier (Categorizes them)
     try:
         st.info("Loading AI Models (Hugging Face local models)... This may take a moment.")
         # Model 1: Named Entity Recognition for finding candidate terms
-        ner_pipe = pipeline("token-classification", model="jjzha/jobbert-base-cased-v2", aggregation_strategy="simple")
         # Model 2: Zero-Shot Classification for categorizing terms
-        classifier_pipe = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1")
         return ner_pipe, classifier_pipe
     except Exception as e:
         st.error(f"FATAL: Error loading models. Ensure 'transformers', 'accelerate', 'streamlit', and 'torch' are installed. Details: {e}")
         return None, None
@@ -43,9 +48,8 @@ def process_text(text, ner_pipe, classifier_pipe):
     # --- THESIS ENHANCEMENT: Heuristic Post-Processing Overrides ---
     # These lists are used to correct the known (and often variable) biases
     # of the zero-shot classifier for specific technical terms.
-    # This is a justifiable heuristic in a research pipeline to improve final output quality.
-    SKILL_OVERRIDES = ["RAG", "function calling", "LoRA", "CI/CD pipelines", "DeepEval", "RAGAS"]
-    KNOWLEDGE_OVERRIDES = ["clean code practices", "English fluency", "async code"] # Examples of concepts often misclassified as skill
     skills, knowledge = [], []
     classification_labels = ["software tool or technology", "concept or knowledge"]
@@ -85,7 +89,7 @@ ner_pipe, classifier_pipe = load_models()
 if ner_pipe and classifier_pipe:
     st.markdown("""
-    ***Methodology:*** *This application uses a two-stage NLP pipeline: 1) The `jjzha/jobbert-base-cased-v2` NER model to identify relevant terms, followed by 2) The `valhalla/distilbart-mnli-12-1` Zero-Shot Classifier to categorize them as 'SKILL' or 'KNOWLEDGE'.*
     """)
     job_description = st.text_area(
         "Job Description Text",

 from transformers import pipeline
 import json
 import os
 # --- Page Configuration ---
 st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
     # Load NER (Finds the terms) and Zero-Shot Classifier (Categorizes them)
     try:
         st.info("Loading AI Models (Hugging Face local models)... This may take a moment.")
         # Model 1: Named Entity Recognition for finding candidate terms
+        # CORRECTED MODEL ID: "jjzha/jobbert-base-cased"
+        ner_pipe = pipeline("token-classification",
+                             model="jjzha/jobbert-base-cased",
+                             aggregation_strategy="simple")
         # Model 2: Zero-Shot Classification for categorizing terms
+        classifier_pipe = pipeline("zero-shot-classification",
+                                   model="valhalla/distilbart-mnli-12-1")
         return ner_pipe, classifier_pipe
     except Exception as e:
+        # Note: If the error persists, check your internet connection and ensure
+        # your device has enough disk space to download these large models and enough memory to load them.
         st.error(f"FATAL: Error loading models. Ensure 'transformers', 'accelerate', 'streamlit', and 'torch' are installed. Details: {e}")
         return None, None
     # --- THESIS ENHANCEMENT: Heuristic Post-Processing Overrides ---
     # These lists are used to correct the known (and often variable) biases
     # of the zero-shot classifier for specific technical terms.
+    SKILL_OVERRIDES = ["RAG", "function calling", "LoRA", "CI/CD pipelines", "DeepEval", "RAGAS", "Azure", "AWS"]
+    KNOWLEDGE_OVERRIDES = ["clean code practices", "English fluency", "async code", "team leadership", "agile methodologies"]
     skills, knowledge = [], []
     classification_labels = ["software tool or technology", "concept or knowledge"]
 if ner_pipe and classifier_pipe:
     st.markdown("""
+    ***Methodology:*** *This application uses a two-stage NLP pipeline: 1) The `jjzha/jobbert-base-cased` NER model to identify relevant terms, followed by 2) The `valhalla/distilbart-mnli-12-1` Zero-Shot Classifier to categorize them as 'SKILL' or 'KNOWLEDGE'. A heuristic post-processing layer ensures high precision for key technical terms.*
     """)
     job_description = st.text_area(
         "Job Description Text",