Spaces:

NotRev
/

ThesisLast

Running

App Files Community

NotRev committed on Dec 11, 2025

Commit

682a2d1

verified ·

1 Parent(s): d5d887f

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +117 -76

src/streamlit_app.py CHANGED Viewed

@@ -1,76 +1,117 @@
-import streamlit as st
-from transformers import pipeline
-import json
-# Page Config
-st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
-@st.cache_resource
-def load_models():
-    # 1. Load the Entity Extraction Model (Finds the terms)
-    ner_pipe = pipeline("token-classification", model="jjzha/jobbert-base-cased-v2", aggregation_strategy="simple")
-    # 2. Load the Zero-Shot Classification Model (Categorizes them)
-    # We use a smaller, faster model for speed
-    classifier_pipe = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1")
-    return ner_pipe, classifier_pipe
-def process_text(text, ner_pipe, classifier_pipe):
-    if not text:
-        return {"SKILL": [], "KNOWLEDGE": []}
-    # Step 1: Extract Entities (Candidates)
-    ner_results = ner_pipe(text)
-    # Filter and clean extracted words
-    candidates = list(set([entity['word'].strip() for entity in ner_results if len(entity['word']) > 2]))
-    if not candidates:
-        return {"SKILL": [], "KNOWLEDGE": []}
-    # Step 2: Classify each entity as SKILL or KNOWLEDGE
-    skills = []
-    knowledge = []
-    # We classify the terms against these labels
-    labels = ["software tool or technology", "concept or knowledge"]
-    # Batch classification can be slow, so we do it simply here
-    for candidate in candidates:
-        # Ask the AI: Is this a tool/technology OR a concept/knowledge?
-        result = classifier_pipe(candidate, candidate_labels=labels)
-        top_label = result['labels'][0]
-        if top_label == "software tool or technology":
-            skills.append(candidate)
-        else:
-            knowledge.append(candidate)
-    return {
-        "SKILL": skills,
-        "KNOWLEDGE": knowledge
-    }
-# --- UI Layout ---
-st.title("Job Description Analyzer")
-st.markdown("Extracts entities and categorizes them into **SKILL** (Tools) and **KNOWLEDGE** (Concepts).")
-with st.spinner("Loading Models... this may take a minute first time..."):
-    ner_pipe, classifier_pipe = load_models()
-job_description = st.text_area("Job Description", height=250, placeholder="Paste job description here...")
-if st.button("Analyze"):
-    if job_description.strip():
-        with st.spinner("Processing..."):
-            output = process_text(job_description, ner_pipe, classifier_pipe)
-            # Show formatted JSON
-            st.json(output)
-            # Download button
-            json_str = json.dumps(output, indent=2)
-            st.download_button("Download JSON", json_str, file_name="output.json", mime="application/json")
-    else:
-        st.warning("Please enter text first.")

+import streamlit as st
+from transformers import pipeline
+import json
+# --- Page Configuration ---
+st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
+@st.cache_resource
+def load_models():
+    """
+    Loads two Hugging Face models:
+    1. NER Model: To extract potential technical terms (candidates).
+    2. Zero-Shot Classifier: To categorize each term as SKILL or KNOWLEDGE.
+    """
+    try:
+        # 1. Entity Extraction Model (NER - finds the terms)
+        st.info("Loading Entity Extraction Model...")
+        ner_pipe = pipeline(
+            "token-classification",
+            model="jjzha/jobbert-base-cased-v2",
+            aggregation_strategy="simple" # Merges sub-word tokens
+        )
+        # 2. Zero-Shot Classification Model (Categorizes the terms)
+        st.info("Loading Zero-Shot Classification Model...")
+        classifier_pipe = pipeline(
+            "zero-shot-classification",
+            model="valhalla/distilbart-mnli-12-1" # Smaller, faster classification model
+        )
+        return ner_pipe, classifier_pipe
+    except Exception as e:
+        st.error(f"Error loading models. Check your requirements.txt. Details: {e}")
+        return None, None
+def process_text(text, ner_pipe, classifier_pipe):
+    """
+    Runs the extraction and classification pipeline.
+    """
+    if not text:
+        return {"SKILL": [], "KNOWLEDGE": []}
+    # Step 1: Extract Entities (Candidates)
+    ner_results = ner_pipe(text)
+    # Filter and clean extracted words, removing very short, possibly meaningless terms
+    candidates = set()
+    for entity in ner_results:
+        word = entity['word'].strip()
+        if len(word.split()) > 1 or len(word) > 2: # Keep multi-word phrases or single words longer than 2 chars
+            candidates.add(word)
+    candidates = list(candidates)
+    if not candidates:
+        return {"SKILL": [], "KNOWLEDGE": []}
+    # Step 2: Classify each entity as SKILL or KNOWLEDGE using Zero-Shot
+    skills = []
+    knowledge = []
+    # These are the labels the Zero-Shot model will use for classification
+    classification_labels = ["software tool or technology", "concept or knowledge"]
+    for candidate in candidates:
+        try:
+            # Classify the term
+            result = classifier_pipe(candidate, candidate_labels=classification_labels)
+            top_label = result['labels'][0]
+            # Append to the correct list
+            if top_label == "software tool or technology":
+                skills.append(candidate)
+            else:
+                knowledge.append(candidate)
+        except Exception as e:
+            # Fallback for classification errors
+            knowledge.append(candidate)
+    return {
+        "SKILL": sorted(list(set(skills))),
+        "KNOWLEDGE": sorted(list(set(knowledge)))
+    }
+# --- UI Layout ---
+st.title("💡 AI Job Description Analyzer")
+st.markdown("Paste a job description below to extract and categorize entities.")
+# 1. Load Models (Cached)
+ner_pipe, classifier_pipe = load_models()
+if ner_pipe and classifier_pipe:
+    # 2. Input Area
+    job_description = st.text_area(
+        "Job Description Text",
+        height=300,
+        placeholder="Paste a job description here (e.g., 'We require a Python developer proficient in FastAPI and experienced with Kafka and RAG systems...')"
+    )
+    # 3. Process Button
+    if st.button("Analyze and Extract Entities", type="primary"):
+        if job_description.strip():
+            with st.spinner("Analyzing text and running classification..."):
+                output = process_text(job_description, ner_pipe, classifier_pipe)
+                # Display Result
+                st.subheader("Extraction Output (JSON)")
+                st.json(output)
+                # Option to download
+                json_str = json.dumps(output, indent=2)
+                st.download_button(
+                    label="Download JSON Output",
+                    data=json_str,
+                    file_name="extracted_entities.json",
+                    mime="application/json"
+                )
+        else:
+            st.warning("Please enter a job description first.")