Spaces:

NotRev
/

thisis

Sleeping

App Files Files Community

NotRev committed on Dec 11, 2025

Commit

5ea7aea

verified ·

1 Parent(s): b2f4e73

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +114 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,116 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+from transformers import pipeline
+import json
+import os
+# --- Page Configuration ---
+# Set the page title to "Skill vs Knowledge Extractor" and select the "wide" page layout.
+st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
+@st.cache_resource(show_spinner="Loading AI Models (Hugging Face local models)... This may take a moment.")
+def _load_pipelines():
+    """Build the NER and zero-shot pipelines once and cache them.
+    Exceptions are deliberately NOT caught here: st.cache_resource only stores
+    a value when the function returns, so letting a failure propagate means a
+    transient error (e.g. a failed download) is retried on the next rerun
+    instead of being cached as a permanent "no models" result.
+    The loading message is passed as show_spinner rather than emitted with
+    st.info inside the function, because elements created inside a cached
+    function are replayed on every later call and the banner would never
+    disappear.
+    """
+    # Model 1: Named Entity Recognition for finding candidate terms
+    ner_pipe = pipeline("token-classification",
+                         model="jjzha/jobbert-base-cased",
+                         aggregation_strategy="simple")
+    # Model 2: Zero-Shot Classification for categorizing terms
+    classifier_pipe = pipeline("zero-shot-classification",
+                               model="valhalla/distilbart-mnli-12-1")
+    return ner_pipe, classifier_pipe
+def load_models():
+    """Return the (NER pipeline, zero-shot classifier pipeline) pair.
+    Returns:
+        A 2-tuple ``(ner_pipe, classifier_pipe)``. If loading fails, an error
+        box is shown in the app and ``(None, None)`` is returned, so callers
+        must check for None before using the pipelines.
+    """
+    try:
+        return _load_pipelines()
+    except Exception as e:
+        # UI boundary: report the failure to the user instead of crashing the page.
+        # Note: If the error persists, check your internet connection and ensure
+        # your device has enough memory to download these large models.
+        st.error(f"FATAL: Error loading models. Ensure 'transformers', 'accelerate', 'streamlit', and 'torch' are installed. Details: {e}")
+        return None, None
+def process_text(text, ner_pipe, classifier_pipe):
+    """Extract candidate terms from *text* and sort them into skills and knowledge.
+    Args:
+        text: The job description to analyze. None, empty, or whitespace-only
+            input yields an empty result without calling either model.
+        ner_pipe: Callable taking the text and returning an iterable of dicts
+            that each have a 'word' entry (the token-classification pipeline).
+        classifier_pipe: Callable invoked as
+            ``classifier_pipe(term, candidate_labels=[...])`` and returning a
+            dict whose 'labels' list is read at index 0 as the predicted label.
+    Returns:
+        ``{"SKILL": [...], "KNOWLEDGE": [...]}`` with each list de-duplicated
+        and sorted. A term whose classification raises is placed in KNOWLEDGE.
+    """
+    import logging  # function-scope import keeps this block self-contained
+    if not text or not text.strip():
+        return {"SKILL": [], "KNOWLEDGE": []}
+    # 1. Extract Candidates (Using NER Model); a set removes duplicates.
+    candidates = set()
+    for entity in ner_pipe(text):
+        word = entity['word'].strip()
+        # Keep multi-word terms and single words longer than two characters.
+        if len(word.split()) > 1 or len(word) > 2:
+            candidates.add(word)
+    if not candidates:
+        return {"SKILL": [], "KNOWLEDGE": []}
+    # --- THESIS ENHANCEMENT: Heuristic Post-Processing Overrides ---
+    # Exact-match (case-sensitive) terms whose category is fixed up front to
+    # correct known biases of the zero-shot classifier.
+    skill_overrides = frozenset({"RAG", "function calling", "LoRA", "CI/CD pipelines", "DeepEval", "RAGAS", "Azure", "AWS"})
+    knowledge_overrides = frozenset({"clean code practices", "English fluency", "async code", "team leadership", "agile methodologies"})
+    skill_label = "software tool or technology"
+    classification_labels = [skill_label, "concept or knowledge"]
+    skills, knowledge = [], []
+    # Iterating in sorted order makes the model-call order deterministic and
+    # leaves both output lists already sorted.
+    for candidate in sorted(candidates):
+        # Check Overrides First (Highest priority for accuracy)
+        if candidate in skill_overrides:
+            skills.append(candidate)
+            continue
+        if candidate in knowledge_overrides:
+            knowledge.append(candidate)
+            continue
+        # 2. Classify (Zero-Shot Model)
+        try:
+            result = classifier_pipe(candidate, candidate_labels=classification_labels)
+            top_label = result['labels'][0]
+        except Exception:
+            # Best-effort fallback: keep the term as KNOWLEDGE, but record why.
+            logging.getLogger(__name__).warning(
+                "Zero-shot classification failed for %r; defaulting to KNOWLEDGE",
+                candidate, exc_info=True)
+            knowledge.append(candidate)
+            continue
+        if top_label == skill_label:
+            skills.append(candidate)
+        else:
+            knowledge.append(candidate)
+    return {"SKILL": skills, "KNOWLEDGE": knowledge}
+# --- UI Layout ---
+st.title("💡 AI Job Description Analyzer")
+ner_pipe, classifier_pipe = load_models()
+# load_models() returns (None, None) on failure and reports the error itself.
+if ner_pipe is not None and classifier_pipe is not None:
+    st.markdown("""
+    ***Methodology:*** *This application uses a two-stage NLP pipeline: 1) The `jjzha/jobbert-base-cased` NER model to identify relevant terms, followed by 2) The `valhalla/distilbart-mnli-12-1` Zero-Shot Classifier to categorize them as 'SKILL' or 'KNOWLEDGE'. A heuristic post-processing layer ensures high precision for key technical terms.*
+    """)
+    job_description = st.text_area(
+        "Job Description Text",
+        height=300,
+        placeholder="Paste a job description here..."
+    )
+    if st.button("Analyze and Extract Entities", type="primary"):
+        if job_description.strip():
+            with st.spinner("Analyzing text and running classification..."):
+                # Persist the result: st.button is True only for the rerun
+                # triggered by the click, so anything rendered solely inside
+                # this branch vanishes on the next interaction (for example,
+                # when the download button below is clicked).
+                st.session_state["extraction_output"] = process_text(
+                    job_description, ner_pipe, classifier_pipe
+                )
+        else:
+            # Drop any earlier result so it is not shown next to the warning.
+            st.session_state.pop("extraction_output", None)
+            st.warning("Please paste a job description into the text area.")
+    output = st.session_state.get("extraction_output")
+    if output is not None:
+        st.subheader("Extraction Output (JSON)")
+        st.json(output)
+        st.download_button(
+            label="Download JSON Output",
+            data=json.dumps(output, indent=2),
+            file_name="extracted_entities.json",
+            mime="application/json"
+        )