NotRev committed on
Commit
5ea7aea
·
verified ·
1 Parent(s): b2f4e73

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +114 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,116 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
+ import json
4
+ import os
5
 
6
# --- Page Configuration ---
# Sets the page title and switches the app to the wide layout.
st.set_page_config(
    page_title="Skill vs Knowledge Extractor",
    layout="wide",
)
8
+
9
@st.cache_resource(
    show_spinner="Loading AI Models (Hugging Face local models)... This may take a moment."
)
def _load_pipelines():
    """Build both Hugging Face pipelines; cached only when loading succeeds.

    Exceptions are deliberately NOT caught here: st.cache_resource stores
    whatever this function returns, so returning a failure marker would pin
    the failure in the cache and block every later retry.
    """
    # Model 1: Named Entity Recognition for finding candidate terms.
    ner_pipe = pipeline(
        "token-classification",
        model="jjzha/jobbert-base-cased",
        aggregation_strategy="simple",
    )

    # Model 2: Zero-Shot Classification for categorizing terms.
    classifier_pipe = pipeline(
        "zero-shot-classification",
        model="valhalla/distilbart-mnli-12-1",
    )

    return ner_pipe, classifier_pipe


def load_models():
    """Return ``(ner_pipe, classifier_pipe)``, or ``(None, None)`` on failure.

    The pipelines themselves are cached by ``_load_pipelines``. This wrapper
    is intentionally uncached so that a failed load (e.g. a network hiccup
    while downloading weights, or insufficient memory) is reported to the
    user and retried on the next script run instead of being remembered.
    """
    try:
        return _load_pipelines()
    except Exception as e:
        # Top-level boundary: surface the problem in the UI rather than crash.
        st.error(f"FATAL: Error loading models. Ensure 'transformers', 'accelerate', 'streamlit', and 'torch' are installed. Details: {e}")
        return None, None
31
+
32
# --- Heuristic post-processing overrides ---
# Terms whose category is pinned by hand to correct known biases of the
# zero-shot classifier. Stored casefolded so matching ignores letter case.
_SKILL_OVERRIDES = frozenset(
    term.casefold()
    for term in (
        "RAG",
        "function calling",
        "LoRA",
        "CI/CD pipelines",
        "DeepEval",
        "RAGAS",
        "Azure",
        "AWS",
    )
)
_KNOWLEDGE_OVERRIDES = frozenset(
    term.casefold()
    for term in (
        "clean code practices",
        "English fluency",
        "async code",
        "team leadership",
        "agile methodologies",
    )
)

_SKILL_LABEL = "software tool or technology"
_KNOWLEDGE_LABEL = "concept or knowledge"


def process_text(text, ner_pipe, classifier_pipe):
    """Extract terms from *text* and split them into skills and knowledge.

    Args:
        text: Job-description text. Empty or whitespace-only input yields
            empty result lists without calling either model.
        ner_pipe: Callable taking the text and returning an iterable of
            mappings that each carry a ``"word"`` entry.
        classifier_pipe: Callable invoked as
            ``classifier_pipe(term, candidate_labels=[...])`` and returning a
            mapping whose ``"labels"`` list is ordered best-first.

    Returns:
        ``{"SKILL": [...], "KNOWLEDGE": [...]}`` with each list de-duplicated
        and sorted. Terms keep the spelling found in the text.
    """
    import logging

    if not text or not text.strip():
        return {"SKILL": [], "KNOWLEDGE": []}

    # 1. Extract candidates (NER model). A set removes repeated mentions.
    candidates = set()
    for entity in ner_pipe(text):
        word = entity["word"].strip()
        # Keep multi-word terms, and single words longer than two characters.
        if len(word.split()) > 1 or len(word) > 2:
            candidates.add(word)
    if not candidates:
        return {"SKILL": [], "KNOWLEDGE": []}

    skills, knowledge = [], []
    classification_labels = [_SKILL_LABEL, _KNOWLEDGE_LABEL]

    for candidate in candidates:
        # Overrides take priority over the model; compare case-insensitively
        # so "aws" and "AWS" are treated alike.
        key = candidate.casefold()
        if key in _SKILL_OVERRIDES:
            skills.append(candidate)
            continue
        if key in _KNOWLEDGE_OVERRIDES:
            knowledge.append(candidate)
            continue

        # 2. Classify (zero-shot model).
        try:
            result = classifier_pipe(candidate, candidate_labels=classification_labels)
            top_label = result["labels"][0]
        except Exception:
            # Best-effort: one failed or empty classification must not abort
            # the whole analysis. Record it and fall back to KNOWLEDGE.
            logging.getLogger(__name__).warning(
                "Zero-shot classification failed for %r; defaulting to KNOWLEDGE",
                candidate,
                exc_info=True,
            )
            knowledge.append(candidate)
            continue

        if top_label == _SKILL_LABEL:
            skills.append(candidate)
        else:
            knowledge.append(candidate)

    # Candidates are already unique, so sorting alone gives a stable output.
    return {"SKILL": sorted(skills), "KNOWLEDGE": sorted(knowledge)}
85
+
86
# --- UI Layout ---
st.title("💡 AI Job Description Analyzer")
ner_pipe, classifier_pipe = load_models()

# load_models() reports its own error and returns (None, None) on failure,
# so the rest of the UI is only rendered when both pipelines exist.
if ner_pipe is not None and classifier_pipe is not None:
    st.markdown("""
    ***Methodology:*** *This application uses a two-stage NLP pipeline: 1) The `jjzha/jobbert-base-cased` NER model to identify relevant terms, followed by 2) The `valhalla/distilbart-mnli-12-1` Zero-Shot Classifier to categorize them as 'SKILL' or 'KNOWLEDGE'. A heuristic post-processing layer ensures high precision for key technical terms.*
    """)
    job_description = st.text_area(
        "Job Description Text",
        height=300,
        placeholder="Paste a job description here...",
    )

    if st.button("Analyze and Extract Entities", type="primary"):
        if job_description.strip():
            with st.spinner("Analyzing text and running classification..."):
                # Keep the result in session state: st.button is True only on
                # the run triggered by its click, and clicking the download
                # button below triggers a fresh run. Without this the results
                # would vanish as soon as the user downloads them.
                st.session_state["extraction_output"] = process_text(
                    job_description, ner_pipe, classifier_pipe
                )
        else:
            # Drop any stale result so it is not shown next to the warning.
            st.session_state.pop("extraction_output", None)
            st.warning("Please paste a job description into the text area.")

    output = st.session_state.get("extraction_output")
    if output is not None:
        st.subheader("Extraction Output (JSON)")
        st.json(output)

        json_str = json.dumps(output, indent=2)
        st.download_button(
            label="Download JSON Output",
            data=json_str,
            file_name="extracted_entities.json",
            mime="application/json",
        )