# NOTE(review): the lines below were scraped Hugging Face Spaces page chrome
# ("Spaces: Running") — kept here as a comment so the module stays valid Python.
import json

import streamlit as st
from transformers import pipeline

# --- Page Configuration ---
st.set_page_config(page_title="Skill vs Knowledge Extractor", layout="wide")
@st.cache_resource
def load_models():
    """Load and cache the two Hugging Face pipelines used by the app.

    The ``@st.cache_resource`` decorator is essential: Streamlit re-executes
    the whole script on every user interaction, and without it both models
    would be reloaded on every rerun. (The UI section already labels this
    call "Cached", so the decorator makes the code match that intent.)

    Models:
        1. NER model: extracts candidate technical terms from free text.
        2. Zero-shot classifier: categorizes each term as SKILL or KNOWLEDGE.

    Returns:
        tuple: ``(ner_pipe, classifier_pipe)`` on success, or ``(None, None)``
        if either model fails to load (an error is shown in the UI).
    """
    try:
        # 1. Entity Extraction Model (NER - finds the terms)
        st.info("Loading Entity Extraction Model...")
        ner_pipe = pipeline(
            "token-classification",
            model="jjzha/jobbert-base-cased-v2",
            aggregation_strategy="simple",  # Merges sub-word tokens
        )
        # 2. Zero-Shot Classification Model (Categorizes the terms)
        st.info("Loading Zero-Shot Classification Model...")
        classifier_pipe = pipeline(
            "zero-shot-classification",
            model="valhalla/distilbart-mnli-12-1",  # Smaller, faster classification model
        )
        return ner_pipe, classifier_pipe
    except Exception as e:
        # Surface the failure in the UI instead of crashing the script;
        # callers are expected to check for the (None, None) sentinel.
        st.error(f"Error loading models. Check your requirements.txt. Details: {e}")
        return None, None
def process_text(text, ner_pipe, classifier_pipe):
    """Extract candidate terms from *text* and classify each as SKILL or KNOWLEDGE.

    Args:
        text: Raw job-description text; falsy input short-circuits to empty output.
        ner_pipe: Token-classification pipeline. Called as ``ner_pipe(text)`` and
            expected to yield dicts with a ``'word'`` key.
        classifier_pipe: Zero-shot classification pipeline. Called once per
            candidate with ``candidate_labels=...`` and expected to return a
            dict whose ``'labels'`` list is ranked best-first.

    Returns:
        dict: ``{"SKILL": [...], "KNOWLEDGE": [...]}`` with sorted,
        de-duplicated term lists.
    """
    if not text:
        return {"SKILL": [], "KNOWLEDGE": []}

    # Step 1: extract entity candidates, dropping very short single words that
    # are unlikely to be meaningful (keep multi-word phrases or words > 2 chars).
    candidates = set()
    for entity in ner_pipe(text):
        word = entity["word"].strip()
        if len(word.split()) > 1 or len(word) > 2:
            candidates.add(word)

    if not candidates:
        return {"SKILL": [], "KNOWLEDGE": []}

    # Step 2: zero-shot classify each candidate into one of two buckets.
    classification_labels = ["software tool or technology", "concept or knowledge"]
    skills = []
    knowledge = []
    for candidate in candidates:
        try:
            result = classifier_pipe(candidate, candidate_labels=classification_labels)
            top_label = result["labels"][0]
            if top_label == "software tool or technology":
                skills.append(candidate)
            else:
                knowledge.append(candidate)
        except Exception:
            # Deliberate best-effort fallback: an unclassifiable term is
            # filed under KNOWLEDGE rather than aborting the whole run.
            knowledge.append(candidate)

    # sorted(set(...)) de-duplicates and gives deterministic ordering.
    return {
        "SKILL": sorted(set(skills)),
        "KNOWLEDGE": sorted(set(knowledge)),
    }
# --- UI Layout ---
st.title("💡 AI Job Description Analyzer")
st.markdown("Paste a job description below to extract and categorize entities.")

# 1. Load Models (Cached)
ner_pipe, classifier_pipe = load_models()

if ner_pipe and classifier_pipe:
    # 2. Input Area
    job_description = st.text_area(
        "Job Description Text",
        height=300,
        placeholder=(
            "Paste a job description here (e.g., 'We require a Python developer "
            "proficient in FastAPI and experienced with Kafka and RAG systems...')"
        ),
    )

    # 3. Process Button
    if st.button("Analyze and Extract Entities", type="primary"):
        if not job_description.strip():
            st.warning("Please enter a job description first.")
        else:
            with st.spinner("Analyzing text and running classification..."):
                output = process_text(job_description, ner_pipe, classifier_pipe)

                # Display Result
                st.subheader("Extraction Output (JSON)")
                st.json(output)

                # Option to download
                st.download_button(
                    label="Download JSON Output",
                    data=json.dumps(output, indent=2),
                    file_name="extracted_entities.json",
                    mime="application/json",
                )