project / models /skill_extractor.py
venkataashok's picture
Upload 3 files
8f04bd0 verified
# models/skill_extractor.py
from huggingface_hub import snapshot_download
import spacy
class SkillExtractor:
    """Extract skill phrases from free text with a pretrained spaCy pipeline.

    The pipeline is fetched from the Hugging Face Hub and loaded with
    ``spacy.load``. Entities whose label contains ``"SKILL"`` are treated as
    skills; neighbouring skill entities separated by a single whitespace
    character are merged into one multi-word skill.
    """

    # Hub repository used when the caller does not supply one.
    DEFAULT_MODEL_REPO = "amjad-awad/skill-extractor"

    def __init__(self, model_repo=DEFAULT_MODEL_REPO):
        """Download (or reuse a cached copy of) the model and load it.

        Args:
            model_repo: Hugging Face Hub repository id of the spaCy pipeline.
                Defaults to ``DEFAULT_MODEL_REPO``.
        """
        # Load Hugging Face pretrained skill extractor
        model_path = snapshot_download(model_repo)
        self.nlp = spacy.load(model_path)

    def extract_skills(self, text):
        """Extract skills from text and merge multi-word skills.

        Args:
            text: The text to analyse. Empty or ``None`` yields ``[]``.

        Returns:
            A list of unique skill strings in order of first appearance.
            Words of a merged skill are joined with a single space.
        """
        if not text:
            # Nothing to analyse; skip running the pipeline.
            return []

        doc = self.nlp(text)
        # Entity offsets index into the document text, so inspect that string
        # when looking at the character between two entities.
        doc_text = doc.text

        skills = []
        current_skill = []
        prev_end = None

        for ent in doc.ents:
            if "SKILL" not in ent.label_:
                # A non-skill entity ends any skill phrase in progress.
                if current_skill:
                    skills.append(" ".join(current_skill))
                current_skill = []
                prev_end = None
                continue

            # Merge only when exactly one *whitespace* character separates
            # this entity from the previous skill. A separator such as ","
            # or "/" means two distinct skills ("Python,SQL"), not one.
            continues_phrase = (
                prev_end is not None
                and ent.start_char == prev_end + 1
                and doc_text[prev_end].isspace()
            )
            if continues_phrase:
                current_skill.append(ent.text)
            else:
                if current_skill:
                    skills.append(" ".join(current_skill))
                current_skill = [ent.text]
            prev_end = ent.end_char

        # Flush the phrase still being assembled when the loop ends.
        if current_skill:
            skills.append(" ".join(current_skill))

        # Remove duplicates while keeping first-seen order.
        return list(dict.fromkeys(skills))