# models/skill_extractor.py
"""Skill extraction using a pretrained spaCy pipeline from the Hugging Face Hub."""

from huggingface_hub import snapshot_download
import spacy

# Hub repository id used when the caller does not name a model.
DEFAULT_MODEL_ID = "amjad-awad/skill-extractor"


class SkillExtractor:
    """Extract skill mentions from free text with a spaCy NER pipeline.

    Attributes:
        nlp: The pipeline object returned by ``spacy.load``.
    """

    def __init__(self, model_id=DEFAULT_MODEL_ID):
        """Fetch the model snapshot and load it as a spaCy pipeline.

        Args:
            model_id: Hugging Face Hub repository id passed to
                ``snapshot_download``. Defaults to ``DEFAULT_MODEL_ID``.
        """
        # Load Hugging Face pretrained skill extractor
        model_path = snapshot_download(model_id)
        self.nlp = spacy.load(model_path)

    def extract_skills(self, text):
        """Extract skills from text and merge multi-word skills.

        Args:
            text: Input passed to the pipeline. ``None`` or an empty value
                yields an empty result without invoking the pipeline.

        Returns:
            A list of skill strings in order of first appearance, with
            exact-duplicate strings removed.
        """
        if not text:
            return []

        doc = self.nlp(text)
        return self._merge_adjacent_skills(doc.ents)

    @staticmethod
    def _merge_adjacent_skills(ents):
        """Join neighbouring skill entities and drop duplicate results.

        Args:
            ents: Iterable of entity objects exposing ``label_``, ``text``,
                ``start_char`` and ``end_char``.

        Returns:
            A list of merged skill strings, de-duplicated while keeping the
            order in which each string first occurred.
        """
        merged = []
        parts = []  # texts of the skill run currently being assembled
        prev_end = None  # end offset of the previous skill entity, if any

        for ent in ents:
            # Substring match: any label containing "SKILL" counts.
            is_skill = "SKILL" in ent.label_

            # An entity continues the current run only when it starts exactly
            # one character after the previous skill entity ended.
            # NOTE(review): that single character is not inspected, so a
            # comma or slash is treated like a space -- confirm intended.
            continues_run = (
                is_skill
                and prev_end is not None
                and ent.start_char == prev_end + 1
            )

            # Anything that does not continue the run closes it.
            if not continues_run and parts:
                merged.append(" ".join(parts))
                parts = []

            if is_skill:
                parts.append(ent.text)
                prev_end = ent.end_char
            else:
                prev_end = None

        if parts:
            merged.append(" ".join(parts))

        # Remove duplicates; dict preserves first-seen order.
        return list(dict.fromkeys(merged))