Spaces:
Runtime error
Runtime error
| # models/skill_extractor.py | |
| from huggingface_hub import snapshot_download | |
| import spacy | |
class SkillExtractor:
    """Extract skill mentions from free text with a pretrained spaCy pipeline.

    The pipeline is fetched from the Hugging Face Hub repository
    ``amjad-awad/skill-extractor`` and loaded once per instance; it is kept
    on ``self.nlp``.
    """

    def __init__(self):
        """Download the pretrained skill-extraction pipeline and load it."""
        # The value returned by snapshot_download is handed to spacy.load as
        # the location of the pipeline to load.
        model_path = snapshot_download("amjad-awad/skill-extractor")
        self.nlp = spacy.load(model_path)

    def extract_skills(self, text):
        """Return the distinct skills found in ``text``.

        Entities whose label contains ``"SKILL"`` are collected. Consecutive
        skill entities that are separated by exactly one whitespace character
        are merged into a single multi-word skill joined by one space.
        Entities separated by any other character (for example ``/`` or
        ``,``) are kept as separate skills.

        Args:
            text: The text to analyse. ``None`` or an empty value yields an
                empty result without running the pipeline.

        Returns:
            A list of skill strings in order of first appearance, with
            duplicates removed.
        """
        if not text:
            return []

        doc = self.nlp(text)
        skills = []
        current_skill = []
        prev_end = None

        for ent in doc.ents:
            if "SKILL" not in ent.label_:
                # A non-skill entity ends any multi-word skill in progress.
                if current_skill:
                    skills.append(" ".join(current_skill))
                current_skill = []
                prev_end = None
                continue

            # Offsets alone are not enough: "Python/Java" also has a gap of
            # one character, so the separator itself must be whitespace.
            # Entity offsets are looked up in doc.text.
            continues_previous = (
                prev_end is not None
                and ent.start_char == prev_end + 1
                and doc.text[prev_end].isspace()
            )
            if continues_previous:
                current_skill.append(ent.text)
            else:
                if current_skill:
                    skills.append(" ".join(current_skill))
                current_skill = [ent.text]
            prev_end = ent.end_char

        # Flush the skill that was still being assembled when the loop ended.
        if current_skill:
            skills.append(" ".join(current_skill))

        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(skills))