Spaces:

Danial7
/

skill_roadmap_app

Sleeping

Danial7 committed on May 15, 2025

Commit

0cb6239

verified ·

1 Parent(s): 856e6a7

Create extractor.py

Files changed (1) hide show

extractor.py ADDED Viewed

+import spacy
+import re
+import pandas as pd
+nlp = spacy.load("en_core_web_sm")
def extract_entities(text, skills_df):
    """Find known skills in *text* and classify the candidate's background.

    The text is tokenized with the module-level spaCy pipeline ``nlp`` and
    each token's text is compared, exactly and case-sensitively, against the
    values of ``skills_df["Skill"]``.

    Args:
        text: Free text to scan for skills.
        skills_df: Table exposing a ``"Skill"`` column of known skill names.

    Returns:
        A ``(found_skills, background)`` tuple. ``found_skills`` is a sorted
        list of the distinct tokens that occur in the ``"Skill"`` column.
        ``background`` is ``"technical"`` when at least one found skill is in
        the hard-coded technical keyword set, otherwise ``"non-technical"``.
    """
    doc = nlp(text)

    # Build the lookup set once; testing each token against the raw column
    # values would rescan the whole column for every token.
    known_skills = set(skills_df["Skill"].values)

    # NOTE(review): matching is per token, so multi-word entries in the
    # "Skill" column presumably never match -- confirm this is intended.
    # Sorting gives callers a deterministic order (set order is arbitrary).
    found_skills = sorted(
        {token.text for token in doc if token.text in known_skills}
    )

    # Background field classification
    tech_keywords = {"Python", "ML", "Cloud", "DevOps", "AI"}
    is_technical = not tech_keywords.isdisjoint(found_skills)
    background = "technical" if is_technical else "non-technical"
    return found_skills, background
# A plausible calendar year (1900-2099), optionally preceded by an abbreviated
# month name. Only the year is captured, and it must not run on into further
# digits, so prefixes of longer numbers are not mistaken for years.
_YEAR_PATTERN = re.compile(
    r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)?\s?"
    r"((?:19|20)\d{2})(?!\d)"
)


def extract_experience_years(text):
    """Estimate years of experience from the years mentioned in *text*.

    Recognizes dates such as ``"2018 - 2021"`` or ``"Jan 2017 – Mar 2023"``
    and returns the span between the earliest and the latest year found.
    This is the overall range, not a sum of individual date ranges.

    Args:
        text: Free text to scan for years.

    Returns:
        ``max(year) - min(year)`` over all recognized years, or ``0`` when
        no year is found (a single year also yields ``0``).
    """
    years = [int(year) for year in _YEAR_PATTERN.findall(text)]
    if not years:
        return 0
    return max(years) - min(years)