Spaces:

Corin1998
/

HFResumeIntakeSystem

Runtime error

Corin1998 committed on Nov 23, 2025

Commit

150af7a

verified ·

1 Parent(s): d8bdfe7

Create pipelines/skills.py

Files changed (1) hide show

pipelines/skills.py ADDED Viewed

+import re
+from typing import Dict, List
# Skill vocabulary, organised by category. The category tuples are
# concatenated below, in this order, to form the flat lexicon.

# Programming languages
_LANGUAGES = (
    "Python", "C++", "Java", "Go", "Rust", "JavaScript", "TypeScript", "SQL", "R",
)
# Frameworks / tools
_FRAMEWORKS_AND_TOOLS = (
    "PyTorch", "TensorFlow", "Keras", "scikit-learn", "Hugging Face", "Transformers",
    "FastAPI", "Django", "Flask", "React", "Vue", "Next.js", "Node.js",
)
# Data infrastructure
_DATA_PLATFORMS = ("Spark", "Hadoop", "Airflow", "dbt", "Kafka")
# Cloud
_CLOUD = ("AWS", "GCP", "Azure", "Docker", "Kubernetes")
# Analytics / BI
_ANALYTICS_BI = ("Tableau", "Power BI", "Looker")
# Miscellaneous
_MISC = ("Git", "Linux", "Terraform", "OpenAPI")

# Flat list of skill names that extract_skills searches for in the text.
SKILL_LEXICON = [
    *_LANGUAGES,
    *_FRAMEWORKS_AND_TOOLS,
    *_DATA_PLATFORMS,
    *_CLOUD,
    *_ANALYTICS_BI,
    *_MISC,
]

# Substrings that mark a line as a name candidate ("氏名" is Japanese for
# "full name").
NAME_HINTS = ["氏名", "Name"]
+def _regex_ner_contacts(text: str) -> Dict[str, List[str]]:
+    emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", text)
+    phones = re.findall(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}", text)
+    return {"EMAIL": list(set(emails)), "PHONE": list(set(phones))}
def extract_skills(text: str, sections: Dict[str, str]) -> Dict:
    """Extract skills, contact details and name candidates from resume text.

    Args:
        text: Full resume text. Empty or ``None`` is treated as an empty string.
        sections: Currently unused; kept so existing callers keep working.

    Returns:
        A dict with three keys:
          * ``"skills"``: sorted list of SKILL_LEXICON entries found in *text*.
          * ``"contacts"``: the result of ``_regex_ner_contacts(text)``.
          * ``"name_candidates"``: stripped lines containing one of NAME_HINTS
            (at most three per hint), de-duplicated in order of appearance.

    Skills are matched case-insensitively as whole tokens, not raw substrings,
    so "R" no longer matches every text containing the letter r and "Java" no
    longer matches "JavaScript". A side effect is that fused spellings such as
    "ReactJS" are not recognised as "React".
    """
    text = text or ""
    contacts = _regex_ner_contacts(text)

    lines = text.splitlines()
    hinted: List[str] = []
    for hint in NAME_HINTS:
        hinted += [line.strip() for line in lines if hint in line][:3]
    # A line containing several hints would otherwise be listed repeatedly.
    name_candidates = list(dict.fromkeys(hinted))

    found = []
    for skill in SKILL_LEXICON:
        # ASCII-only boundaries rather than \b: Japanese characters count as
        # word characters, so \b would miss e.g. "PythonとJava". A trailing
        # digit is allowed so that "Python3" still counts as "Python".
        pattern = r"(?<![A-Za-z0-9])" + re.escape(skill) + r"(?![A-Za-z])"
        if re.search(pattern, text, re.IGNORECASE):
            found.append(skill)

    return {
        "skills": sorted(set(found)),
        "contacts": contacts,
        "name_candidates": name_candidates,
    }