| from typing import List, Dict |
| import re |
|
|
| def _period_key(item_text: str) -> str: |
| m = re.search(r"(\d{4}[./年]\d{1,2})\s*[-〜~]\s*(\d{4}[./年]?\d{0,2}|現在|至今)?", item_text) |
| return m.group(0) if m else item_text[:50] |
|
|
def merge_normalized_records(records: List[Dict]) -> Dict:
    """Merge several normalized records into a single de-duplicated record.

    Each record is a dict that may carry the keys ``work_experience``,
    ``education``, ``certifications``, ``skills`` and ``raw_sections``.
    A missing key, a key whose value is ``None``, and a ``None``/empty
    record are all treated as "nothing to merge".

    Merge rules:

    * ``work_experience``: entries are de-duplicated on
      ``_period_key(text) + "|" + text[:80]`` (first occurrence wins), then
      sorted so that the latest year/month found in ``period`` (or ``text``
      when ``period`` is empty) comes first. Entries without a parsable
      year/month go last; ties keep their input order.
    * ``education`` / ``certifications``: entries with an empty ``text`` are
      dropped; the rest are de-duplicated on the exact ``text``, keeping
      input order.
    * ``skills``: falsy values are dropped; the rest are de-duplicated and
      returned sorted.
    * ``raw_sections``: bodies that share a section name are joined with a
      newline in input order, and the result is stripped.

    Args:
        records: The records to merge.

    Returns:
        A new dict with the five keys above. The entry dicts themselves are
        the same objects as in the input (they are not copied).
    """
    merged = {
        "work_experience": [],
        "education": [],
        "certifications": [],
        "skills": [],
        "raw_sections": {},
    }
    seen_we, seen_edu, seen_cert = set(), set(), set()
    skill_set = set()

    for r in records or []:
        if not r:
            continue

        # NOTE: entries with an empty text all produce the key "|", so only
        # the first such entry is kept.
        for w in r.get("work_experience") or []:
            text = w.get("text") or ""
            key = _period_key(text) + "|" + text[:80]
            if key not in seen_we:
                seen_we.add(key)
                merged["work_experience"].append(w)

        # Education and certifications share the same rule: exact-text
        # de-duplication, skipping entries without text.
        for section, seen in (("education", seen_edu), ("certifications", seen_cert)):
            for item in r.get(section) or []:
                text = item.get("text") or ""
                if text and text not in seen:
                    seen.add(text)
                    merged[section].append(item)

        for s in r.get("skills") or []:
            if s:
                skill_set.add(s)

        for name, body in (r.get("raw_sections") or {}).items():
            previous = merged["raw_sections"].get(name, "")
            merged["raw_sections"][name] = (previous + "\n" + (body or "")).strip()

    def _sort_key(w):
        # Newest first: the year/month is negated so an ascending sort puts
        # later dates earlier; the second tuple element pushes undated
        # entries (0, 1) after every dated one.
        m = re.search(r"(\d{4})[./年](\d{1,2})", w.get("period") or w.get("text") or "")
        if m is None:
            return (0, 1)
        # int() cannot fail here: both groups consist solely of \d matches.
        return (-(int(m.group(1)) * 100 + int(m.group(2))), 0)

    merged["work_experience"].sort(key=_sort_key)
    merged["skills"] = sorted(skill_set)
    return merged
|
|