import re
from typing import Dict, List, Tuple

# Date range such as "2019.4 - 2021.3", "2019年4〜現在", or an open-ended
# "2019/4 -" (the end of the range is optional).
_PERIOD_RANGE_RE = re.compile(
    r"(\d{4}[./年]\d{1,2})\s*[-〜~]\s*(\d{4}[./年]?\d{0,2}|現在|至今)?"
)

# First "year<sep>month" pair in a string, e.g. "2019.4", "2019/04", "2019年4".
_YEAR_MONTH_RE = re.compile(r"(\d{4})([./年])(\d{1,2})")


def _period_key(item_text: str) -> str:
    """Return the date-range substring of *item_text* for use in a dedup key.

    Falls back to the first 50 characters when no date range is found.
    A ``None`` or empty input yields an empty string.
    """
    item_text = item_text or ""
    match = _PERIOD_RANGE_RE.search(item_text)
    return match.group(0) if match else item_text[:50]


def _recency_sort_key(entry: Dict) -> Tuple[int, int]:
    """Sort key that puts dated entries first, most recent year/month first.

    The date is taken from ``entry["period"]`` when that is non-empty,
    otherwise from ``entry["text"]``. Entries without a recognizable
    year/month all share the key ``(0, 1)``, so a stable sort keeps them in
    their original relative order after every dated entry.
    """
    source = entry.get("period") or entry.get("text") or ""
    match = _YEAR_MONTH_RE.search(source)
    if match is None:
        return (0, 1)
    # Both groups consist solely of digits, so int() cannot fail here.
    year_month = int(match.group(1)) * 100 + int(match.group(3))
    return (-year_month, 0)


def _append_unseen_by_text(target: List[Dict], seen: set, items) -> None:
    """Append to *target* each item whose non-empty "text" is not in *seen*."""
    for item in items or []:
        text = item.get("text") or ""
        if text and text not in seen:
            seen.add(text)
            target.append(item)


def merge_normalized_records(records: List[Dict]) -> Dict:
    """Merge several normalized records into a single de-duplicated record.

    Args:
        records: Dicts that may carry the keys "work_experience",
            "education", "certifications" (lists of dicts with a "text"
            entry), "skills" (list of hashable values) and "raw_sections"
            (mapping of section name to text). Missing keys and keys whose
            value is ``None`` are treated as empty.

    Returns:
        A dict with the same five keys:

        * "work_experience": entries de-duplicated on their date range plus
          the first 80 characters of their text, sorted so that dated
          entries come first (newest first) and undated ones keep their
          original order.
        * "education" / "certifications": entries with non-empty text,
          de-duplicated on the exact text, in first-seen order.
        * "skills": sorted list of the distinct truthy skills.
        * "raw_sections": per section name, the values from all records
          joined with newlines and stripped of surrounding whitespace.
    """
    merged = {
        "work_experience": [],
        "education": [],
        "certifications": [],
        "skills": [],
        "raw_sections": {},
    }
    seen_work = set()
    seen_education = set()
    seen_certifications = set()
    skills = set()

    for record in records or []:
        # Work experience: unlike education/certifications, entries with an
        # empty text are kept (once), because the key is built from the
        # period plus a text prefix rather than gated on the text itself.
        for item in record.get("work_experience") or []:
            text = item.get("text") or ""
            key = _period_key(text) + "|" + text[:80]
            if key not in seen_work:
                seen_work.add(key)
                merged["work_experience"].append(item)

        _append_unseen_by_text(
            merged["education"], seen_education, record.get("education")
        )
        _append_unseen_by_text(
            merged["certifications"],
            seen_certifications,
            record.get("certifications"),
        )

        for skill in record.get("skills") or []:
            if skill:
                skills.add(skill)

        # Raw sections: concatenate the values of every record, in order.
        raw_sections = merged["raw_sections"]
        for name, content in (record.get("raw_sections") or {}).items():
            previous = raw_sections.get(name, "")
            raw_sections[name] = (previous + "\n" + (content or "")).strip()

    # list.sort is stable, so undated entries keep their insertion order.
    merged["work_experience"].sort(key=_recency_sort_key)
    merged["skills"] = sorted(skills)
    return merged