import re
from typing import Any, Dict, List

# Canonical section keys, each paired with the heading strings (Japanese and
# English) listed for that section.
SECTION_HEADERS = [
    ("work_experience", ["職歴", "職務経歴", "業務経験", "Work Experience", "Experience"]),
    ("education", ["学歴", "Education"]),
    ("certifications", ["資格", "認定", "Certificates", "Certifications"]),
    ("skills", ["スキル", "Skills"]),
]

# A "start - end" period such as "2018.04 - 2020.03" or "2020年4月〜現在".
# The end part is optional so that open-ended ranges ("2019.01 - ") still match.
# The optional "月" lets standard Japanese dates ("2020年4月") match, and the
# separator class covers hyphen, en/em dash, wave dash, full-width tilde and "~".
_PERIOD_RE = re.compile(
    r"(\d{4}[./年]\d{1,2}月?)\s*[-–—〜～~]\s*(\d{4}[./年]?\d{0,2}月?|現在|至今)?"
)

# Skills are separated by an ideographic comma, an ASCII comma or a newline.
_SKILL_SEPARATOR_RE = re.compile(r"[、,\n]\s*")


def _section_text(sections_dict: Dict[str, Any], key: str) -> str:
    """Return the text stored under *key*, else under ``<key>_raw``, else ""."""
    # Chained ``or`` also maps a None value (key present but empty) to "".
    return sections_dict.get(key) or sections_dict.get(f"{key}_raw") or ""


def _text_items(text: str) -> List[Dict[str, str]]:
    """Turn every non-blank line of *text* into a ``{"text": line}`` item."""
    return [{"text": line.strip()} for line in text.splitlines() if line.strip()]


def normalize_resume(sections_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Convert pre-split résumé sections into structured item lists.

    Args:
        sections_dict: Mapping that may contain ``work_experience``,
            ``education``, ``certifications`` and ``skills``. For the first
            three, a ``<key>_raw`` entry is used when ``<key>`` is missing or
            empty. ``skills`` may be a string (comma/newline separated) or a
            list. Missing keys and ``None`` values are treated as empty.

    Returns:
        A dict with:
            ``work_experience``: one ``{"period", "text"}`` item per line that
                contains a date range; lines without one are dropped.
            ``education`` / ``certifications``: one ``{"text"}`` item per
                non-blank line.
            ``skills``: list of skill strings (a list input is returned as-is).
            ``raw_sections``: the unparsed text used for the three text
                sections.
    """
    raw_work = _section_text(sections_dict, "work_experience")
    raw_education = _section_text(sections_dict, "education")
    raw_certifications = _section_text(sections_dict, "certifications")

    work_items = []
    for line in raw_work.splitlines():
        match = _PERIOD_RE.search(line)
        if match:
            # Strip: an open-ended range leaves trailing whitespace in the match.
            work_items.append({"period": match.group(0).strip(), "text": line.strip()})

    # Skills accept either a comma/newline separated string or a ready-made list.
    skills_raw = sections_dict.get("skills") or ""
    if isinstance(skills_raw, list):
        skill_items = skills_raw
    else:
        skill_items = [
            part.strip()
            for part in _SKILL_SEPARATOR_RE.split(skills_raw)
            if part.strip()
        ]

    return {
        "work_experience": work_items,
        "education": _text_items(raw_education),
        "certifications": _text_items(raw_certifications),
        "skills": skill_items,
        "raw_sections": {
            "work_experience": raw_work,
            "education": raw_education,
            "certifications": raw_certifications,
        },
    }