dohyune commited on
Commit
f84af8d
ยท
verified ยท
1 Parent(s): 3902f45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -267
app.py CHANGED
@@ -1,27 +1,40 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import io, zipfile, re, html, json
 
 
 
4
  from typing import Dict, Tuple
5
 
6
- st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ(HWPX) โ€” ๋ฌธ๋‹จ ๋‹จ์œ„ ์™„์ „ ์น˜ํ™˜", layout="wide")
7
- st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ โ€” HWPX ํ•„๋“œยทํ† ํฐยทํ…์ŠคํŠธ ์™„์ „ ์น˜ํ™˜(๋ฌธ๋‹จ ๋‹จ์œ„)")
 
 
 
 
 
8
 
9
- # -------------------- ๋ฐ์ดํ„ฐ ์œ ํ‹ธ --------------------
 
10
  def _year_range(series: pd.Series) -> str:
11
  s = series.astype(str).fillna("")
12
  v = s[~s.isin(["", "0", "0000"])]
13
- if v.empty: return "0000-0000"
 
14
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
15
- if nums.empty: return "0000-0000"
 
16
  return f"{nums.min():04d}-{nums.max():04d}"
17
 
 
18
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
 
19
  df = df.copy()
20
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
21
  if "์ œ๋ชฉ" in df.columns:
22
  df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
23
 
24
- # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„) = ์ข…๋ฃŒ์—ฐ๋„ ๊ทธ๋ฃน ๋ฒ”์œ„
25
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
26
  yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
27
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
@@ -32,72 +45,102 @@ def build_rows(df: pd.DataFrame) -> pd.DataFrame:
32
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
33
  lists = []
34
  for b, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
35
- lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
36
- for _, r in g.iterrows()]
 
 
37
  lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
38
  list_df = pd.DataFrame(lists)
39
 
40
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
41
- meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
42
  meta_exist = [c for c in meta_cols if c in df.columns]
43
- meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist] if meta_exist \
44
- else pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
 
 
45
 
46
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
47
  return merged
48
 
49
- # -------------------- ์น˜ํ™˜ ์œ ํ‹ธ --------------------
 
 
 
50
  FIELD_PAIR_RE_TMPL = (
51
  r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
52
  r'(.*?)'
53
  r'<(?P=fprefix):fieldEnd\b[^>]*/>'
54
  )
 
55
  TOKEN_FMT = "{{{{{key}}}}}"
56
 
57
- # ๋ฌธ๋‹จ(<*:p>) ํƒ์ƒ‰ ํŒจํ„ด
58
  PARA_RE = re.compile(
59
  r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
60
- re.DOTALL
61
  )
62
 
63
- # ์›๋ณธ run ์Šคํƒ€์ผ์„ ์ถ”์ถœํ•˜๋Š” ํ•จ์ˆ˜
64
- def _extract_run_style(body: str, pprefix: str) -> str:
65
- """๋ฌธ๋‹จ ๋‚ด์šฉ์—์„œ ์ฒซ ๋ฒˆ์งธ run ์š”์†Œ์˜ ์Šคํƒ€์ผ์„ ์ถ”์ถœ"""
66
- run_pattern = re.compile(
67
- rf'<{pprefix}:run[^>]*>.*?</{pprefix}:run>',
68
- re.DOTALL
69
- )
70
- match = run_pattern.search(body)
71
- if match:
72
- return match.group(0)
73
- return f'<{pprefix}:run><{pprefix}:t><//{pprefix}:t></{pprefix}:run>'
74
-
75
- # ๋ฌธ๋‹จ ํ•˜๋‚˜๋ฅผ ๊ฐ™์€ ์Šคํƒ€์ผ๋กœ ๋ณต์ œํ•ด์ฃผ๋Š” ํ—ฌํผ (์Šคํƒ€์ผ ๋ณด์กด)
76
- def _make_para_with_style(pprefix: str, pattrs: str, text: str, original_run: str) -> str:
77
- esc = html.escape("" if text is None else str(text))
78
-
79
- # ์›๋ณธ run์—์„œ ํ…์ŠคํŠธ ๋ถ€๋ถ„๋งŒ ๊ต์ฒด
80
- text_pattern = re.compile(rf'(<{pprefix}:t[^>]*>)[^<]*(</{pprefix}:t>)')
81
- new_run = text_pattern.sub(rf'\g<1>{esc}\g<2>', original_run)
82
-
83
- # ๋งŒ์•ฝ ํ…์ŠคํŠธ ๋…ธ๋“œ๊ฐ€ ์—†๋‹ค๋ฉด ๊ธฐ๋ณธ ํ˜•ํƒœ๋กœ
84
- if new_run == original_run:
85
- t_pattern = re.compile(rf'(<{pprefix}:run[^>]*>)(.*?)(</{pprefix}:run>)', re.DOTALL)
86
- new_run = t_pattern.sub(rf'\g<1><{pprefix}:t>{esc}</{pprefix}:t>\g<3>', original_run)
87
-
88
- return f'<{pprefix}:p{pattrs}>{new_run}</{pprefix}:p>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def _split_lines(val) -> list:
91
- if val is None: return [""]
92
- return str(val).replace("\r\n","\n").split("\n")
 
 
93
 
94
  def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
95
  """
96
- key๊ฐ€ ํฌํ•จ๋œ '๋ถ€๋ชจ ๋ฌธ๋‹จ ์ „์ฒด'๋ฅผ, ๊ฐ’์˜ ๊ฐ ์ค„์„ ๋‹ด์€ ์—ฌ๋Ÿฌ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด.
97
- ์›๋ณธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•˜๋ฉด์„œ ๊ต์ฒด.
98
  """
99
  pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
100
- tnode_pat = re.compile(rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>', re.DOTALL)
 
 
 
101
  token_str = TOKEN_FMT.format(key=key)
102
 
103
  def para_repl(m):
@@ -107,29 +150,28 @@ def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
107
 
108
  lines = _split_lines(value)
109
  pprefix = m.group("pprefix")
110
- pattrs = m.group("pattrs")
111
-
112
- # ์›๋ณธ run ์Šคํƒ€์ผ ์ถ”์ถœ
113
- original_run = _extract_run_style(body, pprefix)
114
-
115
- # ๊ฐ ์ค„์— ๋Œ€ํ•ด ์›๋ณธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•˜๋ฉด์„œ ์ƒˆ ๋ฌธ๋‹จ ์ƒ์„ฑ
116
- new_paras = "".join(_make_para_with_style(pprefix, pattrs, ln, original_run) for ln in lines)
117
  dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
118
  return new_paras
119
 
120
  xml2 = PARA_RE.sub(para_repl, xml)
121
  if xml2 != xml:
122
- dbg["touched"] = True
123
  return xml2
124
 
 
125
  def _runs_plain(text: str) -> str:
126
  return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
127
 
 
128
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
129
  changed_any = False
130
 
131
- # 0) ๋‹ค์ค‘์ค„ ํ‚ค๋Š” ๋จผ์ € "๋ถ€๋ชจ ๋ฌธ๋‹จ ๊ต์ฒด"๋กœ ์ฒ˜๋ฆฌ (์—…๋ฌด๋ช…์€ ์ œ์™ธํ•˜์—ฌ ํฐํŠธ ๋ฌธ์ œ ํ•ด๊ฒฐ)
132
- multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ œ๋ชฉ)\d+$", re.IGNORECASE)
133
  for k, v in mapping.items():
134
  if multi_key.match(k):
135
  xml_new = _replace_para_multiline(xml, k, v, dbg)
@@ -137,7 +179,7 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
137
  xml = xml_new
138
  changed_any = True
139
 
140
- # 1) ํ•„๋“œ์Œ(์ธ๋ผ์ธ) ์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
141
  for k, v in mapping.items():
142
  if multi_key.match(k):
143
  continue
@@ -149,27 +191,29 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
149
  xml = xml_new
150
  changed_any = True
151
 
152
- # 2) ์ˆœ์ˆ˜ ํ…์ŠคํŠธ ์ž๋ฆฌํ‘œ์‹œ์ž(<*:t>ํ‚ค</*:t>) ๋ถ€๋ถ„์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
153
  tnode_all = re.compile(
154
  r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
155
- re.DOTALL
156
  )
157
  for k, v in mapping.items():
158
  if multi_key.match(k):
159
  continue
 
160
  def repl_tnode(m):
161
  text_node = m.group(3)
162
  if k not in text_node:
163
  return m.group(0)
164
  new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
165
  return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
 
166
  xml2 = tnode_all.sub(repl_tnode, xml)
167
  if xml2 != xml:
168
  dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
169
  xml = xml2
170
  changed_any = True
171
 
172
- # 3) ํ† ํฐ ์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
173
  for k, v in mapping.items():
174
  if multi_key.match(k):
175
  continue
@@ -183,24 +227,25 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
183
  dbg["files_touched"] = True
184
  return xml
185
 
186
- def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
187
- import stat, time
188
- dbg = {"para_hits":{}, "field_hits":{}, "text_hits":{}, "token_hits":{}, "touched_files": []}
 
 
 
189
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
190
  out_buf = io.BytesIO()
191
  zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
192
 
193
- # ํ˜„์žฌ ์‹œ๊ฐ„
194
  now = time.localtime()
195
 
196
- # mimetype ๋ฌด์••์ถ• + ๋งจ์•ž
197
  names = zin.namelist()
198
  if "mimetype" in names:
199
  zi = zipfile.ZipInfo("mimetype")
200
  zi.compress_type = zipfile.ZIP_STORED
201
- # ์™„์ „ํžˆ ์ƒˆ๋กœ์šด ZipInfo๋กœ ์ฝ๊ธฐ์ „์šฉ ๋ฐฉ์ง€
202
- zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ˜ ํŒŒ์ผ + ๋ชจ๋“  ๊ถŒํ•œ
203
- zi.create_system = 0 # DOS/Windows
204
  zi.date_time = now[:6]
205
  zout.writestr(zi, zin.read("mimetype"))
206
 
@@ -212,22 +257,29 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
212
  try:
213
  s = data.decode("utf-8", errors="ignore")
214
  before = s
215
- s = _apply_to_xml(s, mapping, {"para_hits":dbg["para_hits"], "field_hits":dbg["field_hits"],
216
- "text_hits":dbg["text_hits"], "token_hits":dbg["token_hits"],
217
- "files_touched":False})
 
 
 
 
 
 
 
 
218
  if s != before:
219
  dbg["touched_files"].append(e.filename)
220
  data = s.encode("utf-8")
221
  except Exception:
222
  pass
223
-
224
- # ์™„์ „ํžˆ ์ƒˆ๋กœ์šด ZipInfo ์ƒ์„ฑ์œผ๋กœ ์ฝ๊ธฐ์ „์šฉ ๋ฐฉ์ง€
225
  zi = zipfile.ZipInfo(e.filename)
226
  zi.compress_type = zipfile.ZIP_DEFLATED
227
- zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ˜ ํŒŒ์ผ + ๋ชจ๋“  ๊ถŒํ•œ
228
- zi.create_system = 0 # DOS/Windows ์‹œ์Šคํ…œ
229
- zi.date_time = now[:6] # ํ˜„์žฌ ์‹œ๊ฐ„
230
- zi.flag_bits = 0 # ํŠน๋ณ„ํ•œ ํ”Œ๋ž˜๊ทธ ์—†์Œ
231
  zout.writestr(zi, data)
232
 
233
  zout.close()
@@ -235,200 +287,155 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
235
  zin.close()
236
  return out_buf.getvalue(), dbg
237
 
 
 
 
238
  def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
239
- """HWPX ํŒŒ์ผ๋“ค์„ ์„น์…˜ ๋‹จ์œ„๋กœ ๋ณ‘ํ•ฉ (COM InsertFile๊ณผ ์œ ์‚ฌํ•œ ๋ฐฉ์‹)"""
240
  import time
241
-
242
  base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
243
  add_zip = zipfile.ZipFile(io.BytesIO(additional_hwpx), "r")
244
-
245
  out_buf = io.BytesIO()
246
  out_zip = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
247
-
248
  now = time.localtime()
249
-
250
- # mimetype ๋จผ์ € ์ฒ˜๋ฆฌ
251
  if "mimetype" in base_zip.namelist():
252
  zi = zipfile.ZipInfo("mimetype")
253
  zi.compress_type = zipfile.ZIP_STORED
254
  zi.external_attr = 0o100666 << 16
255
  zi.create_system = 0
256
  zi.date_time = now[:6]
257
- zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
-
260
- # ๋ฒ ์ด์Šค ํŒŒ์ผ๋“ค ์ฒ˜๋ฆฌ
261
- base_sections = {}
262
- base_files = {}
263
-
264
- for filename in base_zip.namelist():
265
- if filename == "mimetype":
266
  continue
267
- data = base_zip.read(filename)
268
-
269
- if filename.startswith("Contents/section") and filename.endswith(".xml"):
270
- base_sections[filename] = data.decode("utf-8", errors="ignore")
271
  else:
272
- base_files[filename] = data
273
-
274
- # ์ถ”๊ฐ€ ํŒŒ์ผ์˜ ์„น์…˜๋“ค ์ˆ˜์ง‘
275
- add_sections = {}
276
- next_section_num = len(base_sections) + 1
277
-
278
- for filename in add_zip.namelist():
279
- if filename.startswith("Contents/section") and filename.endswith(".xml"):
280
- # ์ƒˆ๋กœ์šด ์„น์…˜ ๋ฒˆํ˜ธ๋กœ ๋ณ€๊ฒฝ
281
- new_filename = f"Contents/section{next_section_num}.xml"
282
- add_sections[new_filename] = add_zip.read(filename).decode("utf-8", errors="ignore")
283
- next_section_num += 1
284
-
285
- # ๋ชจ๋“  ๋ฒ ์ด์Šค ํŒŒ์ผ๋“ค ๋ณต์‚ฌ
286
- for filename, data in base_files.items():
287
- zi = zipfile.ZipInfo(filename)
288
- zi.compress_type = zipfile.ZIP_DEFLATED
289
- zi.external_attr = 0o100666 << 16
290
- zi.create_system = 0
291
- zi.date_time = now[:6]
292
- zi.flag_bits = 0
293
- out_zip.writestr(zi, data)
294
-
295
- # ๋ฒ ์ด์Šค ์„น์…˜๋“ค ๋ณต์‚ฌ
296
- for filename, content in base_sections.items():
297
- zi = zipfile.ZipInfo(filename)
298
- zi.compress_type = zipfile.ZIP_DEFLATED
299
- zi.external_attr = 0o100666 << 16
300
- zi.create_system = 0
301
- zi.date_time = now[:6]
302
- zi.flag_bits = 0
303
- out_zip.writestr(zi, content.encode("utf-8"))
304
-
305
- # ์ƒˆ๋กœ์šด ์„น์…˜๋“ค ์ถ”๊ฐ€
306
- for filename, content in add_sections.items():
307
- zi = zipfile.ZipInfo(filename)
308
  zi.compress_type = zipfile.ZIP_DEFLATED
309
  zi.external_attr = 0o100666 << 16
310
  zi.create_system = 0
311
  zi.date_time = now[:6]
312
  zi.flag_bits = 0
313
  out_zip.writestr(zi, content.encode("utf-8"))
314
-
315
- # BodyText ์—…๋ฐ์ดํŠธ (์ƒˆ ์„น์…˜ ์ฐธ์กฐ ์ถ”๊ฐ€)
316
- if "Contents/bodytext.xml" in base_files:
317
- bodytext = base_files["Contents/bodytext.xml"].decode("utf-8", errors="ignore")
318
- updated_bodytext = add_sections_to_bodytext(bodytext, list(add_sections.keys()))
319
-
320
- zi = zipfile.ZipInfo("Contents/bodytext.xml")
321
- zi.compress_type = zipfile.ZIP_DEFLATED
322
- zi.external_attr = 0o100666 << 16
323
- zi.create_system = 0
324
- zi.date_time = now[:6]
325
- zi.flag_bits = 0
326
- out_zip.writestr(zi, updated_bodytext.encode("utf-8"))
327
-
328
  base_zip.close()
329
  add_zip.close()
330
  out_zip.close()
331
  out_buf.seek(0)
332
-
333
  return out_buf.getvalue()
334
 
335
- def add_sections_to_bodytext(bodytext: str, new_section_files: list) -> str:
336
- """BodyText์— ์ƒˆ ์„น์…˜ ์ฐธ์กฐ ์ถ”๊ฐ€"""
337
- # ๋งˆ์ง€๋ง‰ ์„น์…˜ ๋’ค์— ์ƒˆ ์„น์…˜๋“ค ์ถ”๊ฐ€
338
- # </hml:body> ํƒœ๊ทธ ์•ž์— ์ƒˆ ์„น์…˜ ์ฐธ์กฐ ์‚ฝ์ž…
339
-
340
- section_refs = []
341
- for section_file in new_section_files:
342
- # section1.xml -> 1 ์ถ”์ถœ
343
- section_num = section_file.split("section")[1].split(".xml")[0]
344
- section_ref = f'<hml:secDef><hml:secPtr hml:hRef="../Contents/section{section_num}.xml#0"/></hml:secDef>'
345
- section_refs.append(section_ref)
346
-
347
- if section_refs:
348
- # </hml:body> ์•ž์— ์‚ฝ์ž…
349
- body_close_pattern = re.compile(r'(</hml:body>)')
350
- new_sections_xml = ''.join(section_refs)
351
- bodytext = body_close_pattern.sub(new_sections_xml + r'\1', bodytext)
352
-
353
- return bodytext
354
-
355
- def update_page_id(base_xml: str, new_page: str) -> str:
356
- """ํŽ˜์ด์ง€ ID๋ฅผ ์ค‘๋ณต๋˜์ง€ ์•Š๊ฒŒ ์—…๋ฐ์ดํŠธ (๋” ์ด์ƒ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Œ)"""
357
- return new_page
358
-
359
- def add_page_to_section(base_xml: str, add_xml: str) -> str:
360
- """์„น์…˜์— ์ƒˆ ํŽ˜์ด์ง€ ์ถ”๊ฐ€ (๋” ์ด์ƒ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Œ)"""
361
- return base_xml
362
-
363
- def merge_section_xml_list(xml_list: list) -> str:
364
- """์—ฌ๋Ÿฌ ์„น์…˜ XML์„ ํ•˜๋‚˜๋กœ ๋ณ‘ํ•ฉ (์‚ฌ์šฉํ•˜์ง€ ์•Š์ง€๋งŒ ํ˜ธํ™˜์„ฑ ์œ ์ง€)"""
365
- if len(xml_list) <= 1:
366
- return xml_list[0] if xml_list else ""
367
-
368
- base_xml = xml_list[0]
369
- for additional_xml in xml_list[1:]:
370
- base_xml = add_page_to_section(base_xml, additional_xml)
371
-
372
- return base_xml
373
 
374
  def merge_sections(base_sections: dict, add_sections: dict) -> dict:
375
- """์„น์…˜ XML๋“ค์„ ๋ณ‘ํ•ฉ"""
376
  merged = base_sections.copy()
377
-
378
- for filename, add_content in add_sections.items():
379
- if filename in merged:
380
- # ๊ธฐ์กด ์„น์…˜์— ํŽ˜์ด์ง€ ์ถ”๊ฐ€
381
- merged[filename] = merge_section_content(merged[filename], add_content)
382
  else:
383
- # ์ƒˆ๋กœ์šด ์„น์…˜ ์ถ”๊ฐ€
384
- merged[filename] = add_content
385
-
386
  return merged
387
 
 
388
  def merge_section_content(base_xml: str, add_xml: str) -> str:
389
- """๋‹จ์ผ ์„น์…˜ XML ๋‚ด์šฉ์„ ๋ณ‘ํ•ฉ"""
390
- # ์ถ”๊ฐ€ํ•  XML์—์„œ ํŽ˜์ด์ง€๋“ค ์ถ”์ถœ - ๋” ์ •ํ™•ํ•œ ํŒจํ„ด
391
- page_pattern = re.compile(
392
- r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
393
- re.DOTALL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  )
395
-
396
- # ํŽ˜์ด์ง€ ๋งค์นญ
397
- page_matches = list(page_pattern.finditer(add_xml))
398
- if not page_matches:
399
- return base_xml
400
-
401
- # ์ถ”๊ฐ€ํ•  ํŽ˜์ด์ง€๋“ค
402
- pages_to_add = [match.group(0) for match in page_matches]
403
-
404
- # ๋ฒ ์ด์Šค XML์˜ </hp:pages> ๋˜๋Š” </hml:pages> ํƒœ๊ทธ ์•ž์— ์‚ฝ์ž…
405
- pages_end_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
406
- pages_str = ''.join(pages_to_add)
407
-
408
- merged_xml = pages_end_pattern.sub(pages_str + r'\1', base_xml)
409
-
410
- return merged_xml
411
-
412
- # -------------------- UI --------------------
 
 
 
 
 
413
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
414
- st.markdown("""
415
- - **ํ…œํ”Œ๋ฆฟ์€ 1ํŽ˜์ด์ง€์— N๊ฐœ ๋ผ๋ฒจ**์ด ์žˆ๋Š” ํ‘œ ํ˜•ํƒœ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
416
- - **๋ฐ•์Šค ์ˆ˜๊ฐ€ N๊ฐœ๋ฅผ ์ดˆ๊ณผํ•˜๋ฉด ์ƒˆ ํŽ˜์ด์ง€๊ฐ€ ์ž๋™ ์ถ”๊ฐ€**๋ฉ๋‹ˆ๋‹ค.
417
- - **๋‹ค์ค‘ ์ค„(๋ชฉ๋ก/์ œ๋ชฉ)์€ ๋ถ€๋ชจ ๋ฌธ๋‹จ์„ ์—ฌ๋Ÿฌ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด**ํ•˜์—ฌ ๊ฒน์นจ ์—†์ด ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.
418
- - **์—…๋ฌด๋ช…์€ ๋‹จ์ผ์ค„๋กœ ์ฒ˜๋ฆฌ**ํ•˜์—ฌ ์›๋ณธ ํฐํŠธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค.
419
- - **์ƒ์„ฑ๋œ HWPX ํŒŒ์ผ์˜ ์ฝ๊ธฐ์ „์šฉ ์†์„ฑ์ด ํ•ด์ œ**๋˜์–ด ํŽธ์ง‘ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.
420
- """)
421
 
422
  tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
423
  n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
424
- data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
425
 
426
  if tpl and data:
427
  tpl_bytes = tpl.read()
428
  df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
429
 
430
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
431
- st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."); st.stop()
 
432
 
433
  st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘์…€ ์ธก)")
434
  st.dataframe(df.head(10), use_container_width=True)
@@ -446,66 +453,81 @@ if tpl and data:
446
 
447
  # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
448
  st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
449
- keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก","์ œ๋ชฉ","์—…๋ฌด๋ช…"]
450
  mapping_preview = {}
451
  for i in range(int(n_per_page)):
452
  if i < len(records):
453
  r = records[i]
454
- mapping_preview.update({
455
- f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}": r.get("๋ฐ•์Šค๋ฒˆํ˜ธ",""),
456
- f"์ข…๋ฃŒ์—ฐ๋„{i+1}": r.get("์ƒ์‚ฐ์—ฐ๋„",""),
457
- f"๋ณด์กด๊ธฐ๊ฐ„{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ„",""),
458
- f"๋‹จ์œ„์—…๋ฌด{i+1}": r.get("๋‹จ์œ„์—…๋ฌด",""),
459
- f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ",""),
460
- f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก",""),
461
- f"์ œ๋ชฉ{i+1}": r.get("์ œ๋ชฉ",""),
462
- f"์—…๋ฌด๋ช…{i+1}": r.get("์ œ๋ชฉ",""), # ํ…œํ”Œ๋ฆฟ์ด '์—…๋ฌด๋ช…1'์„ ์“ฐ๋Š” ๊ฒฝ์šฐ ๋Œ€์‘
463
- })
 
 
464
  else:
465
- for k in keys: mapping_preview[f"{k}{i+1}"] = ""
466
- st.dataframe(pd.DataFrame([{"ํ‚ค":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
467
- use_container_width=True, height=320)
 
 
 
 
468
 
469
- if st.button("๐Ÿš€ ํ†ตํ•ฉ ๋ผ๋ฒจ ์ƒ์„ฑ (๋‹จ์ผ HWPX ํŒŒ์ผ)"):
470
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
471
  debug_all = []
472
-
473
- # ์ฒซ ํŽ˜์ด์ง€๋กœ ์‹œ์ž‘
474
- merged_hwpx = None
475
-
476
  for p in range(pages):
477
- chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
478
- mapping = {}
479
  for i in range(int(n_per_page)):
480
  if i < len(chunk):
481
  r = chunk[i]
482
- mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","")
483
- mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","")
484
- mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„","")
485
- mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด","")
486
- mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
487
- mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก","")
488
- title_val = r.get("์ œ๋ชฉ","")
489
- mapping[f"์ œ๋ชฉ{i+1}"] = title_val
490
  mapping[f"์—…๋ฌด๋ช…{i+1}"] = title_val
491
  else:
492
- for k in keys: mapping[f"{k}{i+1}"] = ""
 
493
 
494
  if p == 0:
495
- # ์ฒซ ํŽ˜์ด์ง€: ํ…œํ”Œ๋ฆฟ ๊ธฐ๋ฐ˜์œผ๋กœ ์ƒ์„ฑ
496
  merged_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
497
  else:
498
- # ๋‘ ๋ฒˆ์งธ ํŽ˜์ด์ง€๋ถ€ํ„ฐ: ๊ธฐ์กด HWPX์— ํŽ˜์ด์ง€ ์ถ”๊ฐ€
499
  page_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
500
  merged_hwpx = merge_hwpx_pages(merged_hwpx, page_hwpx)
501
-
502
- debug_all.append({"page": p+1, "stats": dbg})
503
 
504
- # ๋ฐ•์Šค๋ฒˆํ˜ธ ๋ฒ”์œ„๋กœ ํŒŒ์ผ๋ช… ์ƒ์„ฑ
 
 
505
  first_box = records[0].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
506
  last_box = records[-1].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
507
- filename = f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
508
-
509
- st.download_button("โฌ‡๏ธ ํ†ตํ•ฉ HWPX ๋‹ค์šด๋กœ๋“œ", data=merged_hwpx, file_name=filename, mime="application/zip")
510
- st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
511
- file_name="debug.json", mime="application/json")
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import io
3
+ import json
4
+ import html
5
+ import re
6
+ import zipfile
7
  from typing import Dict, Tuple
8
 
9
+ import pandas as pd
10
+ import streamlit as st
11
+
12
+
13
+ # ====================== Streamlit ======================
14
+ st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ(HWPX) โ€” ํ†ตํ•ฉ ํŒŒ์ผ ์ถœ๋ ฅ", layout="wide")
15
+ st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ โ€” HWPX ํ•„๋“œยทํ† ํฐยท๋ฌธ๋‹จ ์™„์ „ ์น˜ํ™˜ + ๋‹คํŽ˜์ด์ง€ ํ†ตํ•ฉ ์ถœ๋ ฅ")
16
 
17
+
18
+ # ====================== ๋ฐ์ดํ„ฐ ์œ ํ‹ธ ======================
19
  def _year_range(series: pd.Series) -> str:
20
  s = series.astype(str).fillna("")
21
  v = s[~s.isin(["", "0", "0000"])]
22
+ if v.empty:
23
+ return "0000-0000"
24
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
25
+ if nums.empty:
26
+ return "0000-0000"
27
  return f"{nums.min():04d}-{nums.max():04d}"
28
 
29
+
30
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
31
+ """๋ฐ•์Šค๋ฒˆํ˜ธ ๊ธฐ์ค€ ๋Œ€ํ‘œ ๋ฉ”ํƒ€ + ๋ชฉ๋ก(์—ฌ๋Ÿฌ ์ค„) + ์ƒ์‚ฐ์—ฐ๋„ ๋ฒ”์œ„ ์ƒ์„ฑ"""
32
  df = df.copy()
33
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
34
  if "์ œ๋ชฉ" in df.columns:
35
  df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
36
 
37
+ # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„)
38
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
39
  yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
40
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
 
45
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
46
  lists = []
47
  for b, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
48
+ lines = [
49
+ f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
50
+ for _, r in g.iterrows()
51
+ ]
52
  lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
53
  list_df = pd.DataFrame(lists)
54
 
55
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
56
+ meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ข…๋ฃŒ์—ฐ๋„", "๋ณด์กด๊ธฐ๊ฐ„", "๋‹จ์œ„์—…๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "์ œ๋ชฉ"]
57
  meta_exist = [c for c in meta_cols if c in df.columns]
58
+ if meta_exist:
59
+ meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist]
60
+ else:
61
+ meta = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
62
 
63
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
64
  return merged
65
 
66
+
67
+ # ====================== ์น˜ํ™˜ ์œ ํ‹ธ (์ธ๋ผ์ธ/๋ฌธ๋‹จ) ======================
68
+
69
+ # fieldBegin/fieldEnd ์Œ (์ ‘๋‘์–ด ์™€์ผ๋“œ์นด๋“œ)
70
  FIELD_PAIR_RE_TMPL = (
71
  r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
72
  r'(.*?)'
73
  r'<(?P=fprefix):fieldEnd\b[^>]*/>'
74
  )
75
+ # ํ† ํฐ ํฌ๋งท
76
  TOKEN_FMT = "{{{{{key}}}}}"
77
 
78
+ # ๋ฌธ๋‹จ ํƒ์ƒ‰์šฉ
79
  PARA_RE = re.compile(
80
  r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
81
+ re.DOTALL,
82
  )
83
 
84
+ # run / t ๏ฟฝ๏ฟฝ๏ฟฝ๋“œ ์ถ”์ถœ์šฉ
85
+ RUN_RE = re.compile(
86
+ r'<(?P<prefix>[a-zA-Z0-9_]+):run(?P<rattrs>[^>]*)>(?P<body>.*?)</(?P=prefix):run>',
87
+ re.DOTALL,
88
+ )
89
+ TP_RE = re.compile(
90
+ r'<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>(?P<text>.*?)</(?P=prefix):t>',
91
+ re.DOTALL,
92
+ )
93
+
94
+
95
+ def _clone_run_with_text(run_xml: str, text: str) -> str:
96
+ """๊ธฐ์กด run์˜ rPr/์†์„ฑ ๋ณด์กด, t ๋‚ด์šฉ๋งŒ ๊ต์ฒด"""
97
+ def _repl_t(m):
98
+ return f"<{m.group('prefix')}:t>{html.escape(text)}</{m.group('prefix')}:t>"
99
+
100
+ if TP_RE.search(run_xml):
101
+ return TP_RE.sub(_repl_t, run_xml, count=1)
102
+ # t ๋…ธ๋“œ ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์‚ฝ์ž…
103
+ m = RUN_RE.search(run_xml)
104
+ if not m:
105
+ return f"<hp:run><hp:t>{html.escape(text)}</hp:t></hp:run>"
106
+ prefix = m.group("prefix")
107
+ return f"<{prefix}:run><{prefix}:t>{html.escape(text)}</{prefix}:t></{prefix}:run>"
108
+
109
+
110
+ def _extract_ppr_and_template_run(pbody: str):
111
+ """๋ฌธ๋‹จ pPr(์žˆ์œผ๋ฉด)๊ณผ ์ฒซ ๋ฒˆ์งธ run ์›ํ˜•์„ ์ถ”์ถœ"""
112
+ ppr_match = re.search(r'<(?P<prefix>[a-zA-Z0-9_]+):pPr\b[^>]*/>', pbody)
113
+ ppr_xml = ppr_match.group(0) if ppr_match else ""
114
+
115
+ run_match = RUN_RE.search(pbody)
116
+ if run_match:
117
+ template_run = run_match.group(0) # rPr ํฌํ•จ
118
+ else:
119
+ template_run = "<hp:run><hp:t></hp:t></hp:run>"
120
+ return ppr_xml, template_run
121
+
122
+
123
+ def _make_para_from_templates(pprefix: str, pattrs: str, ppr_xml: str, template_run: str, text: str) -> str:
124
+ cloned_run = _clone_run_with_text(template_run, text)
125
+ return f"<{pprefix}:p{pattrs}>{ppr_xml}{cloned_run}</{pprefix}:p>"
126
+
127
 
128
  def _split_lines(val) -> list:
129
+ if val is None:
130
+ return [""]
131
+ return str(val).replace("\r\n", "\n").split("\n")
132
+
133
 
134
  def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
135
  """
136
+ key๊ฐ€ ํฌํ•จ๋œ '๋ถ€๋ชจ ๋ฌธ๋‹จ ์ „์ฒด'๋ฅผ ๊ฐ’์˜ ๊ฐ ์ค„์„ ๋‹ด์€ ๋‹ค์ˆ˜ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด.
137
+ ์› ๋ฌธ๋‹จ pPr/rPr ์Šคํƒ€์ผ ์œ ์ง€.
138
  """
139
  pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
140
+ tnode_pat = re.compile(
141
+ rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>',
142
+ re.DOTALL,
143
+ )
144
  token_str = TOKEN_FMT.format(key=key)
145
 
146
  def para_repl(m):
 
150
 
151
  lines = _split_lines(value)
152
  pprefix = m.group("pprefix")
153
+ pattrs = m.group("pattrs")
154
+ ppr_xml, template_run = _extract_ppr_and_template_run(body)
155
+
156
+ new_paras = "".join(_make_para_from_templates(pprefix, pattrs, ppr_xml, template_run, ln) for ln in lines)
 
 
 
157
  dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
158
  return new_paras
159
 
160
  xml2 = PARA_RE.sub(para_repl, xml)
161
  if xml2 != xml:
162
+ dbg["files_touched"] = True
163
  return xml2
164
 
165
+
166
  def _runs_plain(text: str) -> str:
167
  return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
168
 
169
+
170
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
171
  changed_any = False
172
 
173
+ # (A) ๋‹ค์ค‘ ์ค„ ํ‚ค๋Š” "๋ฌธ๋‹จ ๊ต์ฒด"๋กœ ๋จผ์ € ์ฒ˜๋ฆฌ (๋ชฉ๋ก/์ œ๋ชฉ/์—…๋ฌด๋ช… ๋ชจ๋‘ ์ค„๋ฐ”๊ฟˆ ๊ฐ•์ œ)
174
+ multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ œ๋ชฉ|์—…๋ฌด๋ช…)\d+$", re.IGNORECASE)
175
  for k, v in mapping.items():
176
  if multi_key.match(k):
177
  xml_new = _replace_para_multiline(xml, k, v, dbg)
 
179
  xml = xml_new
180
  changed_any = True
181
 
182
+ # (B) ์ธ๋ผ์ธ ํ•„๋“œ์Œ ์น˜ํ™˜ โ€” ๋‹จ์ผ ์ค„๋งŒ
183
  for k, v in mapping.items():
184
  if multi_key.match(k):
185
  continue
 
191
  xml = xml_new
192
  changed_any = True
193
 
194
+ # (C) ์ˆœ์ˆ˜ ํ…์ŠคํŠธ ์ž๋ฆฌํ‘œ์‹œ์ž(<*:t>ํ‚ค</*:t>) ์น˜ํ™˜ โ€” ๋‹จ์ผ ์ค„๋งŒ
195
  tnode_all = re.compile(
196
  r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
197
+ re.DOTALL,
198
  )
199
  for k, v in mapping.items():
200
  if multi_key.match(k):
201
  continue
202
+
203
  def repl_tnode(m):
204
  text_node = m.group(3)
205
  if k not in text_node:
206
  return m.group(0)
207
  new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
208
  return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
209
+
210
  xml2 = tnode_all.sub(repl_tnode, xml)
211
  if xml2 != xml:
212
  dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
213
  xml = xml2
214
  changed_any = True
215
 
216
+ # (D) ํ† ํฐ ์น˜ํ™˜ โ€” ๋‹จ์ผ ์ค„๋งŒ
217
  for k, v in mapping.items():
218
  if multi_key.match(k):
219
  continue
 
227
  dbg["files_touched"] = True
228
  return xml
229
 
230
+
231
+ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes, dict]:
232
+ """HWPX(zip) ๋‚ด๋ถ€ ๋ชจ๋“  XML์— ์น˜ํ™˜ ์ ์šฉ"""
233
+ import time
234
+
235
+ dbg = {"para_hits": {}, "field_hits": {}, "text_hits": {}, "token_hits": {}, "touched_files": []}
236
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
237
  out_buf = io.BytesIO()
238
  zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
239
 
 
240
  now = time.localtime()
241
 
242
+ # mimetype: ๋ฌด์••์ถ• + ๋งจ์•ž
243
  names = zin.namelist()
244
  if "mimetype" in names:
245
  zi = zipfile.ZipInfo("mimetype")
246
  zi.compress_type = zipfile.ZIP_STORED
247
+ zi.external_attr = 0o100666 << 16
248
+ zi.create_system = 0
 
249
  zi.date_time = now[:6]
250
  zout.writestr(zi, zin.read("mimetype"))
251
 
 
257
  try:
258
  s = data.decode("utf-8", errors="ignore")
259
  before = s
260
+ s = _apply_to_xml(
261
+ s,
262
+ mapping,
263
+ {
264
+ "para_hits": dbg["para_hits"],
265
+ "field_hits": dbg["field_hits"],
266
+ "text_hits": dbg["text_hits"],
267
+ "token_hits": dbg["token_hits"],
268
+ "files_touched": False,
269
+ },
270
+ )
271
  if s != before:
272
  dbg["touched_files"].append(e.filename)
273
  data = s.encode("utf-8")
274
  except Exception:
275
  pass
276
+
 
277
  zi = zipfile.ZipInfo(e.filename)
278
  zi.compress_type = zipfile.ZIP_DEFLATED
279
+ zi.external_attr = 0o100666 << 16
280
+ zi.create_system = 0
281
+ zi.date_time = now[:6]
282
+ zi.flag_bits = 0
283
  zout.writestr(zi, data)
284
 
285
  zout.close()
 
287
  zin.close()
288
  return out_buf.getvalue(), dbg
289
 
290
+
291
+ # ====================== ์„น์…˜/ํŽ˜์ด์ง€ ๋ณ‘ํ•ฉ (๋‹จ์ผ HWPX๋กœ ์ถœ๋ ฅ) ======================
292
+
293
  def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
294
+ """๋‘ HWPX๋ฅผ 1๊ฐœ๋กœ ๋ณ‘ํ•ฉ: pages ๋ชฉ๋ก๊ณผ ๋ณธ๋ฌธ ๋ฌธ๋‹จ๊นŒ์ง€ ํ•ฉ์นจ"""
295
  import time
296
+
297
  base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
298
  add_zip = zipfile.ZipFile(io.BytesIO(additional_hwpx), "r")
299
+
300
  out_buf = io.BytesIO()
301
  out_zip = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
 
302
  now = time.localtime()
303
+
304
+ # mimetype
305
  if "mimetype" in base_zip.namelist():
306
  zi = zipfile.ZipInfo("mimetype")
307
  zi.compress_type = zipfile.ZIP_STORED
308
  zi.external_attr = 0o100666 << 16
309
  zi.create_system = 0
310
  zi.date_time = now[:6]
 
311
  out_zip.writestr(zi, base_zip.read("mimetype"))
312
+
313
+ # ์„น์…˜ XML ์ˆ˜์ง‘
314
+ base_sections, add_sections = {}, {}
315
+ for fn in base_zip.namelist():
316
+ if fn == "mimetype":
 
 
317
  continue
318
+ if fn.startswith("Contents/section") and fn.endswith(".xml"):
319
+ base_sections[fn] = base_zip.read(fn).decode("utf-8", errors="ignore")
 
 
320
  else:
321
+ zi = zipfile.ZipInfo(fn)
322
+ zi.compress_type = zipfile.ZIP_DEFLATED
323
+ zi.external_attr = 0o100666 << 16
324
+ zi.create_system = 0
325
+ zi.date_time = now[:6]
326
+ zi.flag_bits = 0
327
+ out_zip.writestr(zi, base_zip.read(fn))
328
+
329
+ for fn in add_zip.namelist():
330
+ if fn.startswith("Contents/section") and fn.endswith(".xml"):
331
+ add_sections[fn] = add_zip.read(fn).decode("utf-8", errors="ignore")
332
+
333
+ # ์„น์…˜ ๋ณ‘ํ•ฉ
334
+ merged_sections = merge_sections(base_sections, add_sections)
335
+
336
+ # ๊ฒฐ๊ณผ ๊ธฐ๋ก
337
+ for fn, content in merged_sections.items():
338
+ zi = zipfile.ZipInfo(fn)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  zi.compress_type = zipfile.ZIP_DEFLATED
340
  zi.external_attr = 0o100666 << 16
341
  zi.create_system = 0
342
  zi.date_time = now[:6]
343
  zi.flag_bits = 0
344
  out_zip.writestr(zi, content.encode("utf-8"))
345
+
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  base_zip.close()
347
  add_zip.close()
348
  out_zip.close()
349
  out_buf.seek(0)
 
350
  return out_buf.getvalue()
351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
def merge_sections(base_sections: dict, add_sections: dict) -> dict:
    """Combine two {filename: section-XML} maps into one.

    Filenames present only in one map are copied through unchanged;
    filenames present in both have their XML content merged via
    merge_section_content(). Neither input dict is mutated.
    """
    merged = dict(base_sections)
    for name, xml in add_sections.items():
        # Overlapping section files get a content-level merge; new ones
        # are simply adopted as-is.
        merged[name] = (
            merge_section_content(merged[name], xml) if name in merged else xml
        )
    return merged
361
 
362
+
363
def merge_section_content(base_xml: str, add_xml: str) -> str:
    """Merge one HWPX section XML string into another.

    Two independent merge steps:
      1) Append the <*:page> entries of add_xml's <*:pages> block to the
         matching block in base_xml (both self-closing and open/close
         entry forms are handled).
      2) Append every body paragraph (<*:p>) of add_xml just before
         base_xml's closing </*:section> tag, preceded by a paragraph
         containing only a <*:pageBreak/> so the appended content starts
         on a new page.

    Args:
        base_xml: section XML that receives the merged content.
        add_xml:  section XML whose pages/paragraphs are appended.

    Returns:
        The merged section XML string (base_xml unchanged if add_xml
        contributes nothing).
    """
    # --- 1) merge the <*:pages> lists -----------------------------------
    pages_block_re = re.compile(
        r'<(?P<pfx>[a-zA-Z0-9_]+):pages\b[^>]*>(?P<body>.*?)</(?P=pfx):pages>',
        re.DOTALL,
    )
    m_base_pages = pages_block_re.search(base_xml)
    m_add_pages = pages_block_re.search(add_xml)
    if m_base_pages and m_add_pages:
        # BUGFIX: match the page entries with the *add* document's own
        # namespace prefix — the base prefix may differ between files,
        # in which case the old code silently dropped every entry.
        pfx_add = m_add_pages.group("pfx")
        body_base = m_base_pages.group("body")
        body_add = m_add_pages.group("body")
        add_entries = re.findall(
            rf'<{pfx_add}:page\b[^>]*/>|<{pfx_add}:page\b[^>]*>.*?</{pfx_add}:page>',
            body_add,
            re.DOTALL,
        )
        if add_entries:
            new_body = body_base + "".join(add_entries)
            base_xml = (
                base_xml[: m_base_pages.start("body")]
                + new_body
                + base_xml[m_base_pages.end("body") :]
            )

    # --- 2) append body paragraphs --------------------------------------
    para_re = re.compile(
        r'<(?P<pfx>[a-zA-Z0-9_]+):p\b[^>]*>.*?</(?P=pfx):p>', re.DOTALL
    )
    # Prefix is taken from base's first paragraph; without one we cannot
    # build a structurally consistent page-break paragraph, so we skip.
    m0 = para_re.search(base_xml)
    pfx_in_base = m0.group("pfx") if m0 else None

    add_paras = [m.group(0) for m in para_re.finditer(add_xml)]
    if add_paras and pfx_in_base:
        # Standalone paragraph holding only a page break, so the appended
        # paragraphs start on a fresh page.
        pagebreak_para = (
            f'<{pfx_in_base}:p><{pfx_in_base}:run>'
            f'<{pfx_in_base}:pageBreak/>'
            f'</{pfx_in_base}:run></{pfx_in_base}:p>'
        )
        m_end = re.search(rf'</{pfx_in_base}:section>', base_xml)
        if m_end:
            insert_at = m_end.start()
            base_xml = (
                base_xml[:insert_at]
                + pagebreak_para
                + "".join(add_paras)
                + base_xml[insert_at:]
            )
    return base_xml
416
+
417
+
418
# ====================== UI ======================
# Top-level Streamlit widgets: a usage note plus the three inputs the
# generator needs (HWPX template, labels-per-page count, data table).
with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
    st.markdown(
        """
        - **๋‹ค์ค‘ ์ค„(๋ชฉ๋ก/์ œ๋ชฉ/์—…๋ฌด๋ช…)** ์€ ์› ๋ฌธ๋‹จ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•œ ์ฑ„ **๋ถ€๋ชจ ๋ฌธ๋‹จ์„ ์ค„ ์ˆ˜๋งŒํผ ๋ณต์ œ**ํ•˜์—ฌ ๊ฒน์นจ ์—†์ด ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.
        - ๋ฐ•์Šค๊ฐ€ ๋งŽ์•„๋„ **๋งˆ์ง€๋ง‰์— ํ•œ ๊ฐœ์˜ HWPX ํŒŒ์ผ**๋กœ ํ†ตํ•ฉํ•ด ๋‚ด๋ ค์ค๋‹ˆ๋‹ค.
        - ํ…œํ”Œ๋ฆฟ์€ ๋ฐ˜๋“œ์‹œ **.HWPX** ์—ฌ์•ผ ํ•ฉ๋‹ˆ๋‹ค. (.HWP ๋ถˆ๊ฐ€)
        """
    )

# Template must be .hwpx (binary .hwp is not supported); n_per_page is the
# number of label sets the template lays out on one page (1..12, default 3).
tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx", "xls", "csv"])
431
 
432
  if tpl and data:
433
  tpl_bytes = tpl.read()
434
  df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
435
 
436
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
437
+ st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
438
+ st.stop()
439
 
440
  st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘์…€ ์ธก)")
441
  st.dataframe(df.head(10), use_container_width=True)
 
453
 
454
  # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
455
  st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
456
+ keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ข…๋ฃŒ์—ฐ๋„", "๋ณด์กด๊ธฐ๊ฐ„", "๋‹จ์œ„์—…๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "๋ชฉ๋ก", "์ œ๋ชฉ", "์—…๋ฌด๋ช…"]
457
  mapping_preview = {}
458
  for i in range(int(n_per_page)):
459
  if i < len(records):
460
  r = records[i]
461
+ mapping_preview.update(
462
+ {
463
+ f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}": r.get("๋ฐ•์Šค๋ฒˆํ˜ธ", ""),
464
+ f"์ข…๋ฃŒ์—ฐ๋„{i+1}": r.get("์ƒ์‚ฐ์—ฐ๋„", ""),
465
+ f"๋ณด์กด๊ธฐ๊ฐ„{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ„", ""),
466
+ f"๋‹จ์œ„์—…๋ฌด{i+1}": r.get("๋‹จ์œ„์—…๋ฌด", ""),
467
+ f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ", ""),
468
+ f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก", ""),
469
+ f"์ œ๋ชฉ{i+1}": r.get("์ œ๋ชฉ", ""),
470
+ f"์—…๋ฌด๋ช…{i+1}": r.get("์ œ๋ชฉ", ""), # ํ…œํ”Œ๋ฆฟ์ด '์—…๋ฌด๋ช…X'์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์–ด ๋™์‹œ ๋งคํ•‘
471
+ }
472
+ )
473
  else:
474
+ for k in keys:
475
+ mapping_preview[f"{k}{i+1}"] = ""
476
+ st.dataframe(
477
+ pd.DataFrame([{"ํ‚ค": k, "๊ฐ’ ์•ž๋ถ€๋ถ„": str(v)[:120]} for k, v in sorted(mapping_preview.items())]),
478
+ use_container_width=True,
479
+ height=320,
480
+ )
481
 
482
+ if st.button("๐Ÿš€ ํ†ตํ•ฉ HWPX ์ƒ์„ฑ (ํ•œ ํŒŒ์ผ๋กœ ๋‹ค์šด๋กœ๋“œ)"):
483
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
484
  debug_all = []
485
+
486
+ merged_hwpx: bytes | None = None
487
+
 
488
  for p in range(pages):
489
+ chunk = records[p * int(n_per_page) : (p + 1) * int(n_per_page)]
490
+ mapping: Dict[str, str] = {}
491
  for i in range(int(n_per_page)):
492
  if i < len(chunk):
493
  r = chunk[i]
494
+ mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ", "")
495
+ mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„", "")
496
+ mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„", "")
497
+ mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด", "")
498
+ mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ", "")
499
+ mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก", "")
500
+ title_val = r.get("์ œ๋ชฉ", "")
501
+ mapping[f"์ œ๋ชฉ{i+1}"] = title_val
502
  mapping[f"์—…๋ฌด๋ช…{i+1}"] = title_val
503
  else:
504
+ for k in keys:
505
+ mapping[f"{k}{i+1}"] = ""
506
 
507
  if p == 0:
 
508
  merged_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
509
  else:
 
510
  page_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
511
  merged_hwpx = merge_hwpx_pages(merged_hwpx, page_hwpx)
 
 
512
 
513
+ debug_all.append({"page": p + 1, "stats": dbg})
514
+
515
+ # ํŒŒ์ผ๋ช…
516
  first_box = records[0].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
517
  last_box = records[-1].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
518
+ filename = (
519
+ f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
520
+ )
521
+
522
+ st.download_button(
523
+ "โฌ‡๏ธ ํ†ตํ•ฉ HWPX ๋‹ค์šด๋กœ๋“œ",
524
+ data=merged_hwpx,
525
+ file_name=filename,
526
+ mime="application/vnd.hancom.hwpx",
527
+ )
528
+ st.download_button(
529
+ "โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)",
530
+ data=json.dumps(debug_all, ensure_ascii=False, indent=2),
531
+ file_name="debug.json",
532
+ mime="application/json",
533
+ )