dohyune commited on
Commit
c0ef99a
·
verified ·
1 Parent(s): 8329462

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -244
app.py CHANGED
@@ -1,39 +1,27 @@
1
- # app.py โ€” ์ „ ์ฝ”๋“œ ๊ธฐ๋ฐ˜(์•ˆ์ • ์น˜ํ™˜) + lxml ๊ธฐ๋ฐ˜ "๋‹จ์ผ HWPX ๋ณ‘ํ•ฉ" ์ถœ๋ ฅ
2
- import io
3
- import json
4
- import html
5
- import re
6
- import zipfile
7
- from typing import Dict, Tuple, List, Optional
8
-
9
- import pandas as pd
10
  import streamlit as st
 
 
 
11
 
12
- # =============== Streamlit ===============
13
- st.set_page_config(page_title="๐Ÿ“ฆ๋ณด์กด๋ฐ•์Šค ๋ผ๋ฒจ ์ƒ์„ฑ๊ธฐ", layout="wide")
14
- st.title("๐Ÿ“ฆ ๋ณด์กด๋ฐ•์Šค ๋ผ๋ฒจ ์ƒ์„ฑ๊ธฐ ๐Ÿ“ฆ")
15
-
16
 
17
- # =============== ๋ฐ์ดํ„ฐ ์œ ํ‹ธ ===============
18
  def _year_range(series: pd.Series) -> str:
19
  s = series.astype(str).fillna("")
20
  v = s[~s.isin(["", "0", "0000"])]
21
- if v.empty:
22
- return "0000-0000"
23
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
24
- if nums.empty:
25
- return "0000-0000"
26
  return f"{nums.min():04d}-{nums.max():04d}"
27
 
28
-
29
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
30
- """๋ฐ•์Šค๋ฒˆํ˜ธ ๊ธฐ์ค€ ๋Œ€ํ‘œ ๋ฉ”ํƒ€ + ๋ชฉ๋ก(์—ฌ๋Ÿฌ ์ค„) + ์ƒ์‚ฐ์—ฐ๋„ ๋ฒ”์œ„ ์ƒ์„ฑ"""
31
  df = df.copy()
32
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
33
  if "์ œ๋ชฉ" in df.columns:
34
  df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
35
 
36
- # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„)
37
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
38
  yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
39
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
@@ -44,57 +32,116 @@ def build_rows(df: pd.DataFrame) -> pd.DataFrame:
44
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
45
  lists = []
46
  for b, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
47
- lines = [
48
- f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
49
- for _, r in g.iterrows()
50
- ]
51
  lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
52
  list_df = pd.DataFrame(lists)
53
 
54
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
55
- meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ข…๋ฃŒ์—ฐ๋„", "๋ณด์กด๊ธฐ๊ฐ„", "๋‹จ์œ„์—…๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "์ œ๋ชฉ"]
56
  meta_exist = [c for c in meta_cols if c in df.columns]
57
- if meta_exist:
58
- meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist]
59
- else:
60
- meta = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
61
 
62
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
63
  return merged
64
 
65
-
66
- # =============== ์น˜ํ™˜ ์œ ํ‹ธ (์ „ ์ฝ”๋“œ์˜ ์•ˆ์ • ๋ฒ„์ „) ===============
67
  FIELD_PAIR_RE_TMPL = (
68
- r'<(?P<prefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
69
  r'(.*?)'
70
- r'<(?P=prefix):fieldEnd\b[^>]*/>'
71
  )
72
  TOKEN_FMT = "{{{{{key}}}}}"
73
 
 
 
 
 
 
74
 
75
- def _run_for_plain(text: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
77
 
78
-
79
- def _run_for_list(text: str) -> str:
80
- if text is None:
81
- return ""
82
- lines = str(text).replace("\r\n", "\n").split("\n")
83
- parts = []
84
- for i, ln in enumerate(lines):
85
- if i > 0:
86
- parts.append("<hp:lineBreak/>")
87
- parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
88
- return "".join(parts)
89
-
90
-
91
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
92
  changed_any = False
93
 
94
- # 1) ํ•„๋“œ์Œ ์น˜ํ™˜ (์ ‘๋‘์–ด ์™€์ผ๋“œ์นด๋“œ)
 
 
 
 
 
 
 
 
 
95
  for k, v in mapping.items():
96
- is_list = bool(re.match(r"^(๋ชฉ๋ก|list)\d+$", k, re.IGNORECASE))
97
- replacement = _run_for_list(v) if is_list else _run_for_plain(v)
 
98
  pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
99
  xml_new, n = pat.subn(replacement, xml)
100
  if n:
@@ -102,16 +149,33 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
102
  xml = xml_new
103
  changed_any = True
104
 
105
- # 2) ํ† ํฐ ์น˜ํ™˜ ({{ํ‚ค}})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  for k, v in mapping.items():
 
 
107
  tok = TOKEN_FMT.format(key=k)
108
  if tok in xml:
109
- rep = (
110
- _run_for_list(v)
111
- if re.match(r"^(๋ชฉ๋ก|list)\d+$", k, re.IGNORECASE)
112
- else html.escape("" if v is None else str(v))
113
- )
114
- xml = xml.replace(tok, rep)
115
  dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
116
  changed_any = True
117
 
@@ -119,21 +183,25 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
119
  dbg["files_touched"] = True
120
  return xml
121
 
122
-
123
- def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes, dict]:
124
- """HWPX(zip) ๋‚ด๋ถ€ ๋ชจ๋“  XML์— ์น˜ํ™˜ ์ ์šฉ โ€” ์•ˆ์ • ๋ฒ„์ „"""
125
- import time
126
-
127
- dbg = {"field_hits": {}, "token_hits": {}, "touched_files": []}
128
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
129
  out_buf = io.BytesIO()
130
- zout = zipfile.ZipFile(out_buf, "w")
 
 
 
131
 
132
  # mimetype ๋ฌด์••์ถ• + ๋งจ์•ž
133
  names = zin.namelist()
134
  if "mimetype" in names:
135
  zi = zipfile.ZipInfo("mimetype")
136
  zi.compress_type = zipfile.ZIP_STORED
 
 
 
 
137
  zout.writestr(zi, zin.read("mimetype"))
138
 
139
  for e in zin.infolist():
@@ -144,18 +212,22 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes,
144
  try:
145
  s = data.decode("utf-8", errors="ignore")
146
  before = s
147
- s = _apply_to_xml(
148
- s,
149
- mapping,
150
- {"field_hits": dbg["field_hits"], "token_hits": dbg["token_hits"], "files_touched": False},
151
- )
152
  if s != before:
153
  dbg["touched_files"].append(e.filename)
154
  data = s.encode("utf-8")
155
  except Exception:
156
  pass
 
 
157
  zi = zipfile.ZipInfo(e.filename)
158
  zi.compress_type = zipfile.ZIP_DEFLATED
 
 
 
 
159
  zout.writestr(zi, data)
160
 
161
  zout.close()
@@ -163,141 +235,27 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes,
163
  zin.close()
164
  return out_buf.getvalue(), dbg
165
 
166
-
167
- # =============== ๋‹จ์ผ HWPX ๋ณ‘ํ•ฉ(lxml) ===============
168
- def merge_filled_hwpx_list_to_single(hwpx_list: list[bytes]) -> bytes:
169
- """
170
- ์—ฌ๋Ÿฌ ๊ฐœ์˜ '์ด๋ฏธ ์ฑ„์›Œ์ง„' HWPX ๋ฐ”์ดํŠธ๋“ค์„ ๋ฐ›์•„
171
- ์ฒซ ๋ฒˆ์งธ ๋ฌธ์„œ์— ๋’ค ๋ฌธ์„œ์˜ ํŽ˜์ด์ง€์™€ ๋ณธ๋ฌธ ๋ธ”๋ก(<p> ๋“ฑ)์„ ์•ˆ์ „ํ•˜๊ฒŒ ๋ณ‘ํ•ฉํ•œ๋‹ค.
172
- XPath๋Š” local-name()์„ ์‚ฌ์šฉํ•˜์—ฌ ๋„ค์ž„์ŠคํŽ˜์ด์Šค ์ด์Šˆ๋ฅผ ํšŒํ”ผํ•œ๋‹ค.
173
- """
174
- from lxml import etree
175
- import io, zipfile
176
-
177
- if not hwpx_list:
178
- raise ValueError("๋ณ‘ํ•ฉํ•  HWPX๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
179
-
180
- # ์ฒซ ๋ฌธ์„œ๋ฅผ ๋ฒ ์ด์Šค๋กœ ํŽผ์น˜๊ธฐ
181
- base_zip = zipfile.ZipFile(io.BytesIO(hwpx_list[0]), "r")
182
- base_files = {name: base_zip.read(name) for name in base_zip.namelist()}
183
- base_zip.close()
184
-
185
- # ์„น์…˜ ํŒŒ์ผ ํ›„๋ณด
186
- section_names = [n for n in base_files.keys() if n.startswith("Contents/section") and n.endswith(".xml")]
187
- if not section_names:
188
- return hwpx_list[0]
189
-
190
- parser = etree.XMLParser(remove_blank_text=False, recover=True)
191
-
192
- def first_or_none(nodes):
193
- return nodes[0] if nodes else None
194
-
195
- def findall_by_local(root, name):
196
- # ๋ชจ๋“  ๋„ค์ž„์ŠคํŽ˜์ด์Šค๋ฅผ ๋ฌด์‹œํ•˜๊ณ  ๋กœ์ปฌ ์ด๋ฆ„์œผ๋กœ๋งŒ ํƒ์ƒ‰
197
- return root.xpath(f".//*[local-name()='{name}']")
198
-
199
- def findfirst_by_local(root, name):
200
- return first_or_none(findall_by_local(root, name))
201
-
202
- # ๋ฌธ์„œ๋ณ„๋กœ ์„น์…˜ ํ•ฉ์น˜๊ธฐ
203
- for idx in range(1, len(hwpx_list)):
204
- add_zip = zipfile.ZipFile(io.BytesIO(hwpx_list[idx]), "r")
205
- add_files = {name: add_zip.read(name) for name in add_zip.namelist()}
206
- add_zip.close()
207
-
208
- for sec in section_names:
209
- if sec not in add_files or sec not in base_files:
210
- continue
211
-
212
- base_xml = base_files[sec].decode("utf-8", errors="ignore")
213
- add_xml = add_files[sec].decode("utf-8", errors="ignore")
214
-
215
- base_root = etree.fromstring(base_xml.encode("utf-8"), parser=parser)
216
- add_root = etree.fromstring(add_xml.encode("utf-8"), parser=parser)
217
-
218
- # 1) <pages> ๋ณ‘ํ•ฉ: add์˜ <page>๋“ค์„ base์˜ <pages> ๋’ค์— ์ถ”๊ฐ€
219
- base_pages = findfirst_by_local(base_root, "pages")
220
- add_pages = findfirst_by_local(add_root, "pages")
221
- if base_pages is not None and add_pages is not None:
222
- for pg in list(add_pages): # <page> ์š”์†Œ๋“ค
223
- base_pages.append(pg)
224
-
225
- # 2) ๋ณธ๋ฌธ ๋ณ‘ํ•ฉ: <section> ์•„๋ž˜ ๋ธ”๋ก๋“ค์„ ์ด์–ด๋ถ™์ž„ + ์‚ฌ์ด์— pageBreak ๋ฌธ๋‹จ ์‚ฝ์ž…
226
- base_section = findfirst_by_local(base_root, "section")
227
- add_section = findfirst_by_local(add_root, "section")
228
- if base_section is not None and add_section is not None:
229
- # hp ๋„ค์ž„์ŠคํŽ˜์ด์Šค URI ์ถ”์ถœ (์—†์œผ๋ฉด ํƒœ๊ทธ์—์„œ ๋ฝ‘์•„๋‚ด๊ธฐ)
230
- def ns_of(el):
231
- if el is None or el.tag is None:
232
- return None
233
- if el.tag.startswith("{"):
234
- return el.tag.split("}")[0][1:]
235
- return None
236
-
237
- # pages๋‚˜ section์˜ ํƒœ๊ทธ์—์„œ hp ns ์ถ”์ •
238
- hp_ns = ns_of(base_pages) or ns_of(base_section)
239
- def E(tag):
240
- return etree.Element(f"{{{hp_ns}}}{tag}") if hp_ns else etree.Element(tag)
241
-
242
- # ํŽ˜์ด์ง€ ๊ตฌ๋ถ„์šฉ pageBreak ๋ฌธ๋‹จ
243
- pagebreak_p = E("p")
244
- run = E("run")
245
- br = E("pageBreak")
246
- run.append(br)
247
- pagebreak_p.append(run)
248
- base_section.append(pagebreak_p)
249
-
250
- # add_section์˜ ์ž์‹ ๋ธ”๋ก๋“ค(๋ณดํ†ต p)์„ ๊ทธ๋Œ€๋กœ ์ด์–ด๋ถ™์ž„
251
- for child in list(add_section):
252
- base_section.append(child)
253
-
254
- # ์ง๋ ฌํ™”ํ•ด์„œ ๋˜๋Œ๋ ค์“ฐ๊ธฐ
255
- base_files[sec] = etree.tostring(base_root, encoding="utf-8", xml_declaration=False)
256
-
257
- # ์ตœ์ข… zip ๋‹ค์‹œ ๋งŒ๋“ค๊ธฐ (mimetype ๋ฌด์••์ถ• + ๋งจ์•ž)
258
- out_buf = io.BytesIO()
259
- zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
260
-
261
- if "mimetype" in base_files:
262
- zi = zipfile.ZipInfo("mimetype"); zi.compress_type = zipfile.ZIP_STORED
263
- zout.writestr(zi, base_files["mimetype"])
264
- del base_files["mimetype"]
265
-
266
- for name, data in base_files.items():
267
- if isinstance(data, str):
268
- data = data.encode("utf-8")
269
- zi = zipfile.ZipInfo(name); zi.compress_type = zipfile.ZIP_DEFLATED
270
- zout.writestr(zi, data)
271
-
272
- zout.close()
273
- out_buf.seek(0)
274
- return out_buf.getvalue()
275
-
276
- # =============== UI ===============
277
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
278
- st.markdown(
279
- """
280
  1. ํ…œํ”Œ๋ฆฟ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.
281
  2. ๋ณด์กด์ƒ์ž ์ •๋ณด๊ฐ€ ๋“ค์–ด์žˆ๋Š” ์—‘์…€ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.
282
  3. ์ถœ๋ ฅํ•  ๋ผ๋ฒจ ๋ฒˆํ˜ธ๋ฅผ ์„ ํƒํ•ด์ฃผ์„ธ์š”.
283
  4. ์••์ถ• ํŒŒ์ผ์„ ๋‹ค์šด๋ฐ›๊ณ , ์••์ถ•ํ•ด์ œ ํ›„ ํŒŒ์ผ์„ ๋ณ‘ํ•ฉํ•ด์ฃผ์„ธ์š”.
284
  5. ๋ณ‘ํ•ฉ ํ›„, ๋ผ๋ฒจ์„ ์ถœ๋ ฅํ•˜์„ธ์š”.
285
-
286
- - ๋‹จ, ํ…œํ”Œ๋ฆฟ์€ **.HWPX(ํ•œ๊ธ€)** ํŒŒ์ผ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. (.HWP ๋ถˆ๊ฐ€)
287
- """
288
- )
289
 
290
  tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
291
  n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
292
- data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx", "xls", "csv"])
293
 
294
  if tpl and data:
295
  tpl_bytes = tpl.read()
296
  df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
297
 
298
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
299
- st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
300
- st.stop()
301
 
302
  st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘์…€ ์ธก)")
303
  st.dataframe(df.head(10), use_container_width=True)
@@ -315,74 +273,55 @@ if tpl and data:
315
 
316
  # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
317
  st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
318
- keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ข…๋ฃŒ์—ฐ๋„", "๋ณด์กด๊ธฐ๊ฐ„", "๋‹จ์œ„์—…๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "๋ชฉ๋ก", "์ œ๋ชฉ", "์—…๋ฌด๋ช…"]
319
  mapping_preview = {}
320
  for i in range(int(n_per_page)):
321
  if i < len(records):
322
  r = records[i]
323
- mapping_preview.update(
324
- {
325
- f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}": r.get("๋ฐ•์Šค๋ฒˆํ˜ธ", ""),
326
- f"์ข…๋ฃŒ์—ฐ๋„{i+1}": r.get("์ƒ์‚ฐ์—ฐ๋„", ""),
327
- f"๋ณด์กด๊ธฐ๊ฐ„{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ„", ""),
328
- f"๋‹จ์œ„์—…๋ฌด{i+1}": r.get("๋‹จ์œ„์—…๋ฌด", ""),
329
- f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ", ""),
330
- f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก", ""),
331
- f"์ œ๋ชฉ{i+1}": r.get("์ œ๋ชฉ", ""),
332
- f"์—…๋ฌด๋ช…{i+1}": r.get("์ œ๋ชฉ", ""), # ํ…œํ”Œ๋ฆฟ์ด '์—…๋ฌด๋ช…X'์„ ์“ฐ๋Š” ๊ฒฝ์šฐ ๋Œ€์‘
333
- }
334
- )
335
  else:
336
- for k in keys:
337
- mapping_preview[f"{k}{i+1}"] = ""
338
- st.dataframe(
339
- pd.DataFrame([{"ํ‚ค": k, "๊ฐ’ ์•ž๋ถ€๋ถ„": str(v)[:120]} for k, v in sorted(mapping_preview.items())]),
340
- use_container_width=True,
341
- height=320,
342
- )
343
 
344
- if st.button("๐Ÿš€ ๋‹จ์ผ HWPX ์ƒ์„ฑ (๋ณ‘ํ•ฉ ์ถœ๋ ฅ)"):
 
345
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
346
  debug_all = []
347
- filled_pages: List[bytes] = []
348
 
349
  for p in range(pages):
350
- chunk = records[p * int(n_per_page) : (p + 1) * int(n_per_page)]
351
- mapping: Dict[str, str] = {}
352
  for i in range(int(n_per_page)):
353
  if i < len(chunk):
354
  r = chunk[i]
355
- mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ", "")
356
- mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„", "")
357
- mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„", "")
358
- mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด", "")
359
- mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ", "")
360
- mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก", "")
361
- title_val = r.get("์ œ๋ชฉ", "")
362
- mapping[f"์ œ๋ชฉ{i+1}"] = title_val
363
  mapping[f"์—…๋ฌด๋ช…{i+1}"] = title_val
364
  else:
365
- for k in keys:
366
- mapping[f"{k}{i+1}"] = ""
367
 
368
  out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
369
- debug_all.append({"page": p + 1, "stats": dbg})
370
- filled_pages.append(out_hwpx)
371
-
372
- # ๐Ÿ”ฅ ๋‹จ์ผ HWPX ๋ณ‘ํ•ฉ (lxml)
373
- merged_hwpx = merge_filled_hwpx_list_to_single(filled_pages)
374
-
375
- # ํŒŒ์ผ๋ช…
376
- first_box = records[0].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
377
- last_box = records[-1].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
378
- filename = (
379
- f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
380
- )
381
-
382
- st.download_button(
383
- "โฌ‡๏ธ ๋‹จ์ผ HWPX ๋‹ค์šด๋กœ๋“œ",
384
- data=merged_hwpx,
385
- file_name=filename,
386
- mime="application/vnd.hancom.hwpx",
387
- )
388
-
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import io, zipfile, re, html, json
4
+ from typing import Dict, Tuple
5
 
6
+ st.set_page_config(page_title="๐Ÿ“ฆ ๋ณด์กด์ƒ์ž ๋ผ๋ฒจ ์ƒ์„ฑ๊ธฐ", layout="wide")
7
+ st.title("๐Ÿ“ฆ ๋ณด์กด์ƒ์ž ๋ผ๋ฒจ ์ƒ์„ฑ๊ธฐ ๐Ÿ“ฆ")
 
 
8
 
9
+ # -------------------- ๋ฐ์ดํ„ฐ ์œ ํ‹ธ --------------------
10
  def _year_range(series: pd.Series) -> str:
11
  s = series.astype(str).fillna("")
12
  v = s[~s.isin(["", "0", "0000"])]
13
+ if v.empty: return "0000-0000"
 
14
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
15
+ if nums.empty: return "0000-0000"
 
16
  return f"{nums.min():04d}-{nums.max():04d}"
17
 
 
18
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
 
19
  df = df.copy()
20
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
21
  if "์ œ๋ชฉ" in df.columns:
22
  df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
23
 
24
+ # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„) = ์ข…๋ฃŒ์—ฐ๋„ ๊ทธ๋ฃน ๋ฒ”์œ„
25
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
26
  yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
27
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
 
32
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
33
  lists = []
34
  for b, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
35
+ lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
36
+ for _, r in g.iterrows()]
 
 
37
  lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
38
  list_df = pd.DataFrame(lists)
39
 
40
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
41
+ meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
42
  meta_exist = [c for c in meta_cols if c in df.columns]
43
+ meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist] if meta_exist \
44
+ else pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
 
 
45
 
46
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
47
  return merged
48
 
49
+ # -------------------- ์น˜ํ™˜ ์œ ํ‹ธ --------------------
 
50
# Regex template for a matched fieldBegin/fieldEnd pair of a named form field.
# {name} is filled in via str.format (after re.escape). The XML namespace
# prefix is captured once and back-referenced so the closing fieldEnd must use
# the same prefix as the opening fieldBegin.
FIELD_PAIR_RE_TMPL = (
    r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
    r'(.*?)'
    r'<(?P=fprefix):fieldEnd\b[^>]*/>'
)
# Literal placeholder token: TOKEN_FMT.format(key=k) yields the string "{{k}}".
TOKEN_FMT = "{{{{{key}}}}}"

# Paragraph (<*:p>) search pattern: captures the namespace prefix, the opening
# tag's attribute string, and the paragraph body (non-greedy, DOTALL).
PARA_RE = re.compile(
    r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
    re.DOTALL
)
62
 
63
+ # ์›๋ณธ run ์Šคํƒ€์ผ์„ ์ถ”์ถœํ•˜๋Š” ํ•จ์ˆ˜
64
+ def _extract_run_style(body: str, pprefix: str) -> str:
65
+ """๋ฌธ๋‹จ ๋‚ด์šฉ์—์„œ ์ฒซ ๋ฒˆ์งธ run ์š”์†Œ์˜ ์Šคํƒ€์ผ์„ ์ถ”์ถœ"""
66
+ run_pattern = re.compile(
67
+ rf'<{pprefix}:run[^>]*>.*?</{pprefix}:run>',
68
+ re.DOTALL
69
+ )
70
+ match = run_pattern.search(body)
71
+ if match:
72
+ return match.group(0)
73
+ return f'<{pprefix}:run><{pprefix}:t><//{pprefix}:t></{pprefix}:run>'
74
+
75
+ # ๋ฌธ๋‹จ ํ•˜๋‚˜๋ฅผ ๊ฐ™์€ ์Šคํƒ€์ผ๋กœ ๋ณต์ œํ•ด์ฃผ๋Š” ํ—ฌํผ (์Šคํƒ€์ผ ๋ณด์กด)
76
+ def _make_para_with_style(pprefix: str, pattrs: str, text: str, original_run: str) -> str:
77
+ esc = html.escape("" if text is None else str(text))
78
+
79
+ # ์›๋ณธ run์—์„œ ํ…์ŠคํŠธ ๋ถ€๋ถ„๋งŒ ๊ต์ฒด
80
+ text_pattern = re.compile(rf'(<{pprefix}:t[^>]*>)[^<]*(</{pprefix}:t>)')
81
+ new_run = text_pattern.sub(rf'\g<1>{esc}\g<2>', original_run)
82
+
83
+ # ๋งŒ์•ฝ ํ…์ŠคํŠธ ๋…ธ๋“œ๊ฐ€ ์—†๋‹ค๋ฉด ๊ธฐ๋ณธ ํ˜•ํƒœ๋กœ
84
+ if new_run == original_run:
85
+ t_pattern = re.compile(rf'(<{pprefix}:run[^>]*>)(.*?)(</{pprefix}:run>)', re.DOTALL)
86
+ new_run = t_pattern.sub(rf'\g<1><{pprefix}:t>{esc}</{pprefix}:t>\g<3>', original_run)
87
+
88
+ return f'<{pprefix}:p{pattrs}>{new_run}</{pprefix}:p>'
89
+
90
+ def _split_lines(val) -> list:
91
+ if val is None: return [""]
92
+ return str(val).replace("\r\n","\n").split("\n")
93
+
94
def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
    """
    Replace each parent paragraph that contains *key* with one paragraph per
    line of *value*, reusing the original paragraph's run style.

    A paragraph matches when its body contains the key as a matched
    fieldBegin/fieldEnd pair, as the bare key inside a <*:t> text node, or as
    a literal "{{key}}" token. Hit counts are accumulated in
    dbg["para_hits"][key]; dbg["touched"] is set when anything changed.
    """
    pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
    tnode_pat = re.compile(rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>', re.DOTALL)
    token_str = TOKEN_FMT.format(key=key)

    def para_repl(m):
        body = m.group("pbody")
        # Paragraphs that do not reference the key are returned unchanged.
        if not (pair_pat.search(body) or tnode_pat.search(body) or (token_str in body)):
            return m.group(0)

        lines = _split_lines(value)
        pprefix = m.group("pprefix")
        pattrs = m.group("pattrs")

        # Clone the first run of the original paragraph to keep its style.
        original_run = _extract_run_style(body, pprefix)

        # Emit one new paragraph per value line, all sharing the same
        # attributes and run style as the original paragraph.
        new_paras = "".join(_make_para_with_style(pprefix, pattrs, ln, original_run) for ln in lines)
        dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
        return new_paras

    xml2 = PARA_RE.sub(para_repl, xml)
    if xml2 != xml:
        # NOTE(review): this sets dbg["touched"], while callers appear to
        # track "files_touched" — confirm which key downstream code reads.
        dbg["touched"] = True
    return xml2
124
+
125
+ def _runs_plain(text: str) -> str:
126
  return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
129
  changed_any = False
130
 
131
+ # 0) ๋‹ค์ค‘์ค„ ํ‚ค๋Š” ๋จผ์ € "๋ถ€๋ชจ ๋ฌธ๋‹จ ๊ต์ฒด"๋กœ ์ฒ˜๋ฆฌ (์—…๋ฌด๋ช…์€ ์ œ์™ธํ•˜์—ฌ ํฐํŠธ ๋ฌธ์ œ ํ•ด๊ฒฐ)
132
+ multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ œ๋ชฉ)\d+$", re.IGNORECASE)
133
+ for k, v in mapping.items():
134
+ if multi_key.match(k):
135
+ xml_new = _replace_para_multiline(xml, k, v, dbg)
136
+ if xml_new != xml:
137
+ xml = xml_new
138
+ changed_any = True
139
+
140
+ # 1) ํ•„๋“œ์Œ(์ธ๋ผ์ธ) ์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
141
  for k, v in mapping.items():
142
+ if multi_key.match(k):
143
+ continue
144
+ replacement = _runs_plain(v)
145
  pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
146
  xml_new, n = pat.subn(replacement, xml)
147
  if n:
 
149
  xml = xml_new
150
  changed_any = True
151
 
152
+ # 2) ์ˆœ์ˆ˜ ํ…์ŠคํŠธ ์ž๋ฆฌํ‘œ์‹œ์ž(<*:t>ํ‚ค</*:t>) ๋ถ€๋ถ„์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
153
+ tnode_all = re.compile(
154
+ r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
155
+ re.DOTALL
156
+ )
157
+ for k, v in mapping.items():
158
+ if multi_key.match(k):
159
+ continue
160
+ def repl_tnode(m):
161
+ text_node = m.group(3)
162
+ if k not in text_node:
163
+ return m.group(0)
164
+ new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
165
+ return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
166
+ xml2 = tnode_all.sub(repl_tnode, xml)
167
+ if xml2 != xml:
168
+ dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
169
+ xml = xml2
170
+ changed_any = True
171
+
172
+ # 3) ํ† ํฐ ์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
173
  for k, v in mapping.items():
174
+ if multi_key.match(k):
175
+ continue
176
  tok = TOKEN_FMT.format(key=k)
177
  if tok in xml:
178
+ xml = xml.replace(tok, html.escape("" if v is None else str(v)))
 
 
 
 
 
179
  dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
180
  changed_any = True
181
 
 
183
  dbg["files_touched"] = True
184
  return xml
185
 
186
+ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
187
+ import stat, time
188
+ dbg = {"para_hits":{}, "field_hits":{}, "text_hits":{}, "token_hits":{}, "touched_files": []}
 
 
 
189
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
190
  out_buf = io.BytesIO()
191
+ zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
192
+
193
+ # ํ˜„์žฌ ์‹œ๊ฐ„
194
+ now = time.localtime()
195
 
196
  # mimetype ๋ฌด์••์ถ• + ๋งจ์•ž
197
  names = zin.namelist()
198
  if "mimetype" in names:
199
  zi = zipfile.ZipInfo("mimetype")
200
  zi.compress_type = zipfile.ZIP_STORED
201
+ # ์™„์ „ํžˆ ์ƒˆ๋กœ์šด ZipInfo๋กœ ์ฝ๊ธฐ์ „์šฉ ๋ฐฉ์ง€
202
+ zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ˜ ํŒŒ์ผ + ๋ชจ๋“  ๊ถŒํ•œ
203
+ zi.create_system = 0 # DOS/Windows
204
+ zi.date_time = now[:6]
205
  zout.writestr(zi, zin.read("mimetype"))
206
 
207
  for e in zin.infolist():
 
212
  try:
213
  s = data.decode("utf-8", errors="ignore")
214
  before = s
215
+ s = _apply_to_xml(s, mapping, {"para_hits":dbg["para_hits"], "field_hits":dbg["field_hits"],
216
+ "text_hits":dbg["text_hits"], "token_hits":dbg["token_hits"],
217
+ "files_touched":False})
 
 
218
  if s != before:
219
  dbg["touched_files"].append(e.filename)
220
  data = s.encode("utf-8")
221
  except Exception:
222
  pass
223
+
224
+ # ์™„์ „ํžˆ ์ƒˆ๋กœ์šด ZipInfo ์ƒ์„ฑ์œผ๋กœ ์ฝ๊ธฐ์ „์šฉ ๋ฐฉ์ง€
225
  zi = zipfile.ZipInfo(e.filename)
226
  zi.compress_type = zipfile.ZIP_DEFLATED
227
+ zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ˜ ํŒŒ์ผ + ๋ชจ๋“  ๊ถŒํ•œ
228
+ zi.create_system = 0 # DOS/Windows ์‹œ์Šคํ…œ
229
+ zi.date_time = now[:6] # ํ˜„์žฌ ์‹œ๊ฐ„
230
+ zi.flag_bits = 0 # ํŠน๋ณ„ํ•œ ํ”Œ๋ž˜๊ทธ ์—†์Œ
231
  zout.writestr(zi, data)
232
 
233
  zout.close()
 
235
  zin.close()
236
  return out_buf.getvalue(), dbg
237
 
238
+ # -------------------- UI --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
240
+ st.markdown("""
 
241
  1. ํ…œํ”Œ๋ฆฟ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.
242
  2. ๋ณด์กด์ƒ์ž ์ •๋ณด๊ฐ€ ๋“ค์–ด์žˆ๋Š” ์—‘์…€ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.
243
  3. ์ถœ๋ ฅํ•  ๋ผ๋ฒจ ๋ฒˆํ˜ธ๋ฅผ ์„ ํƒํ•ด์ฃผ์„ธ์š”.
244
  4. ์••์ถ• ํŒŒ์ผ์„ ๋‹ค์šด๋ฐ›๊ณ , ์••์ถ•ํ•ด์ œ ํ›„ ํŒŒ์ผ์„ ๋ณ‘ํ•ฉํ•ด์ฃผ์„ธ์š”.
245
  5. ๋ณ‘ํ•ฉ ํ›„, ๋ผ๋ฒจ์„ ์ถœ๋ ฅํ•˜์„ธ์š”.
246
+ ๋‹จ, ํ…œํ”Œ๋ฆฟ์€ .HWPX(ํ•œ๊ธ€) ํŒŒ์ผ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. (.HWP ๋ถˆ๊ฐ€)
247
+ """)
 
 
248
 
249
  tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
250
  n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
251
+ data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
252
 
253
  if tpl and data:
254
  tpl_bytes = tpl.read()
255
  df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
256
 
257
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
258
+ st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."); st.stop()
 
259
 
260
  st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘์…€ ์ธก)")
261
  st.dataframe(df.head(10), use_container_width=True)
 
273
 
274
  # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
275
  st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
276
+ keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก","์ œ๋ชฉ","์—…๋ฌด๋ช…"]
277
  mapping_preview = {}
278
  for i in range(int(n_per_page)):
279
  if i < len(records):
280
  r = records[i]
281
+ mapping_preview.update({
282
+ f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}": r.get("๋ฐ•์Šค๋ฒˆํ˜ธ",""),
283
+ f"์ข…๋ฃŒ์—ฐ๋„{i+1}": r.get("์ƒ์‚ฐ์—ฐ๋„",""),
284
+ f"๋ณด์กด๊ธฐ๊ฐ„{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ„",""),
285
+ f"๋‹จ์œ„์—…๋ฌด{i+1}": r.get("๋‹จ์œ„์—…๋ฌด",""),
286
+ f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ",""),
287
+ f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก",""),
288
+ f"์ œ๋ชฉ{i+1}": r.get("์ œ๋ชฉ",""),
289
+ f"์—…๋ฌด๋ช…{i+1}": r.get("์ œ๋ชฉ",""), # ํ…œํ”Œ๋ฆฟ์ด '์—…๋ฌด๋ช…1'์„ ์“ฐ๋Š” ๊ฒฝ์šฐ ๋Œ€์‘
290
+ })
 
 
291
  else:
292
+ for k in keys: mapping_preview[f"{k}{i+1}"] = ""
293
+ st.dataframe(pd.DataFrame([{"ํ‚ค":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
294
+ use_container_width=True, height=320)
 
 
 
 
295
 
296
+ if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ํŽ˜์ด์ง€๋ณ„ HWPX ZIP)"):
297
+ mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
298
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
299
  debug_all = []
 
300
 
301
  for p in range(pages):
302
+ chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
303
+ mapping = {}
304
  for i in range(int(n_per_page)):
305
  if i < len(chunk):
306
  r = chunk[i]
307
+ mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","")
308
+ mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","")
309
+ mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„","")
310
+ mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด","")
311
+ mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
312
+ mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก","")
313
+ title_val = r.get("์ œ๋ชฉ","")
314
+ mapping[f"์ œ๋ชฉ{i+1}"] = title_val
315
  mapping[f"์—…๋ฌด๋ช…{i+1}"] = title_val
316
  else:
317
+ for k in keys: mapping[f"{k}{i+1}"] = ""
 
318
 
319
  out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
320
+ debug_all.append({"page": p+1, "stats": dbg})
321
+ name = "_".join([r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","") for r in chunk]) if chunk else f"empty_{p+1}"
322
+ zout.writestr(f"label_{name}.hwpx", out_hwpx)
323
+
324
+ zout.close(); mem.seek(0)
325
+ st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem, file_name="labels_by_page.zip", mime="application/zip")
326
+ st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
327
+ file_name="debug.json", mime="application/json")