dohyune committed on
Commit
0f1b6bc
·
verified ·
1 Parent(s): f84af8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -333
app.py CHANGED
@@ -1,40 +1,27 @@
1
- # app.py
2
- import io
3
- import json
4
- import html
5
- import re
6
- import zipfile
7
- from typing import Dict, Tuple
8
-
9
- import pandas as pd
10
  import streamlit as st
 
 
 
11
 
 
 
12
 
13
- # ====================== Streamlit ======================
14
- st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ(HWPX) โ€” ํ†ตํ•ฉ ํŒŒ์ผ ์ถœ๋ ฅ", layout="wide")
15
- st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ โ€” HWPX ํ•„๋“œยทํ† ํฐยท๋ฌธ๋‹จ ์™„์ „ ์น˜ํ™˜ + ๋‹คํŽ˜์ด์ง€ ํ†ตํ•ฉ ์ถœ๋ ฅ")
16
-
17
-
18
- # ====================== ๋ฐ์ดํ„ฐ ์œ ํ‹ธ ======================
19
  def _year_range(series: pd.Series) -> str:
20
  s = series.astype(str).fillna("")
21
  v = s[~s.isin(["", "0", "0000"])]
22
- if v.empty:
23
- return "0000-0000"
24
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
25
- if nums.empty:
26
- return "0000-0000"
27
  return f"{nums.min():04d}-{nums.max():04d}"
28
 
29
-
30
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
31
- """๋ฐ•์Šค๋ฒˆํ˜ธ ๊ธฐ์ค€ ๋Œ€ํ‘œ ๋ฉ”ํƒ€ + ๋ชฉ๋ก(์—ฌ๋Ÿฌ ์ค„) + ์ƒ์‚ฐ์—ฐ๋„ ๋ฒ”์œ„ ์ƒ์„ฑ"""
32
  df = df.copy()
33
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
34
  if "์ œ๋ชฉ" in df.columns:
35
  df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
36
 
37
- # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„)
38
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
39
  yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
40
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
@@ -45,102 +32,72 @@ def build_rows(df: pd.DataFrame) -> pd.DataFrame:
45
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
46
  lists = []
47
  for b, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
48
- lines = [
49
- f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
50
- for _, r in g.iterrows()
51
- ]
52
  lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
53
  list_df = pd.DataFrame(lists)
54
 
55
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
56
- meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ข…๋ฃŒ์—ฐ๋„", "๋ณด์กด๊ธฐ๊ฐ„", "๋‹จ์œ„์—…๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "์ œ๋ชฉ"]
57
  meta_exist = [c for c in meta_cols if c in df.columns]
58
- if meta_exist:
59
- meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist]
60
- else:
61
- meta = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
62
 
63
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
64
  return merged
65
 
66
-
67
- # ====================== ์น˜ํ™˜ ์œ ํ‹ธ (์ธ๋ผ์ธ/๋ฌธ๋‹จ) ======================
68
-
69
- # fieldBegin/fieldEnd ์Œ (์ ‘๋‘์–ด ์™€์ผ๋“œ์นด๋“œ)
70
  FIELD_PAIR_RE_TMPL = (
71
  r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
72
  r'(.*?)'
73
  r'<(?P=fprefix):fieldEnd\b[^>]*/>'
74
  )
75
- # ํ† ํฐ ํฌ๋งท
76
  TOKEN_FMT = "{{{{{key}}}}}"
77
 
78
- # ๋ฌธ๋‹จ ํƒ์ƒ‰์šฉ
79
  PARA_RE = re.compile(
80
  r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
81
- re.DOTALL,
82
- )
83
-
84
- # run / t ๋…ธ๋“œ ์ถ”์ถœ์šฉ
85
- RUN_RE = re.compile(
86
- r'<(?P<prefix>[a-zA-Z0-9_]+):run(?P<rattrs>[^>]*)>(?P<body>.*?)</(?P=prefix):run>',
87
- re.DOTALL,
88
  )
89
- TP_RE = re.compile(
90
- r'<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>(?P<text>.*?)</(?P=prefix):t>',
91
- re.DOTALL,
92
- )
93
-
94
-
95
- def _clone_run_with_text(run_xml: str, text: str) -> str:
96
- """๊ธฐ์กด run์˜ rPr/์†์„ฑ ๋ณด์กด, t ๋‚ด์šฉ๋งŒ ๊ต์ฒด"""
97
- def _repl_t(m):
98
- return f"<{m.group('prefix')}:t>{html.escape(text)}</{m.group('prefix')}:t>"
99
-
100
- if TP_RE.search(run_xml):
101
- return TP_RE.sub(_repl_t, run_xml, count=1)
102
- # t ๋…ธ๋“œ ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์‚ฝ์ž…
103
- m = RUN_RE.search(run_xml)
104
- if not m:
105
- return f"<hp:run><hp:t>{html.escape(text)}</hp:t></hp:run>"
106
- prefix = m.group("prefix")
107
- return f"<{prefix}:run><{prefix}:t>{html.escape(text)}</{prefix}:t></{prefix}:run>"
108
-
109
-
110
- def _extract_ppr_and_template_run(pbody: str):
111
- """๋ฌธ๋‹จ pPr(์žˆ์œผ๋ฉด)๊ณผ ์ฒซ ๋ฒˆ์งธ run ์›ํ˜•์„ ์ถ”์ถœ"""
112
- ppr_match = re.search(r'<(?P<prefix>[a-zA-Z0-9_]+):pPr\b[^>]*/>', pbody)
113
- ppr_xml = ppr_match.group(0) if ppr_match else ""
114
-
115
- run_match = RUN_RE.search(pbody)
116
- if run_match:
117
- template_run = run_match.group(0) # rPr ํฌํ•จ
118
- else:
119
- template_run = "<hp:run><hp:t></hp:t></hp:run>"
120
- return ppr_xml, template_run
121
-
122
-
123
- def _make_para_from_templates(pprefix: str, pattrs: str, ppr_xml: str, template_run: str, text: str) -> str:
124
- cloned_run = _clone_run_with_text(template_run, text)
125
- return f"<{pprefix}:p{pattrs}>{ppr_xml}{cloned_run}</{pprefix}:p>"
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  def _split_lines(val) -> list:
129
- if val is None:
130
- return [""]
131
- return str(val).replace("\r\n", "\n").split("\n")
132
-
133
 
134
  def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
135
  """
136
- key๊ฐ€ ํฌํ•จ๋œ '๋ถ€๋ชจ ๋ฌธ๋‹จ ์ „์ฒด'๋ฅผ ๊ฐ’์˜ ๊ฐ ์ค„์„ ๋‹ด์€ ๋‹ค์ˆ˜ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด.
137
- ์› ๋ฌธ๋‹จ pPr/rPr ์Šคํƒ€์ผ ์œ ์ง€.
138
  """
139
  pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
140
- tnode_pat = re.compile(
141
- rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>',
142
- re.DOTALL,
143
- )
144
  token_str = TOKEN_FMT.format(key=key)
145
 
146
  def para_repl(m):
@@ -150,28 +107,29 @@ def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
150
 
151
  lines = _split_lines(value)
152
  pprefix = m.group("pprefix")
153
- pattrs = m.group("pattrs")
154
- ppr_xml, template_run = _extract_ppr_and_template_run(body)
155
-
156
- new_paras = "".join(_make_para_from_templates(pprefix, pattrs, ppr_xml, template_run, ln) for ln in lines)
 
 
 
157
  dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
158
  return new_paras
159
 
160
  xml2 = PARA_RE.sub(para_repl, xml)
161
  if xml2 != xml:
162
- dbg["files_touched"] = True
163
  return xml2
164
 
165
-
166
  def _runs_plain(text: str) -> str:
167
  return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
168
 
169
-
170
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
171
  changed_any = False
172
 
173
- # (A) ๋‹ค์ค‘ ์ค„ ํ‚ค๋Š” "๋ฌธ๋‹จ ๊ต์ฒด"๋กœ ๋จผ์ € ์ฒ˜๋ฆฌ (๋ชฉ๋ก/์ œ๋ชฉ/์—…๋ฌด๋ช… ๋ชจ๋‘ ์ค„๋ฐ”๊ฟˆ ๊ฐ•์ œ)
174
- multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ œ๋ชฉ|์—…๋ฌด๋ช…)\d+$", re.IGNORECASE)
175
  for k, v in mapping.items():
176
  if multi_key.match(k):
177
  xml_new = _replace_para_multiline(xml, k, v, dbg)
@@ -179,7 +137,7 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
179
  xml = xml_new
180
  changed_any = True
181
 
182
- # (B) ์ธ๋ผ์ธ ํ•„๋“œ์Œ ์น˜ํ™˜ โ€” ๋‹จ์ผ ์ค„๋งŒ
183
  for k, v in mapping.items():
184
  if multi_key.match(k):
185
  continue
@@ -191,29 +149,27 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
191
  xml = xml_new
192
  changed_any = True
193
 
194
- # (C) ์ˆœ์ˆ˜ ํ…์ŠคํŠธ ์ž๋ฆฌํ‘œ์‹œ์ž(<*:t>ํ‚ค</*:t>) ์น˜ํ™˜ โ€” ๋‹จ์ผ ์ค„๋งŒ
195
  tnode_all = re.compile(
196
  r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
197
- re.DOTALL,
198
  )
199
  for k, v in mapping.items():
200
  if multi_key.match(k):
201
  continue
202
-
203
  def repl_tnode(m):
204
  text_node = m.group(3)
205
  if k not in text_node:
206
  return m.group(0)
207
  new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
208
  return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
209
-
210
  xml2 = tnode_all.sub(repl_tnode, xml)
211
  if xml2 != xml:
212
  dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
213
  xml = xml2
214
  changed_any = True
215
 
216
- # (D) ํ† ํฐ ์น˜ํ™˜ โ€” ๋‹จ์ผ ์ค„๋งŒ
217
  for k, v in mapping.items():
218
  if multi_key.match(k):
219
  continue
@@ -227,25 +183,24 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
227
  dbg["files_touched"] = True
228
  return xml
229
 
230
-
231
- def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes, dict]:
232
- """HWPX(zip) ๋‚ด๋ถ€ ๋ชจ๋“  XML์— ์น˜ํ™˜ ์ ์šฉ"""
233
- import time
234
-
235
- dbg = {"para_hits": {}, "field_hits": {}, "text_hits": {}, "token_hits": {}, "touched_files": []}
236
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
237
  out_buf = io.BytesIO()
238
  zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
239
 
 
240
  now = time.localtime()
241
 
242
- # mimetype: ๋ฌด์••์ถ• + ๋งจ์•ž
243
  names = zin.namelist()
244
  if "mimetype" in names:
245
  zi = zipfile.ZipInfo("mimetype")
246
  zi.compress_type = zipfile.ZIP_STORED
247
- zi.external_attr = 0o100666 << 16
248
- zi.create_system = 0
 
249
  zi.date_time = now[:6]
250
  zout.writestr(zi, zin.read("mimetype"))
251
 
@@ -257,29 +212,22 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes,
257
  try:
258
  s = data.decode("utf-8", errors="ignore")
259
  before = s
260
- s = _apply_to_xml(
261
- s,
262
- mapping,
263
- {
264
- "para_hits": dbg["para_hits"],
265
- "field_hits": dbg["field_hits"],
266
- "text_hits": dbg["text_hits"],
267
- "token_hits": dbg["token_hits"],
268
- "files_touched": False,
269
- },
270
- )
271
  if s != before:
272
  dbg["touched_files"].append(e.filename)
273
  data = s.encode("utf-8")
274
  except Exception:
275
  pass
276
-
 
277
  zi = zipfile.ZipInfo(e.filename)
278
  zi.compress_type = zipfile.ZIP_DEFLATED
279
- zi.external_attr = 0o100666 << 16
280
- zi.create_system = 0
281
- zi.date_time = now[:6]
282
- zi.flag_bits = 0
283
  zout.writestr(zi, data)
284
 
285
  zout.close()
@@ -287,155 +235,24 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes,
287
  zin.close()
288
  return out_buf.getvalue(), dbg
289
 
290
-
291
- # ====================== ์„น์…˜/ํŽ˜์ด์ง€ ๋ณ‘ํ•ฉ (๋‹จ์ผ HWPX๋กœ ์ถœ๋ ฅ) ======================
292
-
293
- def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
294
- """๋‘ HWPX๋ฅผ 1๊ฐœ๋กœ ๋ณ‘ํ•ฉ: pages ๋ชฉ๋ก๊ณผ ๋ณธ๋ฌธ ๋ฌธ๋‹จ๊นŒ์ง€ ํ•ฉ์นจ"""
295
- import time
296
-
297
- base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
298
- add_zip = zipfile.ZipFile(io.BytesIO(additional_hwpx), "r")
299
-
300
- out_buf = io.BytesIO()
301
- out_zip = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
302
- now = time.localtime()
303
-
304
- # mimetype
305
- if "mimetype" in base_zip.namelist():
306
- zi = zipfile.ZipInfo("mimetype")
307
- zi.compress_type = zipfile.ZIP_STORED
308
- zi.external_attr = 0o100666 << 16
309
- zi.create_system = 0
310
- zi.date_time = now[:6]
311
- out_zip.writestr(zi, base_zip.read("mimetype"))
312
-
313
- # ์„น์…˜ XML ์ˆ˜์ง‘
314
- base_sections, add_sections = {}, {}
315
- for fn in base_zip.namelist():
316
- if fn == "mimetype":
317
- continue
318
- if fn.startswith("Contents/section") and fn.endswith(".xml"):
319
- base_sections[fn] = base_zip.read(fn).decode("utf-8", errors="ignore")
320
- else:
321
- zi = zipfile.ZipInfo(fn)
322
- zi.compress_type = zipfile.ZIP_DEFLATED
323
- zi.external_attr = 0o100666 << 16
324
- zi.create_system = 0
325
- zi.date_time = now[:6]
326
- zi.flag_bits = 0
327
- out_zip.writestr(zi, base_zip.read(fn))
328
-
329
- for fn in add_zip.namelist():
330
- if fn.startswith("Contents/section") and fn.endswith(".xml"):
331
- add_sections[fn] = add_zip.read(fn).decode("utf-8", errors="ignore")
332
-
333
- # ์„น์…˜ ๋ณ‘ํ•ฉ
334
- merged_sections = merge_sections(base_sections, add_sections)
335
-
336
- # ๊ฒฐ๊ณผ ๊ธฐ๋ก
337
- for fn, content in merged_sections.items():
338
- zi = zipfile.ZipInfo(fn)
339
- zi.compress_type = zipfile.ZIP_DEFLATED
340
- zi.external_attr = 0o100666 << 16
341
- zi.create_system = 0
342
- zi.date_time = now[:6]
343
- zi.flag_bits = 0
344
- out_zip.writestr(zi, content.encode("utf-8"))
345
-
346
- base_zip.close()
347
- add_zip.close()
348
- out_zip.close()
349
- out_buf.seek(0)
350
- return out_buf.getvalue()
351
-
352
-
353
- def merge_sections(base_sections: dict, add_sections: dict) -> dict:
354
- merged = base_sections.copy()
355
- for fn, add_xml in add_sections.items():
356
- if fn in merged:
357
- merged[fn] = merge_section_content(merged[fn], add_xml)
358
- else:
359
- merged[fn] = add_xml
360
- return merged
361
-
362
-
363
- def merge_section_content(base_xml: str, add_xml: str) -> str:
364
- """
365
- 1) <*:pages>์— ์ƒˆ page ์—”ํŠธ๋ฆฌ ์ถ”๊ฐ€ (self-closing/์ผ๋ฐ˜ ๋ชจ๋‘)
366
- 2) ๋ณธ๋ฌธ(<*:p>) ๋์— pageBreak + ์ถ”๊ฐ€ ๋ฌธ๋‹จ ๋ถ™์ด๊ธฐ
367
- """
368
- # pages ๋ชฉ๋ก ํ•ฉ์น˜๊ธฐ
369
- pages_block_re = re.compile(
370
- r'<(?P<pfx>[a-zA-Z0-9_]+):pages\b[^>]*>(?P<body>.*?)</(?P=pfx):pages>',
371
- re.DOTALL,
372
- )
373
- m_base_pages = pages_block_re.search(base_xml)
374
- m_add_pages = pages_block_re.search(add_xml)
375
- if m_base_pages and m_add_pages:
376
- pfx = m_base_pages.group("pfx")
377
- body_base = m_base_pages.group("body")
378
- body_add = m_add_pages.group("body")
379
- add_entries = re.findall(
380
- rf'<{pfx}:page\b[^>]*/>|<{pfx}:page\b[^>]*>.*?</{pfx}:page>',
381
- body_add,
382
- re.DOTALL,
383
- )
384
- if add_entries:
385
- new_body = body_base + "".join(add_entries)
386
- base_xml = (
387
- base_xml[: m_base_pages.start("body")]
388
- + new_body
389
- + base_xml[m_base_pages.end("body") :]
390
- )
391
-
392
- # ๋ณธ๋ฌธ ๋ฌธ๋‹จ ํ•ฉ์น˜๊ธฐ
393
- para_re = re.compile(
394
- r'<(?P<pfx>[a-zA-Z0-9_]+):p\b[^>]*>.*?</(?P=pfx):p>', re.DOTALL
395
- )
396
- pfx_in_base = None
397
- m0 = para_re.search(base_xml)
398
- if m0:
399
- pfx_in_base = m0.group("pfx")
400
-
401
- add_paras = [m.group(0) for m in para_re.finditer(add_xml)]
402
- if add_paras and pfx_in_base:
403
- pagebreak_para = (
404
- f'<{pfx_in_base}:p><{pfx_in_base}:run>'
405
- f'<{pfx_in_base}:pageBreak/>'
406
- f'</{pfx_in_base}:run></{pfx_in_base}:p>'
407
- )
408
- section_end_re = re.compile(rf'</{pfx_in_base}:section>')
409
- m_end = section_end_re.search(base_xml)
410
- if m_end:
411
- insert_at = m_end.start()
412
- base_xml = (
413
- base_xml[:insert_at] + pagebreak_para + "".join(add_paras) + base_xml[insert_at:]
414
- )
415
- return base_xml
416
-
417
-
418
- # ====================== UI ======================
419
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
420
- st.markdown(
421
- """
422
- - **๋‹ค์ค‘ ์ค„(๋ชฉ๋ก/์ œ๋ชฉ/์—…๋ฌด๋ช…)** ์€ ์› ๋ฌธ๋‹จ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•œ ์ฑ„ **๋ถ€๋ชจ ๋ฌธ๋‹จ์„ ์ค„ ์ˆ˜๋งŒํผ ๋ณต์ œ**ํ•˜์—ฌ ๊ฒน์นจ ์—†์ด ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.
423
- - ๋ฐ•์Šค๊ฐ€ ๋งŽ์•„๋„ **๋งˆ์ง€๋ง‰์— ํ•œ ๊ฐœ์˜ HWPX ํŒŒ์ผ**๋กœ ํ†ตํ•ฉํ•ด ๋‚ด๋ ค์ค๋‹ˆ๋‹ค.
424
- - ํ…œํ”Œ๋ฆฟ์€ ๋ฐ˜๋“œ์‹œ **.HWPX** ์—ฌ์•ผ ํ•ฉ๋‹ˆ๋‹ค. (.HWP ๋ถˆ๊ฐ€)
425
- """
426
- )
427
 
428
  tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
429
  n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
430
- data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx", "xls", "csv"])
431
 
432
  if tpl and data:
433
  tpl_bytes = tpl.read()
434
  df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
435
 
436
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
437
- st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
438
- st.stop()
439
 
440
  st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘๏ฟฝ๏ฟฝ๏ฟฝ ์ธก)")
441
  st.dataframe(df.head(10), use_container_width=True)
@@ -453,81 +270,55 @@ if tpl and data:
453
 
454
  # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
455
  st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
456
- keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ข…๋ฃŒ์—ฐ๋„", "๋ณด์กด๊ธฐ๊ฐ„", "๋‹จ์œ„์—…๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "๋ชฉ๋ก", "์ œ๋ชฉ", "์—…๋ฌด๋ช…"]
457
  mapping_preview = {}
458
  for i in range(int(n_per_page)):
459
  if i < len(records):
460
  r = records[i]
461
- mapping_preview.update(
462
- {
463
- f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}": r.get("๋ฐ•์Šค๋ฒˆํ˜ธ", ""),
464
- f"์ข…๋ฃŒ์—ฐ๋„{i+1}": r.get("์ƒ์‚ฐ์—ฐ๋„", ""),
465
- f"๋ณด์กด๊ธฐ๊ฐ„{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ„", ""),
466
- f"๋‹จ์œ„์—…๋ฌด{i+1}": r.get("๋‹จ์œ„์—…๋ฌด", ""),
467
- f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ", ""),
468
- f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก", ""),
469
- f"์ œ๋ชฉ{i+1}": r.get("์ œ๋ชฉ", ""),
470
- f"์—…๋ฌด๋ช…{i+1}": r.get("์ œ๋ชฉ", ""), # ํ…œํ”Œ๋ฆฟ์ด '์—…๋ฌด๋ช…X'์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์–ด ๋™์‹œ ๋งคํ•‘
471
- }
472
- )
473
  else:
474
- for k in keys:
475
- mapping_preview[f"{k}{i+1}"] = ""
476
- st.dataframe(
477
- pd.DataFrame([{"ํ‚ค": k, "๊ฐ’ ์•ž๋ถ€๋ถ„": str(v)[:120]} for k, v in sorted(mapping_preview.items())]),
478
- use_container_width=True,
479
- height=320,
480
- )
481
 
482
- if st.button("๐Ÿš€ ํ†ตํ•ฉ HWPX ์ƒ์„ฑ (ํ•œ ํŒŒ์ผ๋กœ ๋‹ค์šด๋กœ๋“œ)"):
 
483
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
484
  debug_all = []
485
 
486
- merged_hwpx: bytes | None = None
487
-
488
  for p in range(pages):
489
- chunk = records[p * int(n_per_page) : (p + 1) * int(n_per_page)]
490
- mapping: Dict[str, str] = {}
491
  for i in range(int(n_per_page)):
492
  if i < len(chunk):
493
  r = chunk[i]
494
- mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ", "")
495
- mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„", "")
496
- mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„", "")
497
- mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด", "")
498
- mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ", "")
499
- mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก", "")
500
- title_val = r.get("์ œ๋ชฉ", "")
501
- mapping[f"์ œ๋ชฉ{i+1}"] = title_val
502
  mapping[f"์—…๋ฌด๋ช…{i+1}"] = title_val
503
  else:
504
- for k in keys:
505
- mapping[f"{k}{i+1}"] = ""
506
-
507
- if p == 0:
508
- merged_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
509
- else:
510
- page_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
511
- merged_hwpx = merge_hwpx_pages(merged_hwpx, page_hwpx)
512
-
513
- debug_all.append({"page": p + 1, "stats": dbg})
514
-
515
- # ํŒŒ์ผ๋ช…
516
- first_box = records[0].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
517
- last_box = records[-1].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
518
- filename = (
519
- f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
520
- )
521
-
522
- st.download_button(
523
- "โฌ‡๏ธ ํ†ต๏ฟฝ๏ฟฝ HWPX ๋‹ค์šด๋กœ๋“œ",
524
- data=merged_hwpx,
525
- file_name=filename,
526
- mime="application/vnd.hancom.hwpx",
527
- )
528
- st.download_button(
529
- "โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)",
530
- data=json.dumps(debug_all, ensure_ascii=False, indent=2),
531
- file_name="debug.json",
532
- mime="application/json",
533
- )
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import io, zipfile, re, html, json
4
+ from typing import Dict, Tuple
5
 
6
+ st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ(HWPX) โ€” ๋ฌธ๋‹จ ๋‹จ์œ„ ์™„์ „ ์น˜ํ™˜", layout="wide")
7
+ st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ โ€” HWPX ํ•„๋“œยทํ† ํฐยทํ…์ŠคํŠธ ์™„์ „ ์น˜ํ™˜(๋ฌธ๋‹จ ๋‹จ์œ„)")
8
 
9
+ # -------------------- ๋ฐ์ดํ„ฐ ์œ ํ‹ธ --------------------
 
 
 
 
 
10
  def _year_range(series: pd.Series) -> str:
11
  s = series.astype(str).fillna("")
12
  v = s[~s.isin(["", "0", "0000"])]
13
+ if v.empty: return "0000-0000"
 
14
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
15
+ if nums.empty: return "0000-0000"
 
16
  return f"{nums.min():04d}-{nums.max():04d}"
17
 
 
18
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
 
19
  df = df.copy()
20
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
21
  if "์ œ๋ชฉ" in df.columns:
22
  df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
23
 
24
+ # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„) = ์ข…๋ฃŒ์—ฐ๋„ ๊ทธ๋ฃน ๋ฒ”์œ„
25
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
26
  yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
27
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
 
32
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
33
  lists = []
34
  for b, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
35
+ lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
36
+ for _, r in g.iterrows()]
 
 
37
  lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
38
  list_df = pd.DataFrame(lists)
39
 
40
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
41
+ meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
42
  meta_exist = [c for c in meta_cols if c in df.columns]
43
+ meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist] if meta_exist \
44
+ else pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
 
 
45
 
46
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
47
  return merged
48
 
49
+ # -------------------- ์น˜ํ™˜ ์œ ํ‹ธ --------------------
 
 
 
50
  FIELD_PAIR_RE_TMPL = (
51
  r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
52
  r'(.*?)'
53
  r'<(?P=fprefix):fieldEnd\b[^>]*/>'
54
  )
 
55
  TOKEN_FMT = "{{{{{key}}}}}"
56
 
57
+ # ๋ฌธ๋‹จ(<*:p>) ํƒ์ƒ‰ ํŒจํ„ด
58
  PARA_RE = re.compile(
59
  r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
60
+ re.DOTALL
 
 
 
 
 
 
61
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ # ์›๋ณธ run ์Šคํƒ€์ผ์„ ์ถ”์ถœํ•˜๋Š” ํ•จ์ˆ˜
64
+ def _extract_run_style(body: str, pprefix: str) -> str:
65
+ """๋ฌธ๋‹จ ๋‚ด์šฉ์—์„œ ์ฒซ ๋ฒˆ์งธ run ์š”์†Œ์˜ ์Šคํƒ€์ผ์„ ์ถ”์ถœ"""
66
+ run_pattern = re.compile(
67
+ rf'<{pprefix}:run[^>]*>.*?</{pprefix}:run>',
68
+ re.DOTALL
69
+ )
70
+ match = run_pattern.search(body)
71
+ if match:
72
+ return match.group(0)
73
+ return f'<{pprefix}:run><{pprefix}:t><//{pprefix}:t></{pprefix}:run>'
74
+
75
+ # ๋ฌธ๋‹จ ํ•˜๋‚˜๋ฅผ ๊ฐ™์€ ์Šคํƒ€์ผ๋กœ ๋ณต์ œํ•ด์ฃผ๋Š” ํ—ฌํผ (์Šคํƒ€์ผ ๋ณด์กด)
76
+ def _make_para_with_style(pprefix: str, pattrs: str, text: str, original_run: str) -> str:
77
+ esc = html.escape("" if text is None else str(text))
78
+
79
+ # ์›๋ณธ run์—์„œ ํ…์ŠคํŠธ ๋ถ€๋ถ„๋งŒ ๊ต์ฒด
80
+ text_pattern = re.compile(rf'(<{pprefix}:t[^>]*>)[^<]*(</{pprefix}:t>)')
81
+ new_run = text_pattern.sub(rf'\g<1>{esc}\g<2>', original_run)
82
+
83
+ # ๋งŒ์•ฝ ํ…์ŠคํŠธ ๋…ธ๋“œ๊ฐ€ ์—†๋‹ค๋ฉด ๊ธฐ๋ณธ ํ˜•ํƒœ๋กœ
84
+ if new_run == original_run:
85
+ t_pattern = re.compile(rf'(<{pprefix}:run[^>]*>)(.*?)(</{pprefix}:run>)', re.DOTALL)
86
+ new_run = t_pattern.sub(rf'\g<1><{pprefix}:t>{esc}</{pprefix}:t>\g<3>', original_run)
87
+
88
+ return f'<{pprefix}:p{pattrs}>{new_run}</{pprefix}:p>'
89
 
90
  def _split_lines(val) -> list:
91
+ if val is None: return [""]
92
+ return str(val).replace("\r\n","\n").split("\n")
 
 
93
 
94
  def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
95
  """
96
+ key๊ฐ€ ํฌํ•จ๋œ '๋ถ€๋ชจ ๋ฌธ๋‹จ ์ „์ฒด'๋ฅผ, ๊ฐ’์˜ ๊ฐ ์ค„์„ ๋‹ด์€ ์—ฌ๋Ÿฌ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด.
97
+ ์›๋ณธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•˜๋ฉด์„œ ๊ต์ฒด.
98
  """
99
  pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
100
+ tnode_pat = re.compile(rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>', re.DOTALL)
 
 
 
101
  token_str = TOKEN_FMT.format(key=key)
102
 
103
  def para_repl(m):
 
107
 
108
  lines = _split_lines(value)
109
  pprefix = m.group("pprefix")
110
+ pattrs = m.group("pattrs")
111
+
112
+ # ์›๋ณธ run ์Šคํƒ€์ผ ์ถ”์ถœ
113
+ original_run = _extract_run_style(body, pprefix)
114
+
115
+ # ๊ฐ ์ค„์— ๋Œ€ํ•ด ์›๋ณธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•˜๋ฉด์„œ ์ƒˆ ๋ฌธ๋‹จ ์ƒ์„ฑ
116
+ new_paras = "".join(_make_para_with_style(pprefix, pattrs, ln, original_run) for ln in lines)
117
  dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
118
  return new_paras
119
 
120
  xml2 = PARA_RE.sub(para_repl, xml)
121
  if xml2 != xml:
122
+ dbg["touched"] = True
123
  return xml2
124
 
 
125
  def _runs_plain(text: str) -> str:
126
  return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
127
 
 
128
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
129
  changed_any = False
130
 
131
+ # 0) ๋‹ค์ค‘์ค„ ํ‚ค๋Š” ๋จผ์ € "๋ถ€๋ชจ ๋ฌธ๋‹จ ๊ต์ฒด"๋กœ ์ฒ˜๋ฆฌ (์—…๋ฌด๋ช…์€ ์ œ์™ธํ•˜์—ฌ ํฐํŠธ ๋ฌธ์ œ ํ•ด๊ฒฐ)
132
+ multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ œ๋ชฉ)\d+$", re.IGNORECASE)
133
  for k, v in mapping.items():
134
  if multi_key.match(k):
135
  xml_new = _replace_para_multiline(xml, k, v, dbg)
 
137
  xml = xml_new
138
  changed_any = True
139
 
140
+ # 1) ํ•„๋“œ์Œ(์ธ๋ผ์ธ) ์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
141
  for k, v in mapping.items():
142
  if multi_key.match(k):
143
  continue
 
149
  xml = xml_new
150
  changed_any = True
151
 
152
+ # 2) ์ˆœ์ˆ˜ ํ…์ŠคํŠธ ์ž๋ฆฌํ‘œ์‹œ์ž(<*:t>ํ‚ค</*:t>) ๋ถ€๋ถ„์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
153
  tnode_all = re.compile(
154
  r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
155
+ re.DOTALL
156
  )
157
  for k, v in mapping.items():
158
  if multi_key.match(k):
159
  continue
 
160
  def repl_tnode(m):
161
  text_node = m.group(3)
162
  if k not in text_node:
163
  return m.group(0)
164
  new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
165
  return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
 
166
  xml2 = tnode_all.sub(repl_tnode, xml)
167
  if xml2 != xml:
168
  dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
169
  xml = xml2
170
  changed_any = True
171
 
172
+ # 3) ํ† ํฐ ์น˜ํ™˜ โ€” ๋‹จ์ผ์ค„ ํ‚ค๋งŒ
173
  for k, v in mapping.items():
174
  if multi_key.match(k):
175
  continue
 
183
  dbg["files_touched"] = True
184
  return xml
185
 
186
+ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
187
+ import stat, time
188
+ dbg = {"para_hits":{}, "field_hits":{}, "text_hits":{}, "token_hits":{}, "touched_files": []}
 
 
 
189
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
190
  out_buf = io.BytesIO()
191
  zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
192
 
193
+ # ํ˜„์žฌ ์‹œ๊ฐ„
194
  now = time.localtime()
195
 
196
+ # mimetype ๋ฌด์••์ถ• + ๋งจ์•ž
197
  names = zin.namelist()
198
  if "mimetype" in names:
199
  zi = zipfile.ZipInfo("mimetype")
200
  zi.compress_type = zipfile.ZIP_STORED
201
+ # ์™„์ „ํžˆ ์ƒˆ๋กœ์šด ZipInfo๋กœ ์ฝ๊ธฐ์ „์šฉ ๋ฐฉ์ง€
202
+ zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ˜ ํŒŒ์ผ + ๋ชจ๋“  ๊ถŒํ•œ
203
+ zi.create_system = 0 # DOS/Windows
204
  zi.date_time = now[:6]
205
  zout.writestr(zi, zin.read("mimetype"))
206
 
 
212
  try:
213
  s = data.decode("utf-8", errors="ignore")
214
  before = s
215
+ s = _apply_to_xml(s, mapping, {"para_hits":dbg["para_hits"], "field_hits":dbg["field_hits"],
216
+ "text_hits":dbg["text_hits"], "token_hits":dbg["token_hits"],
217
+ "files_touched":False})
 
 
 
 
 
 
 
 
218
  if s != before:
219
  dbg["touched_files"].append(e.filename)
220
  data = s.encode("utf-8")
221
  except Exception:
222
  pass
223
+
224
+ # ์™„์ „ํžˆ ์ƒˆ๋กœ์šด ZipInfo ์ƒ์„ฑ์œผ๋กœ ์ฝ๊ธฐ์ „์šฉ ๋ฐฉ์ง€
225
  zi = zipfile.ZipInfo(e.filename)
226
  zi.compress_type = zipfile.ZIP_DEFLATED
227
+ zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ˜ ํŒŒ์ผ + ๋ชจ๋“  ๊ถŒํ•œ
228
+ zi.create_system = 0 # DOS/Windows ์‹œ์Šคํ…œ
229
+ zi.date_time = now[:6] # ํ˜„์žฌ ์‹œ๊ฐ„
230
+ zi.flag_bits = 0 # ํŠน๋ณ„ํ•œ ํ”Œ๋ž˜๊ทธ ์—†์Œ
231
  zout.writestr(zi, data)
232
 
233
  zout.close()
 
235
  zin.close()
236
  return out_buf.getvalue(), dbg
237
 
238
+ # -------------------- UI --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
240
+ st.markdown("""
241
+ - **๋‹ค์ค‘ ์ค„(๋ชฉ๋ก/์ œ๋ชฉ)์€ ๋ถ€๋ชจ ๋ฌธ๋‹จ์„ ์—ฌ๋Ÿฌ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด**ํ•˜์—ฌ ๊ฒน์นจ ์—†์ด ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.
242
+ - **์—…๋ฌด๋ช…์€ ๋‹จ์ผ์ค„๋กœ ์ฒ˜๋ฆฌ**ํ•˜์—ฌ ์›๋ณธ ํฐํŠธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค.
243
+ - **์ƒ์„ฑ๋œ HWPX ํŒŒ์ผ์˜ ์ฝ๊ธฐ์ „์šฉ ์†์„ฑ์ด ํ•ด์ œ**๋˜์–ด ํŽธ์ง‘ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.
244
+ """)
 
 
245
 
246
  tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
247
  n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
248
+ data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
249
 
250
  if tpl and data:
251
  tpl_bytes = tpl.read()
252
  df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
253
 
254
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
255
+ st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."); st.stop()
 
256
 
257
  st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘๏ฟฝ๏ฟฝ๏ฟฝ ์ธก)")
258
  st.dataframe(df.head(10), use_container_width=True)
 
270
 
271
  # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
272
  st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
273
+ keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก","์ œ๋ชฉ","์—…๋ฌด๋ช…"]
274
  mapping_preview = {}
275
  for i in range(int(n_per_page)):
276
  if i < len(records):
277
  r = records[i]
278
+ mapping_preview.update({
279
+ f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}": r.get("๋ฐ•์Šค๋ฒˆํ˜ธ",""),
280
+ f"์ข…๋ฃŒ์—ฐ๋„{i+1}": r.get("์ƒ์‚ฐ์—ฐ๋„",""),
281
+ f"๋ณด์กด๊ธฐ๊ฐ„{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ„",""),
282
+ f"๋‹จ์œ„์—…๋ฌด{i+1}": r.get("๋‹จ์œ„์—…๋ฌด",""),
283
+ f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ",""),
284
+ f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก",""),
285
+ f"์ œ๋ชฉ{i+1}": r.get("์ œ๋ชฉ",""),
286
+ f"์—…๋ฌด๋ช…{i+1}": r.get("์ œ๋ชฉ",""), # ํ…œํ”Œ๋ฆฟ์ด '์—…๋ฌด๋ช…1'์„ ์“ฐ๋Š” ๊ฒฝ์šฐ ๋Œ€์‘
287
+ })
 
 
288
  else:
289
+ for k in keys: mapping_preview[f"{k}{i+1}"] = ""
290
+ st.dataframe(pd.DataFrame([{"ํ‚ค":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
291
+ use_container_width=True, height=320)
 
 
 
 
292
 
293
+ if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ํŽ˜์ด์ง€๋ณ„ HWPX ZIP)"):
294
+ mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
295
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
296
  debug_all = []
297
 
 
 
298
  for p in range(pages):
299
+ chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
300
+ mapping = {}
301
  for i in range(int(n_per_page)):
302
  if i < len(chunk):
303
  r = chunk[i]
304
+ mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","")
305
+ mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","")
306
+ mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„","")
307
+ mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด","")
308
+ mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
309
+ mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก","")
310
+ title_val = r.get("์ œ๋ชฉ","")
311
+ mapping[f"์ œ๋ชฉ{i+1}"] = title_val
312
  mapping[f"์—…๋ฌด๋ช…{i+1}"] = title_val
313
  else:
314
+ for k in keys: mapping[f"{k}{i+1}"] = ""
315
+
316
+ out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
317
+ debug_all.append({"page": p+1, "stats": dbg})
318
+ name = "_".join([r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","") for r in chunk]) if chunk else f"empty_{p+1}"
319
+ zout.writestr(f"label_{name}.hwpx", out_hwpx)
320
+
321
+ zout.close(); mem.seek(0)
322
+ st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem, file_name="labels_by_page.zip", mime="application/zip")
323
+ st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
324
+ file_name="debug.json", mime="application/json")