dohyune committed on
Commit
bd43ad1
·
verified ·
1 Parent(s): 45aa168

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -138
app.py CHANGED
@@ -2,10 +2,10 @@ import streamlit as st
2
  import pandas as pd
3
  import io, zipfile, re, html, json
4
 
5
- st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (ํ•„๋“œ/ํ† ํฐ ์ž๋™๊ฐ์ง€)", layout="wide")
6
- st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (.HWPX โ€” ํ•„๋“œ/ํ† ํฐ ์ž๋™๊ฐ์ง€)")
7
 
8
- # =============== ๊ณตํ†ต ์œ ํ‹ธ ===============
9
  def compute_year_range(series: pd.Series) -> str:
10
  s = series.astype(str).fillna("")
11
  valid = s[~s.isin(["", "0", "0000"])]
@@ -29,14 +29,11 @@ def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
29
  else:
30
  prod_df = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique(), "์ƒ์‚ฐ์—ฐ๋„": "0000-0000"})
31
 
32
- # ๋ชฉ๋ก(๊ด€๋ฆฌ๋ฒˆํ˜ธ + ์ œ๋ชฉ)
33
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
34
  list_rows = []
35
  for box, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
36
- if has_mgmt:
37
- lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r['์ œ๋ชฉ']}" for _, r in g.iterrows()]
38
- else:
39
- lines = [f"- {r['์ œ๋ชฉ']}" for _, r in g.iterrows()]
40
  list_rows.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": box, "๋ชฉ๋ก": "\r\n".join(lines)})
41
  list_df = pd.DataFrame(list_rows)
42
 
@@ -47,7 +44,13 @@ def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
47
 
48
  return meta_df.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(prod_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
49
 
50
- def _build_list_runs(text: str) -> str:
 
 
 
 
 
 
51
  if text is None: return ""
52
  lines = str(text).replace("\r\n", "\n").split("\n")
53
  parts = []
@@ -57,17 +60,12 @@ def _build_list_runs(text: str) -> str:
57
  parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
58
  return "".join(parts)
59
 
60
- def _build_plain_runs(text: str) -> str:
61
- return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
62
-
63
- # =============== HWPX ์“ฐ๊ธฐ ๊ณตํ†ต (mimetype ๋ฌด์••์ถ•/๋งจ์•ž) ===============
64
- def write_hwpx_like_src(zin: zipfile.ZipFile, writer_fn) -> bytes:
65
  out = io.BytesIO()
66
  zout = zipfile.ZipFile(out, "w")
67
- names = zin.namelist()
68
 
69
- # 1) mimetype ๋จผ์ € ๋ฌด์••์ถ•
70
- if "mimetype" in names:
71
  zi = zipfile.ZipInfo("mimetype")
72
  zi.compress_type = zipfile.ZIP_STORED
73
  zout.writestr(zi, zin.read("mimetype"))
@@ -91,129 +89,63 @@ def write_hwpx_like_src(zin: zipfile.ZipFile, writer_fn) -> bytes:
91
  zout.close(); out.seek(0)
92
  return out.getvalue()
93
 
94
- # =============== ๋ชจ๋“œ1: ํ† ํฐ ์น˜ํ™˜ ({{ํ‚ค}}) ===============
95
- RUN_JOIN_RE = re.compile(r'</hp:t>\s*</hp:run>\s*<hp:run[^>]*>\s*<hp:t>', re.DOTALL)
96
-
97
- def token_mode_apply(hwpx_bytes: bytes, mapping: dict, collect_debug=False):
98
- dbg = {"mode":"token","files_touched":[], "token_hits":{}} if collect_debug else None
99
- zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
100
-
101
- token_keys = list(mapping.keys())
102
-
103
- def writer_fn(fname: str, xml: str) -> str:
104
- changed = False
105
- # run ๊ฒฝ๊ณ„ ๋ณ‘ํ•ฉ (ํ† ํฐ์ด ์ž˜๋ ค ์žˆ์–ด๋„ ์ด์–ด๋ถ™์ด๊ธฐ)
106
- xml2 = RUN_JOIN_RE.sub('', xml)
107
- if xml2 != xml:
108
- changed = True
109
- xml = xml2
110
- # ํ† ํฐ ๋ฌธ์ž์—ด ์น˜ํ™˜
111
- for k in token_keys:
112
- tok = f"{{{{{k}}}}}"
113
- if tok in xml:
114
- val = mapping[k]
115
- if re.match(r"^(๋ชฉ๋ก|list)\d+$", k, re.IGNORECASE):
116
- # ํ† ํฐ์€ run ์•ˆ์— ๋“ค์–ด๊ฐ€ ์žˆ์œผ๋ฏ€๋กœ, run ๊ตฌ์กฐ๋ฅผ ํ†ต์งธ๋กœ ์ƒ์„ฑ
117
- xml = xml.replace(tok, _build_list_runs(val))
118
- else:
119
- xml = xml.replace(tok, html.escape("" if val is None else str(val)))
120
- changed = True
121
- if dbg: dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
122
- if changed and dbg and fname not in dbg["files_touched"]:
123
- dbg["files_touched"].append(fname)
124
- return xml
125
-
126
- out = write_hwpx_like_src(zin, writer_fn)
127
- zin.close()
128
- return (out, dbg) if collect_debug else (out, None)
129
-
130
- # =============== ๋ชจ๋“œ2: ํ•„๋“œ์ปจํŠธ๋กค ์น˜ํ™˜ (๊ฐ€์‹œ ํ…์ŠคํŠธ ์ค‘๋ณต ์‚ฝ์ž…) ===============
131
- # <hp:fieldBegin ... name="ํ‚ค"> ... </hp:fieldBegin> [๋ณธ๋ฌธ] <hp:fieldEnd ... />
132
- FIELD_BLOCK_RE_TMPL = r'(<hp:fieldBegin[^>]*name="{name}"[^>]*>.*?</hp:fieldBegin>)(.*?)(<hp:fieldEnd[^>]*/>)'
133
-
134
- def field_mode_apply(hwpx_bytes: bytes, mapping: dict, collect_debug=False):
135
  dbg = {"mode":"field","files_touched":[], "field_hits":{}} if collect_debug else None
136
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
137
 
138
- # ์–ด๋–ค ํ‚ค๋“ค์ด ์‹ค์ œ๋กœ ์กด์žฌํ•˜๋Š”์ง€ ๋น ๋ฅด๊ฒŒ ์ˆ˜์ง‘ (์ •ํ™•๋„โ†‘, ์†๋„โ†‘)
139
- contents = [e.filename for e in zin.infolist() if e.filename.startswith("Contents/") and e.filename.endswith(".xml")]
140
- present_keys = set()
141
- for fn in contents:
142
- try:
143
- s = zin.read(fn).decode("utf-8", errors="ignore")
144
  for k in mapping.keys():
145
  if f'name="{k}"' in s:
146
- present_keys.add(k)
147
- except:
148
- pass
149
 
150
- def writer_fn(fname: str, xml: str) -> str:
151
- any_change = False
152
- for k in present_keys:
153
  val = mapping.get(k, "")
154
  is_list = bool(re.match(r"^(๋ชฉ๋ก|list)\d+$", k, re.IGNORECASE))
155
- pattern = re.compile(FIELD_BLOCK_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
 
156
 
157
  def _repl(m):
158
- # ํ•„๋“œ ๋‚ด๋ถ€ ๋‚ด์šฉ์€ run์œผ๋กœ ๊ต์ฒด
159
- inner = _build_list_runs(val) if is_list else _build_plain_runs(val)
160
- # ํ•„๋“œ ๋’ค์— ๊ฐ€์‹œ ํ…์ŠคํŠธ๋ฅผ 'ํ•œ ๋ฒˆ ๋”' ๋„ฃ์–ด ํ•ญ์ƒ ๋ณด์ด๊ฒŒ
161
- visible_dup = inner
162
  if dbg: dbg["field_hits"][k] = dbg["field_hits"].get(k, 0) + 1
163
- return f'{m.group(1)}{inner}{m.group(3)}{visible_dup}'
164
 
165
- xml_new, n = pattern.subn(_repl, xml)
166
  if n:
167
- any_change = True
168
- xml = xml_new
169
-
170
- if any_change and dbg and fname not in dbg["files_touched"]:
171
  dbg["files_touched"].append(fname)
172
  return xml
173
 
174
- out = write_hwpx_like_src(zin, writer_fn)
175
  zin.close()
176
  return (out, dbg) if collect_debug else (out, None)
177
 
178
- # =============== ๋ชจ๋“œ ์ž๋™๊ฐ์ง€ ===============
179
def detect_template_mode(hwpx_bytes: bytes) -> str:
    """Inspect an HWPX archive and report which templating style it uses.

    Scans every ``Contents/*.xml`` part of the zip and returns:
      - ``"token"``   if ``{{key}}`` placeholders are present (token style
                      wins when both styles occur, matching the original
                      precedence),
      - ``"field"``   if ``<hp:fieldBegin ... name="...">`` controls are
                      present,
      - ``"unknown"`` if neither marker is found.

    Parameters:
        hwpx_bytes: raw bytes of the .hwpx file (a zip container).

    Raises:
        zipfile.BadZipFile: if *hwpx_bytes* is not a valid zip archive.
    """
    has_token = False
    has_field = False
    # `with` guarantees the archive is closed even if scanning raises,
    # unlike the previous manual zin.close().
    with zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r") as zin:
        for e in zin.infolist():
            if not (e.filename.startswith("Contents/") and e.filename.endswith(".xml")):
                continue
            try:
                s = zin.read(e.filename).decode("utf-8", errors="ignore")
            except (KeyError, zipfile.BadZipFile, RuntimeError):
                # Unreadable/corrupt entry: skip it rather than abort the
                # whole detection (narrowed from the old bare `except:`).
                continue
            if "{{" in s and "}}" in s:
                has_token = True
            if "<hp:fieldBegin" in s and 'name="' in s:
                has_field = True
            if has_token and has_field:
                break  # both styles already seen; no need to scan further
    if has_token:
        return "token"
    if has_field:
        return "field"
    return "unknown"
198
-
199
- # =============== Streamlit UI ===============
200
- with st.expander("์‚ฌ์šฉ ๋ฐฉ๋ฒ• ์š”์•ฝ", expanded=True):
201
  st.markdown("""
202
- - ํ…œํ”Œ๋ฆฟ์ด **ํ† ํฐ(`{{๋ฐ•์Šค๋ฒˆํ˜ธ1}}` ๋“ฑ)** ์ด๋ฉด ์ž๋™์œผ๋กœ ํ† ํฐ ๋ชจ๋“œ,
203
- **ํ•œ๊ธ€ ํ•„๋“œ์ปจํŠธ๋กค(`name="๋ฐ•์Šค๋ฒˆํ˜ธ1"`)** ์ด๋ฉด ํ•„๋“œ ๋ชจ๋“œ๋กœ ์ž๋™ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.
204
- - ํ•„๋“œ ๋ชจ๋“œ์—์„œ๋Š” ๊ฐ’์ด ์•ˆ ๋ณด์ด๋Š” ๋ฌธ์ œ๋ฅผ ๋ง‰๊ธฐ ์œ„ํ•ด **fieldEnd ๋’ค์— ๊ฐ€์‹œ ํ…์ŠคํŠธ๋ฅผ ํ•œ ๋ฒˆ ๋” ๋„ฃ์Šต๋‹ˆ๋‹ค.**
205
  """)
206
 
207
- tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
208
  batch_size = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜ (ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", min_value=1, max_value=12, value=3, step=1)
209
- data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
210
 
211
- if tpl and data:
212
- tpl_bytes = tpl.read()
213
- mode = detect_template_mode(tpl_bytes)
214
- st.info(f"ํƒ์ง€๋œ ํ…œํ”Œ๋ฆฟ ๋ชจ๋“œ: **{mode}**")
215
 
216
- df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
217
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
218
  st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
219
  st.stop()
@@ -232,11 +164,11 @@ if tpl and data:
232
  work = merged[merged["๋ฐ•์Šค๋ฒˆํ˜ธ"].isin(selected)] if selected else merged
233
  rows = work.sort_values("๋ฐ•์Šค๋ฒˆํ˜ธ").to_dict(orient="records")
234
 
235
- # 1ํŽ˜์ด์ง€ ๋ฏธ๋ฆฌ๋ณด๊ธฐ ๋งคํ•‘
236
- st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
237
  keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]
238
- preview = {}
239
  n = int(batch_size)
 
240
  for i in range(n):
241
  if i < len(rows):
242
  r = rows[i]
@@ -245,17 +177,15 @@ if tpl and data:
245
  else:
246
  for k in keys:
247
  preview[f"{k}{i+1}"] = ""
248
- st.dataframe(
249
- pd.DataFrame([{"ํ† ํฐ/ํ•„๋“œ":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(preview.items())]),
250
- use_container_width=True, height=320
251
- )
252
 
253
  if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ํŽ˜์ด์ง€๋ณ„ HWPX ZIP)"):
254
  mem_zip = io.BytesIO()
255
  zout = zipfile.ZipFile(mem_zip, "w", zipfile.ZIP_DEFLATED)
 
256
  pages = (len(rows) + n - 1) // n
257
  all_dbg = []
258
-
259
  for p in range(pages):
260
  chunk = rows[p*n:(p+1)*n]
261
  mapping = {}
@@ -268,24 +198,14 @@ if tpl and data:
268
  for k in keys:
269
  mapping[f"{k}{i+1}"] = ""
270
 
271
- if mode == "token":
272
- out, dbg = token_mode_apply(tpl_bytes, mapping, collect_debug=True)
273
- elif mode == "field":
274
- out, dbg = field_mode_apply(tpl_bytes, mapping, collect_debug=True)
275
- else:
276
- # ์•ˆ์ „๋นต: ๋‘˜ ๋‹ค ์‹œ๋„ (token -> field)
277
- out, dbg = token_mode_apply(tpl_bytes, mapping, collect_debug=True)
278
- if dbg and not dbg["files_touched"]:
279
- out, dbg = field_mode_apply(tpl_bytes, mapping, collect_debug=True)
280
-
281
- all_dbg.append({"page": p+1, "mode": dbg.get("mode") if dbg else mode, "stats": dbg})
282
- page_boxes = [r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","") for r in chunk]
283
- name = "_".join(page_boxes) if page_boxes else f"empty_{p+1}"
284
- zout.writestr(f"label_{name}.hwpx", out)
285
 
286
  zout.close(); mem_zip.seek(0)
287
  st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem_zip, file_name="labels_by_page.zip", mime="application/zip")
288
  st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(all_dbg, ensure_ascii=False, indent=2),
289
  file_name="debug.json", mime="application/json")
290
 
291
- st.caption("ํ•„๋“œ ๋ชจ๋“œ: ๊ฐ’์€ ํ•„๋“œ ๋‚ด๋ถ€ + fieldEnd ๋’ค์— ์ผ๋ฐ˜ ํ…์ŠคํŠธ๋กœ ํ•œ ๋ฒˆ ๋” ๋„ฃ์Šต๋‹ˆ๋‹ค(ํ•ญ์ƒ ๋ณด์ด๋„๋ก). ํ† ํฐ ๋ชจ๋“œ: run ๋ณ‘ํ•ฉ ํ›„ ์น˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
 
2
  import pandas as pd
3
  import io, zipfile, re, html, json
4
 
5
+ st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (HWPX ํ•„๋“œ ์ „์šฉ)", layout="wide")
6
+ st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ โ€” HWPX **ํ•„๋“œ์ปจํŠธ๋กค ์ „์šฉ**")
7
 
8
+ # ---------- ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ ----------
9
  def compute_year_range(series: pd.Series) -> str:
10
  s = series.astype(str).fillna("")
11
  valid = s[~s.isin(["", "0", "0000"])]
 
29
  else:
30
  prod_df = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique(), "์ƒ์‚ฐ์—ฐ๋„": "0000-0000"})
31
 
32
+ # ๋ชฉ๋ก(๊ด€๋ฆฌ๋ฒˆํ˜ธ+์ œ๋ชฉ)
33
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
34
  list_rows = []
35
  for box, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
36
+ lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r['์ œ๋ชฉ']}" if has_mgmt else f"- {r['์ œ๋ชฉ']}" for _, r in g.iterrows()]
 
 
 
37
  list_rows.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": box, "๋ชฉ๋ก": "\r\n".join(lines)})
38
  list_df = pd.DataFrame(list_rows)
39
 
 
44
 
45
  return meta_df.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(prod_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
46
 
47
+ # ---------- HWPX ํ•„๋“œ ์น˜ํ™˜ ----------
48
+ FIELD_BLOCK_RE_TMPL = r'(<hp:fieldBegin[^>]*name="{name}"[^>]*>.*?</hp:fieldBegin>)(.*?)(<hp:fieldEnd[^>]*/>)'
49
+
50
+ def _runs_plain(text: str) -> str:
51
+ return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
52
+
53
+ def _runs_list(text: str) -> str:
54
  if text is None: return ""
55
  lines = str(text).replace("\r\n", "\n").split("\n")
56
  parts = []
 
60
  parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
61
  return "".join(parts)
62
 
63
+ def _write_hwpx_like_src(zin: zipfile.ZipFile, writer_fn) -> bytes:
 
 
 
 
64
  out = io.BytesIO()
65
  zout = zipfile.ZipFile(out, "w")
 
66
 
67
+ # 1) mimetype: ๋ฌด์••์ถ• + ๋งจ ์•ž
68
+ if "mimetype" in zin.namelist():
69
  zi = zipfile.ZipInfo("mimetype")
70
  zi.compress_type = zipfile.ZIP_STORED
71
  zout.writestr(zi, zin.read("mimetype"))
 
89
  zout.close(); out.seek(0)
90
  return out.getvalue()
91
 
92
def apply_field_mode(hwpx_bytes: bytes, mapping: dict, collect_debug=False):
    """Fill HWPX field controls (``name="..."``) with values from *mapping*.

    For every field name that actually occurs in a ``Contents/*.xml`` part,
    the field body is replaced with runs built from the mapped value, and the
    same runs are inserted once more right after ``<hp:fieldEnd/>`` as plain
    visible text (so the value shows even when the viewer hides field bodies).

    Returns ``(new_hwpx_bytes, debug_dict)`` when *collect_debug* is true,
    otherwise ``(new_hwpx_bytes, None)``.
    """
    debug = {"mode": "field", "files_touched": [], "field_hits": {}} if collect_debug else None
    src_zip = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")

    # Pre-scan the content parts so we only attempt keys that really occur
    # in the template (avoids pointless regex work per page).
    found_keys = set()
    for entry in src_zip.infolist():
        if not (entry.filename.startswith("Contents/") and entry.filename.endswith(".xml")):
            continue
        text = src_zip.read(entry.filename).decode("utf-8", errors="ignore")
        found_keys.update(k for k in mapping if f'name="{k}"' in text)

    def rewrite(fname: str, xml: str) -> str:
        touched = False
        for key in found_keys:
            value = mapping.get(key, "")
            list_like = bool(re.match(r"^(๋ชฉ๋ก|list)\d+$", key, re.IGNORECASE))
            runs = _runs_list(value) if list_like else _runs_plain(value)
            block_re = re.compile(FIELD_BLOCK_RE_TMPL.format(name=re.escape(key)), re.DOTALL)

            def substitute(m, _key=key, _runs=runs):
                # Field interior gets the runs, and the same runs follow
                # <hp:fieldEnd/> as the always-visible duplicate.
                if debug is not None:
                    debug["field_hits"][_key] = debug["field_hits"].get(_key, 0) + 1
                return m.group(1) + _runs + m.group(3) + _runs

            xml, hits = block_re.subn(substitute, xml)
            if hits:
                touched = True
        if touched and debug is not None and fname not in debug["files_touched"]:
            debug["files_touched"].append(fname)
        return xml

    result = _write_hwpx_like_src(src_zip, rewrite)
    src_zip.close()
    return (result, debug) if collect_debug else (result, None)
132
 
133
+ # ---------- UI ----------
134
+ with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  st.markdown("""
136
+ - ํ…œํ”Œ๋ฆฟ์€ **ํ•œ๊ธ€ ํ•„๋“œ์ปจํŠธ๋กค**(์˜ˆ: `name="๋ฐ•์Šค๋ฒˆํ˜ธ1"`)์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. (ํ˜„์žฌ ์•ฑ์€ ํ•„๋“œ ๋ชจ๋“œ ์ „์šฉ)
137
+ - ํ•œ ํŽ˜์ด์ง€์— ๋ผ๋ฒจ N๊ฐœ๋ผ๋ฉด, ํ•„๋“œ ์ด๋ฆ„์€ `๋ฐ•์Šค๋ฒˆํ˜ธ1..N`, `์ข…๋ฃŒ์—ฐ๋„1..N`, `๋ณด์กด๊ธฐ๊ฐ„1..N`, `๋‹จ์œ„์—…๋ฌด1..N`, `๊ธฐ๋ก๋ฌผ์ฒ 1..N`, `๋ชฉ๋ก1..N` ํ˜•ํƒœ์—ฌ์•ผ ํ•ฉ๋‹ˆ๋‹ค.
138
+ - `์ข…๋ฃŒ์—ฐ๋„` ๊ฐ’์€ ๋ฐ•์Šค๋ณ„ ์ตœ์†Œ~์ตœ๋Œ€๋กœ ๋ฌถ์–ด **์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„)** ๋กœ ์ž…๋ ฅ๋ฉ๋‹ˆ๋‹ค.
139
  """)
140
 
141
+ tpl_file = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
142
  batch_size = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜ (ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", min_value=1, max_value=12, value=3, step=1)
143
+ data_file = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
144
 
145
+ if tpl_file and data_file:
146
+ tpl_bytes = tpl_file.read()
147
+ df = pd.read_csv(data_file) if data_file.name.lower().endswith(".csv") else pd.read_excel(data_file)
 
148
 
 
149
  if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
150
  st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
151
  st.stop()
 
164
  work = merged[merged["๋ฐ•์Šค๋ฒˆํ˜ธ"].isin(selected)] if selected else merged
165
  rows = work.sort_values("๋ฐ•์Šค๋ฒˆํ˜ธ").to_dict(orient="records")
166
 
167
+ # 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ
168
+ st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ (ํ•„๋“œ ์ด๋ฆ„ โ†” ๊ฐ’)")
169
  keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]
 
170
  n = int(batch_size)
171
+ preview = {}
172
  for i in range(n):
173
  if i < len(rows):
174
  r = rows[i]
 
177
  else:
178
  for k in keys:
179
  preview[f"{k}{i+1}"] = ""
180
+ st.dataframe(pd.DataFrame([{"ํ•„๋“œ๋ช…":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(preview.items())]),
181
+ use_container_width=True, height=320)
 
 
182
 
183
  if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ํŽ˜์ด์ง€๋ณ„ HWPX ZIP)"):
184
  mem_zip = io.BytesIO()
185
  zout = zipfile.ZipFile(mem_zip, "w", zipfile.ZIP_DEFLATED)
186
+
187
  pages = (len(rows) + n - 1) // n
188
  all_dbg = []
 
189
  for p in range(pages):
190
  chunk = rows[p*n:(p+1)*n]
191
  mapping = {}
 
198
  for k in keys:
199
  mapping[f"{k}{i+1}"] = ""
200
 
201
+ out_hwpx, dbg = apply_field_mode(tpl_bytes, mapping, collect_debug=True)
202
+ all_dbg.append({"page": p+1, "stats": dbg})
203
+ name = "_".join([r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","") for r in chunk]) if chunk else f"empty_{p+1}"
204
+ zout.writestr(f"label_{name}.hwpx", out_hwpx)
 
 
 
 
 
 
 
 
 
 
205
 
206
  zout.close(); mem_zip.seek(0)
207
  st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem_zip, file_name="labels_by_page.zip", mime="application/zip")
208
  st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(all_dbg, ensure_ascii=False, indent=2),
209
  file_name="debug.json", mime="application/json")
210
 
211
+ st.caption("ํ•„๋“œ ๋‚ด๋ถ€์— ๊ฐ’ + fieldEnd ๋’ค์— ๊ฐ€์‹œ ํ…์ŠคํŠธ๋ฅผ **์ค‘๋ณต ์‚ฝ์ž…**ํ•˜๋ฏ€๋กœ, ํ•œ๊ธ€์—์„œ ๊ฐ’์ด ํ•ญ์ƒ ๋ณด์ž…๋‹ˆ๋‹ค.")