dohyune committed on
Commit
4d98592
·
verified ·
1 Parent(s): f2a332f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -110
app.py CHANGED
@@ -1,50 +1,93 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import io, zipfile, re, html
4
-
5
- st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ", layout="wide")
6
-
7
- # =========================================================
8
- # HWPX ํ† ํฐ ์น˜ํ™˜ ํ•จ์ˆ˜
9
- # =========================================================
10
- def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  """
12
- - {{ํ† ํฐ}} ๋ฌธ์ž์—ด์„ ์ง์ ‘ ์น˜ํ™˜
13
- - ๋ชฉ๋ก(list) ๊ณ„์—ด์€ ์ค„๋ฐ”๊ฟˆ์„ <hp:lineBreak/> ๋กœ ์ฒ˜๋ฆฌ
14
- - collect_debug=True ์‹œ debug_info ๋ฐ˜ํ™˜
 
 
15
  """
16
- debug_info = {"token_hits": {}, "files_touched": []} if collect_debug else None
17
 
18
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
19
  mem_out = io.BytesIO()
20
  zout = zipfile.ZipFile(mem_out, "w")
21
 
22
- def build_runs(value: str, is_list: bool) -> str:
23
- if value is None:
24
- return ""
25
- text = str(value)
26
- if not is_list:
27
- return html.escape(text)
28
- # ์ค„๋ฐ”๊ฟˆ์€ ํ•œ๊ธ€์—์„œ <hp:lineBreak/> ํ•„์š”
29
- lines = text.replace("\r\n", "\n").split("\n")
30
- parts = []
31
- for i, ln in enumerate(lines):
32
- if i > 0:
33
- parts.append("<hp:lineBreak/>")
34
- parts.append(html.escape(ln))
35
- return "".join(parts)
36
-
37
- def repl_xml(xml_text: str, kv: dict) -> str:
38
- for k, v in kv.items():
39
- is_list = bool(re.match(r"^(๋ชฉ๋ก|list)\d*$", k, flags=re.IGNORECASE))
40
- token = f"{{{{{k}}}}}" # e.g. {{๋ฐ•์Šค๋ฒˆํ˜ธ1}}
41
- if token in xml_text:
42
- xml_text = xml_text.replace(token, build_runs(v, is_list))
43
- if collect_debug:
44
- debug_info["token_hits"][k] = debug_info["token_hits"].get(k, 0) + 1
45
- return xml_text
46
-
47
- # mimetype์€ ๋ฌด์••์ถ• + ์ œ์ผ ๋จผ์ € ๊ธฐ๋ก
48
  names = zin.namelist()
49
  if "mimetype" in names:
50
  data = zin.read("mimetype")
@@ -52,6 +95,26 @@ def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool
52
  zi.compress_type = zipfile.ZIP_STORED
53
  zout.writestr(zi, data)
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  for e in zin.infolist():
56
  if e.filename == "mimetype":
57
  continue
@@ -59,9 +122,9 @@ def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool
59
  if e.filename.startswith("Contents/") and e.filename.endswith(".xml"):
60
  try:
61
  s = data.decode("utf-8", errors="ignore")
62
- s2 = repl_xml(s, mapping)
63
- if collect_debug and s2 != s:
64
- debug_info["files_touched"].append(e.filename)
65
  data = s2.encode("utf-8")
66
  except Exception:
67
  pass
@@ -69,78 +132,100 @@ def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool
69
  zi.compress_type = zipfile.ZIP_DEFLATED
70
  zout.writestr(zi, data)
71
 
72
- zin.close()
73
- zout.close()
74
- mem_out.seek(0)
75
- return (mem_out.getvalue(), debug_info) if collect_debug else mem_out.getvalue()
76
-
77
 
78
- # =========================================================
79
- # Streamlit UI
80
- # =========================================================
81
- st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (.HWPX ํ† ํฐ ๋ฒ„์ „)")
82
-
83
- st.markdown("""
84
- HWPX ํ…œํ”Œ๋ฆฟ ์•ˆ์— `{{๋ฐ•์Šค๋ฒˆํ˜ธ1}}`, `{{์ข…๋ฃŒ์—ฐ๋„1}}`, `{{๋ณด์กด๊ธฐ๊ฐ„1}}`, `{{๋‹จ์œ„์—…๋ฌด1}}`,
85
- `{{๊ธฐ๋ก๋ฌผ์ฒ 1}}`, `{{๋ชฉ๋ก1}}` ๊ฐ™์€ ํ† ํฐ์„ ๋„ฃ์–ด์ฃผ์„ธ์š”.
86
-
87
- - ์—‘์…€/CSV ์—…๋กœ๋“œ โ†’ ์ปฌ๋Ÿผ๋ช… ๋งคํ•‘
88
- - ๋ผ๋ฒจ ์ƒ์„ฑ โ†’ ZIP์œผ๋กœ ๋‹ค์šด๋กœ๋“œ
89
- """)
90
-
91
- # ------------------------
92
- # ์—…๋กœ๋“œ ์˜์—ญ
93
- # ------------------------
94
  tpl_file = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
95
- excel_file = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx", "xls", "csv"])
 
96
 
97
- if tpl_file and excel_file:
98
- # ํ…œํ”Œ๋ฆฟ ์ฝ๊ธฐ
99
  tpl_bytes = tpl_file.read()
 
100
 
101
- # ๋ฐ์ดํ„ฐ ์ฝ๊ธฐ
102
- if excel_file.name.endswith(".csv"):
103
- df = pd.read_csv(excel_file)
104
- else:
105
- df = pd.read_excel(excel_file)
106
-
107
- st.subheader("๐Ÿ“‹ ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
108
- st.dataframe(df.head())
109
-
110
- box_col = "๋ฐ•์Šค๋ฒˆํ˜ธ"
111
- if box_col not in df.columns:
112
  st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
113
- else:
114
- st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘์…€ ์ธก)")
115
-
116
- # ๋ฐ•์Šค๋ฒˆํ˜ธ ๋ชฉ๋ก
117
- st.subheader("๐Ÿ”Ž ์—…๋กœ๋“œ๋œ ๋ฐ•์Šค๋ฒˆํ˜ธ ๋ชฉ๋ก")
118
- st.write(f"์ด {len(df[box_col].unique())}๊ฐœ")
119
- selected_boxes = st.multiselect("์ƒ์„ฑํ•  ๋ฐ•์Šค๋ฒˆํ˜ธ ์„ ํƒ (๋น„์šฐ๋ฉด ์ „์ฒด)",
120
- df[box_col].unique().tolist())
121
-
122
- # ๋ผ๋ฒจ ์ƒ์„ฑ ๋ฒ„ํŠผ
123
- if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ZIP)"):
124
- mem_zip = io.BytesIO()
125
- zout = zipfile.ZipFile(mem_zip, "w")
126
-
127
- for _, row in df.iterrows():
128
- box_no = str(row[box_col])
129
- if selected_boxes and box_no not in selected_boxes:
130
- continue
131
-
132
- mapping = {}
133
- for i, col in enumerate(df.columns, start=1):
134
- key = col
135
- value = row[col]
136
- mapping[key] = value
137
-
138
- hwpx_bytes, dbg = replace_tokens_in_hwpx(tpl_bytes, mapping, collect_debug=True)
139
-
140
- fn = f"label_{box_no}.hwpx"
141
- zout.writestr(fn, hwpx_bytes)
142
-
143
- zout.close()
144
- mem_zip.seek(0)
145
- st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem_zip,
146
- file_name="labels.zip", mime="application/zip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import io, zipfile, re, html, json
4
+
5
+ st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (ํ† ํฐยท๋ฐฐ์น˜)", layout="wide")
6
+ st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (.HWPX ํ† ํฐยท๋ฐฐ์น˜ ์ง€์›)")
7
+
8
+ with st.expander("์‚ฌ์šฉ ๋ฐฉ๋ฒ•", expanded=True):
9
+ st.markdown("""
10
+ 1) **HWPX ํ…œํ”Œ๋ฆฟ**: ๋ผ๋ฒจ ํ•œ ํŽ˜์ด์ง€์— `{{๋ฐ•์Šค๋ฒˆํ˜ธ1}} โ€ฆ {{๋ฐ•์Šค๋ฒˆํ˜ธN}}`, `{{์ข…๋ฃŒ์—ฐ๋„1}} โ€ฆ` ์ฒ˜๋Ÿผ **๋ฒˆํ˜ธ๊ฐ€ ๋ถ™์€ ํ† ํฐ**์„ ๋„ฃ์–ด ์ฃผ์„ธ์š”.
11
+ - ์‚ฌ์šฉ ํ† ํฐ ์˜ˆ: `{{๋ฐ•์Šค๋ฒˆํ˜ธi}}`, `{{์ข…๋ฃŒ์—ฐ๋„i}}`, `{{๋ณด์กด๊ธฐ๊ฐ„i}}`, `{{๋‹จ์œ„์—…๋ฌดi}}`, `{{๊ธฐ๋ก๋ฌผ์ฒ i}}`, `{{๋ชฉ๋กi}}` (i = 1..N)
12
+ 2) **์—‘์…€/CSV ์—…๋กœ๋“œ** โ†’ `๋ฐ•์Šค๋ฒˆํ˜ธ`๋Š” ํ•„์ˆ˜, ๋‚˜๋จธ์ง€๋Š” ์žˆ์œผ๋ฉด ์ž๋™ ๋ฐ˜์˜
13
+ 3) **ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(N)** ๋ฅผ ์ง€์ •ํ•˜๋ฉด N๊ฐœ์”ฉ ๋ฌถ์–ด ํ•œ ํŽ˜์ด์ง€๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
14
+ 4) **ZIP ๋‹ค์šด๋กœ๋“œ**๋ฅผ ๋ฐ›์œผ๋ฉด `label_0001_0003.hwpx` ์ฒ˜๋Ÿผ ํŽ˜์ด์ง€๋ณ„ ํŒŒ์ผ์ด ๋“ค์–ด ์žˆ์Šต๋‹ˆ๋‹ค.
15
+ """)
16
+
17
+ # =========================
18
+ # ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ
19
+ # =========================
20
def compute_year_range(series: pd.Series) -> str:
    """Return the year span of *series* as a zero-padded 'MIN-MAX' string.

    Values that are empty, whitespace-only, '0', '0000', or non-numeric are
    ignored. When nothing valid remains, the placeholder '0000-0000' is
    returned so the label template always has something to show.
    """
    # fillna must run BEFORE astype(str): the original order turned NaN into
    # the literal string "nan", which slipped past the placeholder filter and
    # was only rescued incidentally by to_numeric's coercion below.
    s = series.fillna("").astype(str).str.strip()
    valid = s[~s.isin(["", "0", "0000"])]
    years = pd.to_numeric(valid, errors="coerce").dropna().astype(int)
    if years.empty:
        return "0000-0000"
    return f"{years.min():04d}-{years.max():04d}"
29
+
30
def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the row-level sheet into one record per 박스번호.

    Produces, per box:
      - 생산연도: 'MIN-MAX' range of 종료연도 (via compute_year_range),
        or the '0000-0000' placeholder when the column is absent
      - 목록: a CRLF-joined bullet list of '관리번호 제목' (or just 제목)
      - representative metadata columns taken from the first row of each box

    Requires a 박스번호 column; 종료연도 / 관리번호 / 제목 are optional.
    """
    df = df.copy()
    # 표준화: zero-pad box numbers so grouping and sorting are stable
    df["박스번호"] = df["박스번호"].astype(str).str.zfill(4)
    if "제목" in df.columns:
        df["제목"] = df["제목"].astype(str)

    # 생산연도(범위) = 종료연도 그룹 범위
    if "종료연도" in df.columns:
        prod_df = df.groupby("박스번호")["종료연도"].apply(compute_year_range).reset_index()
        prod_df.columns = ["박스번호", "생산연도"]
    else:
        prod_df = pd.DataFrame({"박스번호": df["박스번호"].unique(), "생산연도": "0000-0000"})

    # 목록(관리번호 + 제목) — 제목 column is optional; the original code read
    # r['제목'] unconditionally and raised KeyError when it was missing.
    has_mgmt = "관리번호" in df.columns
    has_title = "제목" in df.columns
    list_rows = []
    for box, g in df.groupby("박스번호"):
        lines = []
        for _, r in g.iterrows():
            title = r["제목"] if has_title else ""
            if has_mgmt:
                lines.append(f"- {r['관리번호']} {title}")
            else:
                lines.append(f"- {title}")
        list_rows.append({"박스번호": box, "목록": "\r\n".join(lines)})
    # columns= keeps the merge keys present even when list_rows is empty
    list_df = pd.DataFrame(list_rows, columns=["박스번호", "목록"])

    # 대표 메타: first row of each box for whichever columns exist
    meta_cols = ["박스번호", "종료연도", "보존기간", "단위업무", "기록물철", "제목"]
    meta_exist = [c for c in meta_cols if c in df.columns]
    if meta_exist:
        meta_df = df.groupby("박스번호", as_index=False).first()[meta_exist]
    else:
        meta_df = pd.DataFrame({"박스번호": df["박스번호"].unique()})

    return meta_df.merge(list_df, on="박스번호", how="left").merge(prod_df, on="박스번호", how="left")
61
+
62
+ # =========================
63
+ # HWPX ํ† ํฐ ์น˜ํ™˜ (๋ฐฐ์น˜)
64
+ # =========================
65
+ def _build_runs_for_list(text: str) -> str:
66
+ """ ๋ชฉ๋ก ์ค„๋ฐ”๊ฟˆ์„ <hp:lineBreak/>๋กœ ๋ฐ”๊พผ ๋ฌธ์ž์—ด(ํ† ํฐ ์ž๋ฆฌ์— ๋“ค์–ด๊ฐˆ ํ…์ŠคํŠธ) """
67
+ if text is None: return ""
68
+ text = str(text)
69
+ lines = text.replace("\r\n", "\n").split("\n")
70
+ parts = []
71
+ for i, ln in enumerate(lines):
72
+ if i > 0:
73
+ parts.append("<hp:lineBreak/>")
74
+ parts.append(html.escape(ln))
75
+ return "".join(parts)
76
+
77
+ def replace_tokens_in_hwpx_batch(hwpx_bytes: bytes, mapping: dict, collect_debug: bool=False):
78
  """
79
+ mapping ์˜ˆ:
80
+ {'๋ฐ•์Šค๋ฒˆํ˜ธ1': '0001', '์ข…๋ฃŒ์—ฐ๋„1': '1999-2002', '๋ชฉ๋ก1': '<hp:run..>',
81
+ '๋ฐ•์Šค๋ฒˆํ˜ธ2': '0002', ...}
82
+ - {{ํ† ํฐ}} ๋ฌธ์ž์—ด์„ ์ง์ ‘ ์น˜ํ™˜ (ํ† ํฐ์€ ํ•œ run ์•ˆ์— ์žˆ์–ด์•ผ ์•ˆ์ „)
83
+ - mimetype์€ ๋ฌด์••์ถ• + ์ฒซ ์—”ํŠธ๋ฆฌ
84
  """
85
+ dbg = {"token_hits": {}, "files_touched": []} if collect_debug else None
86
 
87
  zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
88
  mem_out = io.BytesIO()
89
  zout = zipfile.ZipFile(mem_out, "w")
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  names = zin.namelist()
92
  if "mimetype" in names:
93
  data = zin.read("mimetype")
 
95
  zi.compress_type = zipfile.ZIP_STORED
96
  zout.writestr(zi, data)
97
 
98
+ token_keys = list(mapping.keys())
99
+
100
+ def repl_xml(xml_text: str) -> str:
101
+ changed = False
102
+ # ๋น ๋ฅธ ๊ฒฝ๋กœ: ํฌํ•จ๋œ ํ‚ค๋งŒ ์ˆœํšŒ (๊ฐ„๋‹จ/์•ˆ์ „)
103
+ for k in token_keys:
104
+ tok = f"{{{{{k}}}}}"
105
+ if tok in xml_text:
106
+ v = mapping.get(k, "")
107
+ # ๋ชฉ๋ก ๊ณ„์—ด ์ค„๋ฐ”๊ฟˆ ์ฒ˜๋ฆฌ
108
+ if re.match(r"^(๋ชฉ๋ก|list)\d+$", k):
109
+ v = _build_runs_for_list(v)
110
+ else:
111
+ v = html.escape("" if v is None else str(v))
112
+ xml_text = xml_text.replace(tok, v)
113
+ changed = True
114
+ if collect_debug:
115
+ dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
116
+ return xml_text, changed
117
+
118
  for e in zin.infolist():
119
  if e.filename == "mimetype":
120
  continue
 
122
  if e.filename.startswith("Contents/") and e.filename.endswith(".xml"):
123
  try:
124
  s = data.decode("utf-8", errors="ignore")
125
+ s2, changed = repl_xml(s)
126
+ if collect_debug and changed:
127
+ dbg["files_touched"].append(e.filename)
128
  data = s2.encode("utf-8")
129
  except Exception:
130
  pass
 
132
  zi.compress_type = zipfile.ZIP_DEFLATED
133
  zout.writestr(zi, data)
134
 
135
+ zin.close(); zout.close(); mem_out.seek(0)
136
+ return (mem_out.getvalue(), dbg) if collect_debug else (mem_out.getvalue(), None)
 
 
 
137
 
138
# =========================
# UI
# =========================
tpl_file = st.file_uploader("📄 HWPX 템플릿 업로드", type=["hwpx"])
batch_size = st.number_input("템플릿의 라벨 세트 개수 (한 페이지에 몇 개?)", min_value=1, max_value=12, value=3, step=1)
data_file = st.file_uploader("📊 데이터 업로드 (Excel/CSV)", type=["xlsx","xls","csv"])

# Token base names expected in the template ({{박스번호1}}, {{목록2}}, ...).
TOKEN_KEYS = ["박스번호", "종료연도", "보존기간", "단위업무", "기록물철", "목록"]

def _page_mapping(chunk: list, n: int) -> dict:
    """Build the {token-name -> value} dict for one page of up to *n* label sets.

    Slot i (1-based) takes row i-1 of *chunk*; slots past the end of *chunk*
    are mapped to "" so leftover {{token}} markers get blanked in the output.
    The 종료연도 token is fed the aggregated 생산연도 range computed upstream.
    (Extracted because preview and generation previously duplicated this
    logic verbatim — a drift hazard.)
    """
    mapping = {}
    for i in range(n):
        row = chunk[i] if i < len(chunk) else None
        for k in TOKEN_KEYS:
            if row is None:
                mapping[f"{k}{i+1}"] = ""
            elif k == "종료연도":
                mapping[f"{k}{i+1}"] = row.get("생산연도", "")
            else:
                mapping[f"{k}{i+1}"] = row.get(k, "")
    return mapping

if tpl_file and data_file:
    tpl_bytes = tpl_file.read()
    df = pd.read_csv(data_file) if data_file.name.lower().endswith(".csv") else pd.read_excel(data_file)

    if "박스번호" not in df.columns:
        st.error("❌ 필수 컬럼 '박스번호'가 없습니다.")
        st.stop()

    st.success("✅ 위치 매핑 완료 (엑셀 측)")
    st.dataframe(df.head(10), use_container_width=True)

    merged = build_merged_df(df)
    box_list = merged["박스번호"].astype(str).str.zfill(4).unique().tolist()

    st.subheader("🔎 업로드된 박스번호 목록")
    st.write(f"총 **{len(box_list)}**개")
    st.dataframe(pd.DataFrame({"박스번호": box_list}), use_container_width=True, height=240)

    selected = st.multiselect("생성할 박스번호 선택 (비우면 전체 생성)", options=box_list)

    work_df = merged[merged["박스번호"].isin(selected)] if selected else merged
    rows = work_df.sort_values("박스번호").to_dict(orient="records")

    # 1페이지 미리보기 — 생성 시와 동일한 _page_mapping을 사용하므로 표시와
    # 실제 출력이 어긋날 수 없다.
    st.subheader("🧪 1페이지 토큰 매핑 미리보기")
    n = int(batch_size)
    mapping_preview = _page_mapping(rows[:n], n)

    st.dataframe(
        pd.DataFrame(
            [{"토큰": k, "값(앞부분)": (str(v)[:120] if v is not None else ""), "길이": (len(str(v)) if v is not None else 0)}
             for k, v in sorted(mapping_preview.items())]
        ),
        use_container_width=True, height=320
    )

    if st.button("🚀 라벨 생성 (페이지별 HWPX ZIP)"):
        mem_zip = io.BytesIO()
        zout = zipfile.ZipFile(mem_zip, "w", zipfile.ZIP_DEFLATED)

        total = len(rows)
        pages = (total + n - 1) // n  # ceiling division: pages of n label sets
        all_debug = []

        for p in range(pages):
            chunk = rows[p * n : (p + 1) * n]
            mapping = _page_mapping(chunk, n)

            out_hwpx, dbg = replace_tokens_in_hwpx_batch(tpl_bytes, mapping, collect_debug=True)
            all_debug.append({"page": p + 1, "mapping_keys": sorted(mapping.keys()), "stats": dbg})

            # File name carries the box numbers on that page, e.g. label_0001_0003.hwpx
            page_boxes = [r.get("박스번호", "") for r in chunk]
            safe = "_".join(page_boxes) if page_boxes else f"empty_{p+1}"
            zout.writestr(f"label_{safe}.hwpx", out_hwpx)

        zout.close()
        mem_zip.seek(0)
        st.download_button("⬇️ ZIP 다운로드", data=mem_zip, file_name="labels_by_page.zip", mime="application/zip")
        st.download_button("⬇️ 디버그 리포트(JSON)", data=json.dumps(all_debug, ensure_ascii=False, indent=2),
                           file_name="debug_by_page.json", mime="application/json")

    st.caption("※ 템플릿의 토큰은 **반드시 run 하나에 온전한 문자열**로 넣어주세요(예: `{{박스번호1}}`). 토큰이 글자 단위로 쪼개져 여러 run에 나뉘면 치환이 되지 않을 수 있습니다.")