Update app.py
Browse files
app.py
CHANGED
|
@@ -1,50 +1,93 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
import io, zipfile, re, html
|
| 4 |
-
|
| 5 |
-
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ", layout="wide")
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
"""
|
| 16 |
-
|
| 17 |
|
| 18 |
zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
|
| 19 |
mem_out = io.BytesIO()
|
| 20 |
zout = zipfile.ZipFile(mem_out, "w")
|
| 21 |
|
| 22 |
-
def build_runs(value: str, is_list: bool) -> str:
|
| 23 |
-
if value is None:
|
| 24 |
-
return ""
|
| 25 |
-
text = str(value)
|
| 26 |
-
if not is_list:
|
| 27 |
-
return html.escape(text)
|
| 28 |
-
# ์ค๋ฐ๊ฟ์ ํ๊ธ์์ <hp:lineBreak/> ํ์
|
| 29 |
-
lines = text.replace("\r\n", "\n").split("\n")
|
| 30 |
-
parts = []
|
| 31 |
-
for i, ln in enumerate(lines):
|
| 32 |
-
if i > 0:
|
| 33 |
-
parts.append("<hp:lineBreak/>")
|
| 34 |
-
parts.append(html.escape(ln))
|
| 35 |
-
return "".join(parts)
|
| 36 |
-
|
| 37 |
-
def repl_xml(xml_text: str, kv: dict) -> str:
|
| 38 |
-
for k, v in kv.items():
|
| 39 |
-
is_list = bool(re.match(r"^(๋ชฉ๋ก|list)\d*$", k, flags=re.IGNORECASE))
|
| 40 |
-
token = f"{{{{{k}}}}}" # e.g. {{๋ฐ์ค๋ฒํธ1}}
|
| 41 |
-
if token in xml_text:
|
| 42 |
-
xml_text = xml_text.replace(token, build_runs(v, is_list))
|
| 43 |
-
if collect_debug:
|
| 44 |
-
debug_info["token_hits"][k] = debug_info["token_hits"].get(k, 0) + 1
|
| 45 |
-
return xml_text
|
| 46 |
-
|
| 47 |
-
# mimetype์ ๋ฌด์์ถ + ์ ์ผ ๋จผ์ ๊ธฐ๋ก
|
| 48 |
names = zin.namelist()
|
| 49 |
if "mimetype" in names:
|
| 50 |
data = zin.read("mimetype")
|
|
@@ -52,6 +95,26 @@ def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool
|
|
| 52 |
zi.compress_type = zipfile.ZIP_STORED
|
| 53 |
zout.writestr(zi, data)
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
for e in zin.infolist():
|
| 56 |
if e.filename == "mimetype":
|
| 57 |
continue
|
|
@@ -59,9 +122,9 @@ def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool
|
|
| 59 |
if e.filename.startswith("Contents/") and e.filename.endswith(".xml"):
|
| 60 |
try:
|
| 61 |
s = data.decode("utf-8", errors="ignore")
|
| 62 |
-
s2 = repl_xml(s
|
| 63 |
-
if collect_debug and
|
| 64 |
-
|
| 65 |
data = s2.encode("utf-8")
|
| 66 |
except Exception:
|
| 67 |
pass
|
|
@@ -69,78 +132,100 @@ def replace_tokens_in_hwpx(hwpx_bytes: bytes, mapping: dict, collect_debug: bool
|
|
| 69 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 70 |
zout.writestr(zi, data)
|
| 71 |
|
| 72 |
-
zin.close()
|
| 73 |
-
|
| 74 |
-
mem_out.seek(0)
|
| 75 |
-
return (mem_out.getvalue(), debug_info) if collect_debug else mem_out.getvalue()
|
| 76 |
-
|
| 77 |
|
| 78 |
-
# =========================
|
| 79 |
-
#
|
| 80 |
-
# =========================
|
| 81 |
-
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ (.HWPX ํ ํฐ ๋ฒ์ )")
|
| 82 |
-
|
| 83 |
-
st.markdown("""
|
| 84 |
-
HWPX ํ
ํ๋ฆฟ ์์ `{{๋ฐ์ค๋ฒํธ1}}`, `{{์ข
๋ฃ์ฐ๋1}}`, `{{๋ณด์กด๊ธฐ๊ฐ1}}`, `{{๋จ์์
๋ฌด1}}`,
|
| 85 |
-
`{{๊ธฐ๋ก๋ฌผ์ฒ 1}}`, `{{๋ชฉ๋ก1}}` ๊ฐ์ ํ ํฐ์ ๋ฃ์ด์ฃผ์ธ์.
|
| 86 |
-
|
| 87 |
-
- ์์
/CSV ์
๋ก๋ โ ์ปฌ๋ผ๋ช
๋งคํ
|
| 88 |
-
- ๋ผ๋ฒจ ์์ฑ โ ZIP์ผ๋ก ๋ค์ด๋ก๋
|
| 89 |
-
""")
|
| 90 |
-
|
| 91 |
-
# ------------------------
|
| 92 |
-
# ์
๋ก๋ ์์ญ
|
| 93 |
-
# ------------------------
|
| 94 |
tpl_file = st.file_uploader("๐ HWPX ํ
ํ๋ฆฟ ์
๋ก๋", type=["hwpx"])
|
| 95 |
-
|
|
|
|
| 96 |
|
| 97 |
-
if tpl_file and
|
| 98 |
-
# ํ
ํ๋ฆฟ ์ฝ๊ธฐ
|
| 99 |
tpl_bytes = tpl_file.read()
|
|
|
|
| 100 |
|
| 101 |
-
|
| 102 |
-
if excel_file.name.endswith(".csv"):
|
| 103 |
-
df = pd.read_csv(excel_file)
|
| 104 |
-
else:
|
| 105 |
-
df = pd.read_excel(excel_file)
|
| 106 |
-
|
| 107 |
-
st.subheader("๐ ๋ฐ์ดํฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
|
| 108 |
-
st.dataframe(df.head())
|
| 109 |
-
|
| 110 |
-
box_col = "๋ฐ์ค๋ฒํธ"
|
| 111 |
-
if box_col not in df.columns:
|
| 112 |
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
+
import io, zipfile, re, html, json
|
| 4 |
+
|
| 5 |
+
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ (ํ ํฐยท๋ฐฐ์น)", layout="wide")
|
| 6 |
+
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ (.HWPX ํ ํฐยท๋ฐฐ์น ์ง์)")
|
| 7 |
+
|
| 8 |
+
with st.expander("์ฌ์ฉ ๋ฐฉ๋ฒ", expanded=True):
|
| 9 |
+
st.markdown("""
|
| 10 |
+
1) **HWPX ํ
ํ๋ฆฟ**: ๋ผ๋ฒจ ํ ํ์ด์ง์ `{{๋ฐ์ค๋ฒํธ1}} โฆ {{๋ฐ์ค๋ฒํธN}}`, `{{์ข
๋ฃ์ฐ๋1}} โฆ` ์ฒ๋ผ **๋ฒํธ๊ฐ ๋ถ์ ํ ํฐ**์ ๋ฃ์ด ์ฃผ์ธ์.
|
| 11 |
+
- ์ฌ์ฉ ํ ํฐ ์: `{{๋ฐ์ค๋ฒํธi}}`, `{{์ข
๋ฃ์ฐ๋i}}`, `{{๋ณด์กด๊ธฐ๊ฐi}}`, `{{๋จ์์
๋ฌดi}}`, `{{๊ธฐ๋ก๋ฌผ์ฒ i}}`, `{{๋ชฉ๋กi}}` (i = 1..N)
|
| 12 |
+
2) **์์
/CSV ์
๋ก๋** โ `๋ฐ์ค๋ฒํธ`๋ ํ์, ๋๋จธ์ง๋ ์์ผ๋ฉด ์๋ ๋ฐ์
|
| 13 |
+
3) **ํ
ํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์(N)** ๋ฅผ ์ง์ ํ๋ฉด N๊ฐ์ฉ ๋ฌถ์ด ํ ํ์ด์ง๋ฅผ ์์ฑํฉ๋๋ค.
|
| 14 |
+
4) **ZIP ๋ค์ด๋ก๋**๋ฅผ ๋ฐ์ผ๋ฉด `label_0001_0003.hwpx` ์ฒ๋ผ ํ์ด์ง๋ณ ํ์ผ์ด ๋ค์ด ์์ต๋๋ค.
|
| 15 |
+
""")
|
| 16 |
+
|
| 17 |
+
# =========================
|
| 18 |
+
# ๋ฐ์ดํฐ ์ ์ฒ๋ฆฌ
|
| 19 |
+
# =========================
|
| 20 |
+
def compute_year_range(series: pd.Series) -> str:
    """Return the min–max year span of *series* formatted as 'YYYY-YYYY'.

    Blank, zero ("0" / "0000") and non-numeric entries are ignored.  When no
    usable year remains, the placeholder "0000-0000" is returned.
    """
    # Fill missing values BEFORE casting to str: astype(str) turns NaN/None
    # into the literal strings "nan"/"None", which made the original
    # post-cast fillna("") a no-op.
    s = series.fillna("").astype(str).str.strip()
    valid = s[~s.isin(["", "0", "0000"])]
    if len(valid) == 0:
        return "0000-0000"
    # Coerce any leftover non-numeric text to NaN and drop it instead of raising.
    valid_int = pd.to_numeric(valid, errors="coerce").dropna().astype(int)
    if len(valid_int) == 0:
        return "0000-0000"
    return f"{valid_int.min():04d}-{valid_int.max():04d}"
|
| 29 |
+
|
| 30 |
+
def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse the row-per-record upload into one row per box.

    For each box number this produces: the representative metadata (first
    record of the group), the aggregated item listing, and the year range,
    all merged on the box-number key.

    NOTE(review): the column names below are the (mojibake-garbled) Korean
    headers expected in the uploaded sheet — confirm against a real upload.
    """
    df = df.copy()
    # Normalise the box number into a 4-digit, zero-padded string key.
    df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
    if "์ ๋ชฉ" in df.columns:
        df["์ ๋ชฉ"] = df["์ ๋ชฉ"].astype(str)

    # Year range per box, derived from the end-year column when present;
    # otherwise every box gets the "0000-0000" placeholder.
    if "์ข๋ฃ์ฐ๋" in df.columns:
        prod_df = df.groupby("๋ฐ์ค๋ฒํธ")["์ข๋ฃ์ฐ๋"].apply(compute_year_range).reset_index()
        prod_df.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
    else:
        prod_df = pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique(), "์์ฐ์ฐ๋": "0000-0000"})

    # Per-box listing: one "- <mgmt no.> <title>" bullet per record, joined
    # with CRLF (later converted to <hp:lineBreak/> when injected into HWPX).
    # NOTE(review): assumes the title column exists whenever there are rows;
    # a sheet without it would raise KeyError here — confirm upstream checks.
    has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
    list_rows = []
    for box, g in df.groupby("๋ฐ์ค๋ฒํธ"):
        if has_mgmt:
            lines = [f"- {r['๊ด๋ฆฌ๋ฒํธ']} {r['์ ๋ชฉ']}" for _, r in g.iterrows()]
        else:
            lines = [f"- {r['์ ๋ชฉ']}" for _, r in g.iterrows()]
        list_rows.append({"๋ฐ์ค๋ฒํธ": box, "๋ชฉ๋ก": "\r\n".join(lines)})
    list_df = pd.DataFrame(list_rows)

    # Representative metadata: first record of each box, restricted to the
    # known metadata columns actually present in the upload.
    meta_cols = ["๋ฐ์ค๋ฒํธ","์ข๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ ๋ชฉ"]
    meta_exist = [c for c in meta_cols if c in df.columns]
    meta_df = df.groupby("๋ฐ์ค๋ฒํธ", as_index=False).first()[meta_exist] if meta_exist else pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique()})

    # Left-merges keep every box even when a listing or year entry is missing.
    return meta_df.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(prod_df, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 61 |
+
|
| 62 |
+
# =========================
|
| 63 |
+
# HWPX ํ ํฐ ์นํ (๋ฐฐ์น)
|
| 64 |
+
# =========================
|
| 65 |
+
def _build_runs_for_list(text: str) -> str:
|
| 66 |
+
""" ๋ชฉ๋ก ์ค๋ฐ๊ฟ์ <hp:lineBreak/>๋ก ๋ฐ๊พผ ๋ฌธ์์ด(ํ ํฐ ์๋ฆฌ์ ๋ค์ด๊ฐ ํ
์คํธ) """
|
| 67 |
+
if text is None: return ""
|
| 68 |
+
text = str(text)
|
| 69 |
+
lines = text.replace("\r\n", "\n").split("\n")
|
| 70 |
+
parts = []
|
| 71 |
+
for i, ln in enumerate(lines):
|
| 72 |
+
if i > 0:
|
| 73 |
+
parts.append("<hp:lineBreak/>")
|
| 74 |
+
parts.append(html.escape(ln))
|
| 75 |
+
return "".join(parts)
|
| 76 |
+
|
| 77 |
+
def replace_tokens_in_hwpx_batch(hwpx_bytes: bytes, mapping: dict, collect_debug: bool=False):
|
| 78 |
"""
|
| 79 |
+
mapping ์:
|
| 80 |
+
{'๋ฐ์ค๋ฒํธ1': '0001', '์ข
๋ฃ์ฐ๋1': '1999-2002', '๋ชฉ๋ก1': '<hp:run..>',
|
| 81 |
+
'๋ฐ์ค๋ฒํธ2': '0002', ...}
|
| 82 |
+
- {{ํ ํฐ}} ๋ฌธ์์ด์ ์ง์ ์นํ (ํ ํฐ์ ํ run ์์ ์์ด์ผ ์์ )
|
| 83 |
+
- mimetype์ ๋ฌด์์ถ + ์ฒซ ์ํธ๋ฆฌ
|
| 84 |
"""
|
| 85 |
+
dbg = {"token_hits": {}, "files_touched": []} if collect_debug else None
|
| 86 |
|
| 87 |
zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
|
| 88 |
mem_out = io.BytesIO()
|
| 89 |
zout = zipfile.ZipFile(mem_out, "w")
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
names = zin.namelist()
|
| 92 |
if "mimetype" in names:
|
| 93 |
data = zin.read("mimetype")
|
|
|
|
| 95 |
zi.compress_type = zipfile.ZIP_STORED
|
| 96 |
zout.writestr(zi, data)
|
| 97 |
|
| 98 |
+
token_keys = list(mapping.keys())
|
| 99 |
+
|
| 100 |
+
def repl_xml(xml_text: str) -> "tuple[str, bool]":
    """Replace every known {{token}} in *xml_text*; return (new_text, changed).

    Closure over `token_keys`, `mapping`, `collect_debug` and `dbg` from the
    enclosing batch-replace function; mutates `dbg["token_hits"]` in place
    when debugging is enabled.
    """
    changed = False
    # Fast path: substitute only keys whose literal token actually occurs
    # (simple and safe — no regex scan over the whole document).
    for k in token_keys:
        tok = f"{{{{{k}}}}}"  # doubled braces in the f-string -> literal {{k}}
        if tok in xml_text:
            v = mapping.get(k, "")
            # List-family tokens get <hp:lineBreak/> line handling;
            # everything else is substituted as plain XML-escaped text.
            if re.match(r"^(๋ชฉ๋ก|list)\d+$", k):
                v = _build_runs_for_list(v)
            else:
                v = html.escape("" if v is None else str(v))
            xml_text = xml_text.replace(tok, v)
            changed = True
            if collect_debug:
                dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
    return xml_text, changed
|
| 117 |
+
|
| 118 |
for e in zin.infolist():
|
| 119 |
if e.filename == "mimetype":
|
| 120 |
continue
|
|
|
|
| 122 |
if e.filename.startswith("Contents/") and e.filename.endswith(".xml"):
|
| 123 |
try:
|
| 124 |
s = data.decode("utf-8", errors="ignore")
|
| 125 |
+
s2, changed = repl_xml(s)
|
| 126 |
+
if collect_debug and changed:
|
| 127 |
+
dbg["files_touched"].append(e.filename)
|
| 128 |
data = s2.encode("utf-8")
|
| 129 |
except Exception:
|
| 130 |
pass
|
|
|
|
| 132 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 133 |
zout.writestr(zi, data)
|
| 134 |
|
| 135 |
+
zin.close(); zout.close(); mem_out.seek(0)
|
| 136 |
+
return (mem_out.getvalue(), dbg) if collect_debug else (mem_out.getvalue(), None)
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
# =========================
# UI
# =========================
tpl_file = st.file_uploader("๐ HWPX ํํ๋ฆฟ ์๋ก๋", type=["hwpx"])  # HWPX template upload
batch_size = st.number_input("ํํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์ (ํ ํ์ด์ง์ ๋ช ๊ฐ?)", min_value=1, max_value=12, value=3, step=1)  # labels per page (N)
data_file = st.file_uploader("๐ ๋ฐ์ดํฐ ์๋ก๋ (Excel/CSV)", type=["xlsx","xls","csv"])  # data sheet upload

if tpl_file and data_file:
    tpl_bytes = tpl_file.read()
    # CSV vs Excel is decided purely by the file extension (case-insensitive).
    df = pd.read_csv(data_file) if data_file.name.lower().endswith(".csv") else pd.read_excel(data_file)

    # The box-number column is mandatory; abort this Streamlit run without it.
    if "๋ฐ์ค๋ฒํธ" not in df.columns:
        st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
        st.stop()

    st.success("โ์์น ๋งคํ ์๋ฃ (์์์ธก)")
    st.dataframe(df.head(10), use_container_width=True)

    # One merged row per box; box numbers normalised to 4-digit strings.
    merged = build_merged_df(df)
    box_list = merged["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4).unique().tolist()

    st.subheader("๐ ์๋ก๋๋ ๋ฐ์ค๋ฒํธ ๋ชฉ๋ก")
    st.write(f"์ด **{len(box_list)}**๊ฐ")
    st.dataframe(pd.DataFrame({"๋ฐ์ค๋ฒํธ": box_list}), use_container_width=True, height=240)

    # An empty selection means "generate all boxes".
    selected = st.multiselect("์์ฑํ ๋ฐ์ค๋ฒํธ ์ ํ (๋น์ฐ๋ฉด ์ ์ฒด ์์ฑ)", options=box_list)

    work_df = merged[merged["๋ฐ์ค๋ฒํธ"].isin(selected)] if selected else merged
    rows = work_df.sort_values("๋ฐ์ค๋ฒํธ").to_dict(orient="records")

    # Preview the token mapping that the FIRST page would receive.
    st.subheader("๐งช 1ํ์ด์ง ํ ํฐ ๋งคํ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
    first_page = rows[:int(batch_size)]
    # Token base names; slot i on a page uses the key f"{base}{i+1}" (1-based).
    keys = ["๋ฐ์ค๋ฒํธ","์ข๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]
    mapping_preview = {}
    for i in range(int(batch_size)):
        if i < len(first_page):
            r = first_page[i]
            for k in keys:
                # The end-year token is fed from the computed year-range column.
                if k == "์ข๋ฃ์ฐ๋":
                    mapping_preview[f"{k}{i+1}"] = r.get("์์ฐ์ฐ๋","")
                else:
                    mapping_preview[f"{k}{i+1}"] = r.get(k,"")
        else:
            # Unused slots on a short (last) page are blanked out.
            for k in keys:
                mapping_preview[f"{k}{i+1}"] = ""

    st.dataframe(
        pd.DataFrame(
            [{"ํ ํฐ": k, "๊ฐ(์๋ถ๋ถ)": (str(v)[:120] if v is not None else ""), "๊ธธ์ด": (len(str(v)) if v is not None else 0)}
            for k, v in sorted(mapping_preview.items())]
        ),
        use_container_width=True, height=320
    )

    if st.button("๐ ๋ผ๋ฒจ ์์ฑ (ํ์ด์ง๋ณ HWPX ZIP)"):
        mem_zip = io.BytesIO()
        zout = zipfile.ZipFile(mem_zip, "w", zipfile.ZIP_DEFLATED)

        n = int(batch_size)
        total = len(rows)
        pages = (total + n - 1) // n  # ceil(total / n)

        all_debug = []  # per-page debug stats for the JSON report

        for p in range(pages):
            start = p * n
            chunk = rows[start:start+n]
            # Same slot-filling logic as the preview above — keep them in sync.
            mapping = {}
            for i in range(n):
                if i < len(chunk):
                    r = chunk[i]
                    for k in keys:
                        if k == "์ข๋ฃ์ฐ๋":
                            mapping[f"{k}{i+1}"] = r.get("์์ฐ์ฐ๋","")
                        else:
                            mapping[f"{k}{i+1}"] = r.get(k,"")
                else:
                    for k in keys:
                        mapping[f"{k}{i+1}"] = ""

            out_hwpx, dbg = replace_tokens_in_hwpx_batch(tpl_bytes, mapping, collect_debug=True)
            all_debug.append({"page": p+1, "mapping_keys": sorted(list(mapping.keys())), "stats": dbg})

            # File name carries the page's box numbers, e.g. label_0001_0003.hwpx.
            page_boxes = [r.get("๋ฐ์ค๋ฒํธ","") for r in chunk]
            safe = "_".join(page_boxes) if page_boxes else f"empty_{p+1}"
            zout.writestr(f"label_{safe}.hwpx", out_hwpx)

        zout.close(); mem_zip.seek(0)
        st.download_button("โฌ๏ธ ZIP ๋ค์ด๋ก๋", data=mem_zip, file_name="labels_by_page.zip", mime="application/zip")
        st.download_button("โฌ๏ธ ๋๋ฒ๊ทธ ๋ฆฌํฌํธ(JSON)", data=json.dumps(all_debug, ensure_ascii=False, indent=2),
                           file_name="debug_by_page.json", mime="application/json")

    st.caption("โป ํํ๋ฆฟ์ ํ ํฐ์ **๋ฐ๋์ run ํ๋์ ์จ์ ํ ๋ฌธ์์ด**๋ก ๋ฃ์ด์ฃผ์ธ์(์: `{{๋ฐ์ค๋ฒํธ1}}`). ํ ํฐ์ด ๊ธ์ ๋จ์๋ก ์ชผ๊ฐ์ ธ ์ฌ๋ฌ run์ ๋๋๋ฉด ์นํ์ด ๋์ง ์์ ์ ์์ต๋๋ค.")
|