Update app.py
app.py
CHANGED
@@ -1,39 +1,27 @@
- # app.py - based on the previous code (safe substitution) + lxml-based "single merged HWPX" output
- import io
- import json
- import html
- import re
- import zipfile
- from typing import Dict, Tuple, List, Optional
-
- import pandas as pd
  import streamlit as st

-
- st.
- st.title("📦 보존박스 라벨 생성기 📦")
-

- #
  def _year_range(series: pd.Series) -> str:
      s = series.astype(str).fillna("")
      v = s[~s.isin(["", "0", "0000"])]
-     if v.empty:
-         return "0000-0000"
      nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
-     if nums.empty:
-         return "0000-0000"
      return f"{nums.min():04d}-{nums.max():04d}"

-
  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
-     """Build, keyed by 박스번호: representative meta + multi-line 목록 + production-year range."""
      df = df.copy()
      df["박스번호"] = df["박스번호"].astype(str).str.zfill(4)
      if "제목" in df.columns:
          df["제목"] = df["제목"].astype(str)

-     # production year (range)
      if "종료연도" in df.columns:
          yr = df.groupby("박스번호")["종료연도"].apply(_year_range).reset_index()
          yr.columns = ["박스번호", "생산연도"]
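For intuition, `_year_range` collapses a column of year strings into a min-max label, skipping blanks and the "0"/"0000" placeholders. A minimal check (illustrative only, not part of the commit):

    import pandas as pd
    # assuming _year_range as defined above
    print(_year_range(pd.Series(["2019", "", "0000", "2021"])))  # "2019-2021"
    print(_year_range(pd.Series(["", "0"])))                     # "0000-0000"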
@@ -44,57 +32,116 @@ def build_rows(df: pd.DataFrame) -> pd.DataFrame:
      has_mgmt = "관리번호" in df.columns
      lists = []
      for b, g in df.groupby("박스번호"):
-         lines = [
-
-             for _, r in g.iterrows()
-         ]
          lists.append({"박스번호": b, "목록": "\r\n".join(lines)})
      list_df = pd.DataFrame(lists)

      # representative meta
-     meta_cols = ["박스번호",
      meta_exist = [c for c in meta_cols if c in df.columns]
-     if meta_exist
-
-     else:
-         meta = pd.DataFrame({"박스번호": df["박스번호"].unique()})

      merged = meta.merge(list_df, on="박스번호", how="left").merge(yr, on="박스번호", how="left")
      return merged

-
- # =============== substitution utils (safe version of the previous code) ===============
  FIELD_PAIR_RE_TMPL = (
-     r'<(?P<
      r'(.*?)'
-     r'<(?P=
  )
  TOKEN_FMT = "{{{{{key}}}}}"

-
      return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"

-
- def _run_for_list(text: str) -> str:
-     if text is None:
-         return ""
-     lines = str(text).replace("\r\n", "\n").split("\n")
-     parts = []
-     for i, ln in enumerate(lines):
-         if i > 0:
-             parts.append("<hp:lineBreak/>")
-         parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
-     return "".join(parts)
-
-
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
      changed_any = False

-     #
      for k, v in mapping.items():
-
-
          pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
          xml_new, n = pat.subn(replacement, xml)
          if n:
@@ -102,16 +149,33 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
              xml = xml_new
              changed_any = True

-     # 2)
      for k, v in mapping.items():
          tok = TOKEN_FMT.format(key=k)
          if tok in xml:
-
-                 _run_for_list(v)
-                 if re.match(r"^(목록|list)\d+$", k, re.IGNORECASE)
-                 else html.escape("" if v is None else str(v))
-             )
-             xml = xml.replace(tok, rep)
              dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
              changed_any = True
@@ -119,21 +183,25 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
      dbg["files_touched"] = True
      return xml

-
-
-     """
-     import time
-
-     dbg = {"field_hits": {}, "token_hits": {}, "touched_files": []}
      zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
      out_buf = io.BytesIO()
-     zout = zipfile.ZipFile(out_buf, "w")

      # mimetype: stored uncompressed, first entry
      names = zin.namelist()
      if "mimetype" in names:
          zi = zipfile.ZipInfo("mimetype")
          zi.compress_type = zipfile.ZIP_STORED
          zout.writestr(zi, zin.read("mimetype"))

      for e in zin.infolist():
@@ -144,18 +212,22 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes, dict]:
          try:
              s = data.decode("utf-8", errors="ignore")
              before = s
-             s = _apply_to_xml(
-
-
-                 {"field_hits": dbg["field_hits"], "token_hits": dbg["token_hits"], "files_touched": False},
-             )
              if s != before:
                  dbg["touched_files"].append(e.filename)
                  data = s.encode("utf-8")
          except Exception:
              pass
          zi = zipfile.ZipInfo(e.filename)
          zi.compress_type = zipfile.ZIP_DEFLATED
          zout.writestr(zi, data)

      zout.close()
@@ -163,141 +235,27 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes, dict]:
      zin.close()
      return out_buf.getvalue(), dbg

-
- # =============== single-HWPX merge (lxml) ===============
- def merge_filled_hwpx_list_to_single(hwpx_list: list[bytes]) -> bytes:
-     """
-     Take several already-filled HWPX byte blobs and safely merge the pages
-     and body blocks (<p> etc.) of the later documents into the first one.
-     The XPath uses local-name() to sidestep namespace issues.
-     """
-     from lxml import etree
-     import io, zipfile
-
-     if not hwpx_list:
-         raise ValueError("병합할 HWPX가 없습니다.")
-
-     # unpack the first document as the base
-     base_zip = zipfile.ZipFile(io.BytesIO(hwpx_list[0]), "r")
-     base_files = {name: base_zip.read(name) for name in base_zip.namelist()}
-     base_zip.close()
-
-     # collect the section files
-     section_names = [n for n in base_files.keys() if n.startswith("Contents/section") and n.endswith(".xml")]
-     if not section_names:
-         return hwpx_list[0]
-
-     parser = etree.XMLParser(remove_blank_text=False, recover=True)
-
-     def first_or_none(nodes):
-         return nodes[0] if nodes else None
-
-     def findall_by_local(root, name):
-         # ignore all namespaces and search by local name only
-         return root.xpath(f".//*[local-name()='{name}']")
-
-     def findfirst_by_local(root, name):
-         return first_or_none(findall_by_local(root, name))
-
-     # merge the sections document by document
-     for idx in range(1, len(hwpx_list)):
-         add_zip = zipfile.ZipFile(io.BytesIO(hwpx_list[idx]), "r")
-         add_files = {name: add_zip.read(name) for name in add_zip.namelist()}
-         add_zip.close()
-
-         for sec in section_names:
-             if sec not in add_files or sec not in base_files:
-                 continue
-
-             base_xml = base_files[sec].decode("utf-8", errors="ignore")
-             add_xml = add_files[sec].decode("utf-8", errors="ignore")
-
-             base_root = etree.fromstring(base_xml.encode("utf-8"), parser=parser)
-             add_root = etree.fromstring(add_xml.encode("utf-8"), parser=parser)
-
-             # 1) merge <pages>: append add's <page> elements after base's <pages>
-             base_pages = findfirst_by_local(base_root, "pages")
-             add_pages = findfirst_by_local(add_root, "pages")
-             if base_pages is not None and add_pages is not None:
-                 for pg in list(add_pages):  # the <page> elements
-                     base_pages.append(pg)
-
-             # 2) merge the body: append the blocks under <section>, with a pageBreak paragraph in between
-             base_section = findfirst_by_local(base_root, "section")
-             add_section = findfirst_by_local(add_root, "section")
-             if base_section is not None and add_section is not None:
-                 # extract the hp namespace URI (fall back to pulling it from the tag)
-                 def ns_of(el):
-                     if el is None or el.tag is None:
-                         return None
-                     if el.tag.startswith("{"):
-                         return el.tag.split("}")[0][1:]
-                     return None
-
-                 # take the hp ns from the pages or section tag
-                 hp_ns = ns_of(base_pages) or ns_of(base_section)
-                 def E(tag):
-                     return etree.Element(f"{{{hp_ns}}}{tag}") if hp_ns else etree.Element(tag)

-                 # a pageBreak paragraph to separate the pages
-                 pagebreak_p = E("p")
-                 run = E("run")
-                 br = E("pageBreak")
-                 run.append(br)
-                 pagebreak_p.append(run)
-                 base_section.append(pagebreak_p)
-
-                 # append add_section's child blocks (usually p) as they are
-                 for child in list(add_section):
-                     base_section.append(child)
-
-             # serialize back into place
-             base_files[sec] = etree.tostring(base_root, encoding="utf-8", xml_declaration=False)
-
-     # rebuild the final zip (mimetype: stored uncompressed, first entry)
-     out_buf = io.BytesIO()
-     zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
-
-     if "mimetype" in base_files:
-         zi = zipfile.ZipInfo("mimetype"); zi.compress_type = zipfile.ZIP_STORED
-         zout.writestr(zi, base_files["mimetype"])
-         del base_files["mimetype"]
-
-     for name, data in base_files.items():
-         if isinstance(data, str):
-             data = data.encode("utf-8")
-         zi = zipfile.ZipInfo(name); zi.compress_type = zipfile.ZIP_DEFLATED
-         zout.writestr(zi, data)
-
-     zout.close()
-     out_buf.seek(0)
-     return out_buf.getvalue()
-
- # =============== UI ===============
  with st.expander("사용법", expanded=True):
-     st.markdown(
-         """
  1. 템플릿 파일을 업로드해주세요.
  2. 보존상자 정보가 들어있는 엑셀 파일을 업로드해주세요.
  3. 출력할 라벨 번호를 선택해주세요.
  4. 압축 파일을 다운받고, 압축해제 한 파일을 병합해주세요.
  5. 병합 후, 라벨을 출력하세요.
-
-
-         """
-     )

  tpl = st.file_uploader("📄 HWPX 템플릿 업로드", type=["hwpx"])
  n_per_page = st.number_input("템플릿의 라벨 세트 개수(한 페이지 N개)", 1, 12, 3, 1)
- data = st.file_uploader("📊 데이터 업로드 (Excel/CSV)", type=["xlsx",

  if tpl and data:
      tpl_bytes = tpl.read()
      df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)

      if "박스번호" not in df.columns:
-         st.error("❌ 필수 컬럼 '박스번호'가 없습니다.")
-         st.stop()

      st.success("✅ 위치 매핑 완료 (엑셀 측)")
      st.dataframe(df.head(10), use_container_width=True)
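The deleted merge function leaned on XPath's local-name() to stay namespace-agnostic. The pattern is easy to demonstrate in isolation (a standalone sketch with a made-up namespace URI, not code from this commit):

    from lxml import etree

    xml = b'<hs:section xmlns:hs="http://example.invalid/ns"><hs:p>one</hs:p><hs:p>two</hs:p></hs:section>'
    root = etree.fromstring(xml)
    # matches every <p> regardless of its namespace prefix, as findall_by_local() did
    print([p.text for p in root.xpath(".//*[local-name()='p']")])  # ['one', 'two']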
@@ -315,74 +273,55 @@ if tpl and data:

      # 1-page mapping preview
      st.subheader("🧪 1페이지 매핑 프리뷰")
-     keys = ["박스번호",
      mapping_preview = {}
      for i in range(int(n_per_page)):
          if i < len(records):
              r = records[i]
-             mapping_preview.update(
-                 {
-
-                 }
-             )
          else:
-             for k in keys:
-
-
-         pd.DataFrame([{"키": k, "값 앞부분": str(v)[:120]} for k, v in sorted(mapping_preview.items())]),
-         use_container_width=True,
-         height=320,
-     )

-     if st.button("🚀
          pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
          debug_all = []
-         filled_pages: List[bytes] = []

          for p in range(pages):
-             chunk = records[p
-             mapping
              for i in range(int(n_per_page)):
                  if i < len(chunk):
                      r = chunk[i]
-                     mapping[f"박스번호{i+1}"] = r.get("박스번호",
-                     mapping[f"종료연도{i+1}"] = r.get("생산연도",
-                     mapping[f"보존기간{i+1}"] = r.get("보존기간",
-                     mapping[f"단위업무{i+1}"] = r.get("단위업무",
-                     mapping[f"기록물철{i+1}"] = r.get("기록물철",
-                     mapping[f"목록{i+1}"]
-                     title_val = r.get("제목",
-                     mapping[f"제목{i+1}"]
                      mapping[f"업무명{i+1}"] = title_val
                  else:
-                     for k in keys:
-                         mapping[f"{k}{i+1}"] = ""

              out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
-             debug_all.append({"page": p
-
-
-
-
-
-         last_box = records[-1].get("박스번호", "0000") if records else "0000"
-         filename = (
-             f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
-         )
-
-         st.download_button(
-             "⬇️ 단일 HWPX 다운로드",
-             data=merged_hwpx,
-             file_name=filename,
-             mime="application/vnd.hancom.hwpx",
-         )
-
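One detail shared by the old and new generation loops is the page count, a ceiling division written with integer arithmetic. A quick check (illustrative only):

    # pages = ceil(len(records) / n_per_page) without importing math
    for total, n in [(7, 3), (6, 3), (1, 3)]:
        print((total + n - 1) // n)  # 3, 2, 1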
  import streamlit as st
+ import pandas as pd
+ import io, zipfile, re, html, json
+ from typing import Dict, Tuple

+ st.set_page_config(page_title="📦 보존상자 라벨 생성기", layout="wide")
+ st.title("📦 보존상자 라벨 생성기 📦")

+ # -------------------- data utils --------------------
  def _year_range(series: pd.Series) -> str:
      s = series.astype(str).fillna("")
      v = s[~s.isin(["", "0", "0000"])]
+     if v.empty: return "0000-0000"
      nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
+     if nums.empty: return "0000-0000"
      return f"{nums.min():04d}-{nums.max():04d}"

  def build_rows(df: pd.DataFrame) -> pd.DataFrame:
      df = df.copy()
      df["박스번호"] = df["박스번호"].astype(str).str.zfill(4)
      if "제목" in df.columns:
          df["제목"] = df["제목"].astype(str)

+     # production year (range) = per-group range of 종료연도
      if "종료연도" in df.columns:
          yr = df.groupby("박스번호")["종료연도"].apply(_year_range).reset_index()
          yr.columns = ["박스번호", "생산연도"]

      has_mgmt = "관리번호" in df.columns
      lists = []
      for b, g in df.groupby("박스번호"):
+         lines = [f"- {r['관리번호']} {r.get('제목','')}" if has_mgmt else f"- {r.get('제목','')}"
+                  for _, r in g.iterrows()]
          lists.append({"박스번호": b, "목록": "\r\n".join(lines)})
      list_df = pd.DataFrame(lists)

      # representative meta
+     meta_cols = ["박스번호","종료연도","보존기간","단위업무","기록물철","제목"]
      meta_exist = [c for c in meta_cols if c in df.columns]
+     meta = df.groupby("박스번호", as_index=False).first()[meta_exist] if meta_exist \
+            else pd.DataFrame({"박스번호": df["박스번호"].unique()})

      merged = meta.merge(list_df, on="박스번호", how="left").merge(yr, on="박스번호", how="left")
      return merged

+ # -------------------- substitution utils --------------------
  FIELD_PAIR_RE_TMPL = (
+     r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
      r'(.*?)'
+     r'<(?P=fprefix):fieldEnd\b[^>]*/>'
  )
  TOKEN_FMT = "{{{{{key}}}}}"

+ # paragraph (<*:p>) search pattern
+ PARA_RE = re.compile(
+     r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
+     re.DOTALL
+ )

+ # extracts the style of the original run
+ def _extract_run_style(body: str, pprefix: str) -> str:
+     """Extract the first run element, with its style, from a paragraph body."""
+     run_pattern = re.compile(
+         rf'<{pprefix}:run[^>]*>.*?</{pprefix}:run>',
+         re.DOTALL
+     )
+     match = run_pattern.search(body)
+     if match:
+         return match.group(0)
+     return f'<{pprefix}:run><{pprefix}:t></{pprefix}:t></{pprefix}:run>'

+ # helper that clones one paragraph with the same style (style preservation)
+ def _make_para_with_style(pprefix: str, pattrs: str, text: str, original_run: str) -> str:
+     esc = html.escape("" if text is None else str(text))

+     # swap only the text part inside the original run
+     text_pattern = re.compile(rf'(<{pprefix}:t[^>]*>)[^<]*(</{pprefix}:t>)')
+     new_run = text_pattern.sub(rf'\g<1>{esc}\g<2>', original_run)

+     # if there was no text node, fall back to a default shape
+     if new_run == original_run:
+         t_pattern = re.compile(rf'(<{pprefix}:run[^>]*>)(.*?)(</{pprefix}:run>)', re.DOTALL)
+         new_run = t_pattern.sub(rf'\g<1><{pprefix}:t>{esc}</{pprefix}:t>\g<3>', original_run)

+     return f'<{pprefix}:p{pattrs}>{new_run}</{pprefix}:p>'

+ def _split_lines(val) -> list:
+     if val is None: return [""]
+     return str(val).replace("\r\n","\n").split("\n")

+ def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
+     """
+     Replace the whole parent paragraph that contains `key` with several
+     paragraphs, one per line of the value, keeping the original style.
+     """
+     pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
+     tnode_pat = re.compile(rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>', re.DOTALL)
+     token_str = TOKEN_FMT.format(key=key)

+     def para_repl(m):
+         body = m.group("pbody")
+         if not (pair_pat.search(body) or tnode_pat.search(body) or (token_str in body)):
+             return m.group(0)

+         lines = _split_lines(value)
+         pprefix = m.group("pprefix")
+         pattrs = m.group("pattrs")

+         # extract the original run style
+         original_run = _extract_run_style(body, pprefix)

+         # build one new paragraph per line, keeping the original style
+         new_paras = "".join(_make_para_with_style(pprefix, pattrs, ln, original_run) for ln in lines)
+         dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
+         return new_paras

+     xml2 = PARA_RE.sub(para_repl, xml)
+     if xml2 != xml:
+         dbg["touched"] = True
+     return xml2

+ def _runs_plain(text: str) -> str:
      return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
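Taken together, `_extract_run_style` and `_make_para_with_style` rewrite a paragraph's text while keeping its run styling intact. A toy round trip (sketch; the charPrIDRef value is invented):

    body = '<hp:run charPrIDRef="7"><hp:t>{{목록1}}</hp:t></hp:run>'
    run = _extract_run_style(body, "hp")            # returns the styled run verbatim
    print(_make_para_with_style("hp", ' id="1"', "line 1", run))
    # <hp:p id="1"><hp:run charPrIDRef="7"><hp:t>line 1</hp:t></hp:run></hp:p>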
  def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
      changed_any = False

+     # 0) handle multi-line keys first via "replace the parent paragraph" (업무명 excluded, which fixes the font issue)
+     multi_key = re.compile(r"^(목록|list|제목)\d+$", re.IGNORECASE)
+     for k, v in mapping.items():
+         if multi_key.match(k):
+             xml_new = _replace_para_multiline(xml, k, v, dbg)
+             if xml_new != xml:
+                 xml = xml_new
+                 changed_any = True

+     # 1) field-pair (inline) substitution, single-line keys only
      for k, v in mapping.items():
+         if multi_key.match(k):
+             continue
+         replacement = _runs_plain(v)
          pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
          xml_new, n = pat.subn(replacement, xml)
          if n:
              xml = xml_new
              changed_any = True

+     # 2) partial replacement inside raw text nodes (<*:t>key</*:t>), single-line keys only
+     tnode_all = re.compile(
+         r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
+         re.DOTALL
+     )
+     for k, v in mapping.items():
+         if multi_key.match(k):
+             continue
+         def repl_tnode(m):
+             text_node = m.group(3)
+             if k not in text_node:
+                 return m.group(0)
+             new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
+             return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
+         xml2 = tnode_all.sub(repl_tnode, xml)
+         if xml2 != xml:
+             dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
+             xml = xml2
+             changed_any = True

+     # 3) token substitution, single-line keys only
      for k, v in mapping.items():
+         if multi_key.match(k):
+             continue
          tok = TOKEN_FMT.format(key=k)
          if tok in xml:
+             xml = xml.replace(tok, html.escape("" if v is None else str(v)))
              dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
              changed_any = True

      if changed_any:
          dbg["files_touched"] = True
      return xml

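The single-line passes can be exercised on a stripped-down stand-in for a section file (sketch): a bare key inside a text node is caught by pass 2, the text-node partial replacement.

    dbg = {"para_hits": {}, "field_hits": {}, "text_hits": {}, "token_hits": {}, "files_touched": False}
    xml = '<hp:p><hp:run><hp:t>박스번호1</hp:t></hp:run></hp:p>'
    print(_apply_to_xml(xml, {"박스번호1": "0001"}, dbg))  # ...<hp:t>0001</hp:t>...
    print(dbg["text_hits"])                                # {'박스번호1': 1}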
+ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
+     import stat, time
+     dbg = {"para_hits":{}, "field_hits":{}, "text_hits":{}, "token_hits":{}, "touched_files": []}
      zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
      out_buf = io.BytesIO()
+     zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)

+     # current time
+     now = time.localtime()

      # mimetype: stored uncompressed, first entry
      names = zin.namelist()
      if "mimetype" in names:
          zi = zipfile.ZipInfo("mimetype")
          zi.compress_type = zipfile.ZIP_STORED
+         # a brand-new ZipInfo, to keep the output from being read-only
+         zi.external_attr = 0o100666 << 16  # regular file + all permissions
+         zi.create_system = 0               # DOS/Windows
+         zi.date_time = now[:6]
          zout.writestr(zi, zin.read("mimetype"))

      for e in zin.infolist():

          try:
              s = data.decode("utf-8", errors="ignore")
              before = s
+             s = _apply_to_xml(s, mapping, {"para_hits":dbg["para_hits"], "field_hits":dbg["field_hits"],
+                                            "text_hits":dbg["text_hits"], "token_hits":dbg["token_hits"],
+                                            "files_touched":False})
              if s != before:
                  dbg["touched_files"].append(e.filename)
                  data = s.encode("utf-8")
          except Exception:
              pass

+         # create a brand-new ZipInfo, to keep the output from being read-only
          zi = zipfile.ZipInfo(e.filename)
          zi.compress_type = zipfile.ZIP_DEFLATED
+         zi.external_attr = 0o100666 << 16  # regular file + all permissions
+         zi.create_system = 0               # DOS/Windows
+         zi.date_time = now[:6]             # current time
+         zi.flag_bits = 0                   # no special flags
          zout.writestr(zi, data)

      zout.close()
      zin.close()
      return out_buf.getvalue(), dbg

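Why the fuss over mimetype? HWPX follows an OCF-style container convention in which mimetype should be the archive's first entry and stored uncompressed so consumers can sniff it. The rewritten archive is easy to inspect (sketch; tpl_bytes stands for the bytes of any valid .hwpx template):

    out_bytes, _dbg = replace_in_hwpx(tpl_bytes, {"박스번호1": "0001"})
    info = zipfile.ZipFile(io.BytesIO(out_bytes)).infolist()[0]
    print(info.filename, info.compress_type == zipfile.ZIP_STORED)  # mimetype True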
+ # -------------------- UI --------------------
  with st.expander("사용법", expanded=True):
+     st.markdown("""
  1. 템플릿 파일을 업로드해주세요.
  2. 보존상자 정보가 들어있는 엑셀 파일을 업로드해주세요.
  3. 출력할 라벨 번호를 선택해주세요.
  4. 압축 파일을 다운받고, 압축해제 한 파일을 병합해주세요.
  5. 병합 후, 라벨을 출력하세요.
+ 단, 템플릿은 .HWPX(한글) 파일이어야 합니다. (.HWP 불가)
+     """)

  tpl = st.file_uploader("📄 HWPX 템플릿 업로드", type=["hwpx"])
  n_per_page = st.number_input("템플릿의 라벨 세트 개수(한 페이지 N개)", 1, 12, 3, 1)
+ data = st.file_uploader("📊 데이터 업로드 (Excel/CSV)", type=["xlsx","xls","csv"])

  if tpl and data:
      tpl_bytes = tpl.read()
      df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)

      if "박스번호" not in df.columns:
+         st.error("❌ 필수 컬럼 '박스번호'가 없습니다."); st.stop()

      st.success("✅ 위치 매핑 완료 (엑셀 측)")
      st.dataframe(df.head(10), use_container_width=True)

      # 1-page mapping preview
      st.subheader("🧪 1페이지 매핑 프리뷰")
+     keys = ["박스번호","종료연도","보존기간","단위업무","기록물철","목록","제목","업무명"]
      mapping_preview = {}
      for i in range(int(n_per_page)):
          if i < len(records):
              r = records[i]
+             mapping_preview.update({
+                 f"박스번호{i+1}": r.get("박스번호",""),
+                 f"종료연도{i+1}": r.get("생산연도",""),
+                 f"보존기간{i+1}": r.get("보존기간",""),
+                 f"단위업무{i+1}": r.get("단위업무",""),
+                 f"기록물철{i+1}": r.get("기록물철",""),
+                 f"목록{i+1}": r.get("목록",""),
+                 f"제목{i+1}": r.get("제목",""),
+                 f"업무명{i+1}": r.get("제목",""),  # covers templates that use '업무명1'
+             })
          else:
+             for k in keys: mapping_preview[f"{k}{i+1}"] = ""
+     st.dataframe(pd.DataFrame([{"키":k, "값 앞부분":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
+                  use_container_width=True, height=320)

+     if st.button("🚀 라벨 생성 (페이지별 HWPX ZIP)"):
+         mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
          pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
          debug_all = []

          for p in range(pages):
+             chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
+             mapping = {}
              for i in range(int(n_per_page)):
                  if i < len(chunk):
                      r = chunk[i]
+                     mapping[f"박스번호{i+1}"] = r.get("박스번호","")
+                     mapping[f"종료연도{i+1}"] = r.get("생산연도","")
+                     mapping[f"보존기간{i+1}"] = r.get("보존기간","")
+                     mapping[f"단위업무{i+1}"] = r.get("단위업무","")
+                     mapping[f"기록물철{i+1}"] = r.get("기록물철","")
+                     mapping[f"목록{i+1}"] = r.get("목록","")
+                     title_val = r.get("제목","")
+                     mapping[f"제목{i+1}"] = title_val
                      mapping[f"업무명{i+1}"] = title_val
                  else:
+                     for k in keys: mapping[f"{k}{i+1}"] = ""

              out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
+             debug_all.append({"page": p+1, "stats": dbg})
+             name = "_".join([r.get("박스번호","") for r in chunk]) if chunk else f"empty_{p+1}"
+             zout.writestr(f"label_{name}.hwpx", out_hwpx)

+         zout.close(); mem.seek(0)
+         st.download_button("⬇️ ZIP 다운로드", data=mem, file_name="labels_by_page.zip", mime="application/zip")
+         st.download_button("⬇️ 디버그(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
+                            file_name="debug.json", mime="application/json")
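For reference, a minimal input table that exercises every mapped column might look like this (the column names are the ones the code expects; the values are invented):

    df = pd.DataFrame({
        "박스번호": ["1", "1", "2"],
        "관리번호": ["A-01", "A-02", "B-01"],
        "제목": ["doc 1", "doc 2", "doc 3"],
        "종료연도": ["2019", "2021", "2020"],
        "보존기간": ["10", "10", "30"],
        "단위업무": ["unit A", "unit A", "unit B"],
        "기록물철": ["file 1", "file 1", "file 2"],
    })
    rows = build_rows(df)   # one row per box; 목록 holds the \r\n-joined item list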