Update app.py
Browse files
app.py
CHANGED
|
@@ -1,40 +1,27 @@
|
|
| 1 |
-
# app.py
|
| 2 |
-
import io
|
| 3 |
-
import json
|
| 4 |
-
import html
|
| 5 |
-
import re
|
| 6 |
-
import zipfile
|
| 7 |
-
from typing import Dict, Tuple
|
| 8 |
-
|
| 9 |
-
import pandas as pd
|
| 10 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ(HWPX) โ ํตํฉ ํ์ผ ์ถ๋ ฅ", layout="wide")
|
| 15 |
-
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ โ HWPX ํ๋ยทํ ํฐยท๋ฌธ๋จ ์์ ์นํ + ๋คํ์ด์ง ํตํฉ ์ถ๋ ฅ")
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# ====================== ๋ฐ์ดํฐ ์ ํธ ======================
|
| 19 |
def _year_range(series: pd.Series) -> str:
    """Return the ``"MIN-MAX"`` year span of a column as zero-padded years.

    Values are parsed numerically. Blanks, unparseable entries and zero in
    any spelling (``0``, ``"0000"``, ``0.0``) count as "no year" and are
    ignored.

    Args:
        series: Year values; may mix ints, floats, strings and missing values.

    Returns:
        ``"YYYY-YYYY"`` built from the smallest and largest remaining year,
        or ``"0000-0000"`` when no usable year is left.
    """
    # Parse first, filter second: a column that contains a missing value is
    # float-typed, so a zero is rendered as "0.0" and would slip past a
    # string comparison against "0"/"0000".
    nums = pd.to_numeric(series.astype(str), errors="coerce").dropna()
    nums = nums[nums != 0].astype(int)
    if nums.empty:
        return "0000-0000"
    return f"{nums.min():04d}-{nums.max():04d}"
|
| 28 |
|
| 29 |
-
|
| 30 |
def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
| 31 |
-
"""๋ฐ์ค๋ฒํธ ๊ธฐ์ค ๋ํ ๋ฉํ + ๋ชฉ๋ก(์ฌ๋ฌ ์ค) + ์์ฐ์ฐ๋ ๋ฒ์ ์์ฑ"""
|
| 32 |
df = df.copy()
|
| 33 |
df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
|
| 34 |
if "์ ๋ชฉ" in df.columns:
|
| 35 |
df["์ ๋ชฉ"] = df["์ ๋ชฉ"].astype(str)
|
| 36 |
|
| 37 |
-
# ์์ฐ์ฐ๋(๋ฒ์)
|
| 38 |
if "์ข
๋ฃ์ฐ๋" in df.columns:
|
| 39 |
yr = df.groupby("๋ฐ์ค๋ฒํธ")["์ข
๋ฃ์ฐ๋"].apply(_year_range).reset_index()
|
| 40 |
yr.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
|
|
@@ -45,102 +32,72 @@ def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 45 |
has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
|
| 46 |
lists = []
|
| 47 |
for b, g in df.groupby("๋ฐ์ค๋ฒํธ"):
|
| 48 |
-
lines = [
|
| 49 |
-
|
| 50 |
-
for _, r in g.iterrows()
|
| 51 |
-
]
|
| 52 |
lists.append({"๋ฐ์ค๋ฒํธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
|
| 53 |
list_df = pd.DataFrame(lists)
|
| 54 |
|
| 55 |
# ๋ํ ๋ฉํ
|
| 56 |
-
meta_cols = ["๋ฐ์ค๋ฒํธ",
|
| 57 |
meta_exist = [c for c in meta_cols if c in df.columns]
|
| 58 |
-
if meta_exist
|
| 59 |
-
|
| 60 |
-
else:
|
| 61 |
-
meta = pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique()})
|
| 62 |
|
| 63 |
merged = meta.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(yr, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 64 |
return merged
|
| 65 |
|
| 66 |
-
|
| 67 |
-
# ====================== ์นํ ์ ํธ (์ธ๋ผ์ธ/๋ฌธ๋จ) ======================
|
| 68 |
-
|
| 69 |
-
# fieldBegin/fieldEnd ์ (์ ๋์ด ์์ผ๋์นด๋)
|
| 70 |
# Regex template for a fieldBegin ... fieldEnd pair with any namespace prefix.
# ``{name}`` is filled in with str.format; callers should pass an escaped name.
FIELD_PAIR_RE_TMPL = (
    r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
    r'(.*?)'
    r'<(?P=fprefix):fieldEnd\b[^>]*/>'
)

# Token format: ``TOKEN_FMT.format(key="k")`` yields the literal ``{{k}}``.
TOKEN_FMT = "{{{{{key}}}}}"

# Paragraph element. The ``\b`` after the tag name stops other tags whose
# name merely starts with "p" (e.g. ``pPr``) from being taken as a paragraph.
PARA_RE = re.compile(
    r'<(?P<pprefix>[a-zA-Z0-9_]+):p\b(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
    re.DOTALL,
)

# Patterns for extracting run / t nodes. ``\b`` keeps ``:t`` from matching
# longer tag names such as ``tab`` or ``tbl``, and ``:run`` from matching
# any longer name that starts with "run".
RUN_RE = re.compile(
    r'<(?P<prefix>[a-zA-Z0-9_]+):run\b(?P<rattrs>[^>]*)>(?P<body>.*?)</(?P=prefix):run>',
    re.DOTALL,
)
TP_RE = re.compile(
    r'<(?P<prefix>[a-zA-Z0-9_]+):t\b[^>]*>(?P<text>.*?)</(?P=prefix):t>',
    re.DOTALL,
)
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
def _clone_run_with_text(run_xml: str, text: str) -> str:
    """Copy a run element, swapping in new text while keeping its styling.

    The first text node found by ``TP_RE`` is replaced with the escaped
    *text*; everything else in *run_xml* is returned untouched. When the run
    has no text node, a run holding a single new text node is built instead,
    reusing the prefix and attributes of the run found by ``RUN_RE``.

    Args:
        run_xml: XML of the run used as the template.
        text: Replacement text; ``None`` is treated as an empty string and
            other non-string values are converted with ``str``.

    Returns:
        The run XML carrying the new, HTML-escaped text.
    """
    # Escape once up front; also makes None / non-str input safe, which
    # html.escape alone would reject.
    escaped = html.escape("" if text is None else str(text))

    def _repl_t(m):
        return f"<{m.group('prefix')}:t>{escaped}</{m.group('prefix')}:t>"

    # Only the first text node is rewritten (count=1).
    replaced, hits = TP_RE.subn(_repl_t, run_xml, count=1)
    if hits:
        return replaced

    # No text node: insert a default one.
    m = RUN_RE.search(run_xml)
    if not m:
        return f"<hp:run><hp:t>{escaped}</hp:t></hp:run>"
    prefix = m.group("prefix")
    # Carry the run's own attributes over so the rebuilt run keeps them.
    return (
        f"<{prefix}:run{m.group('rattrs')}>"
        f"<{prefix}:t>{escaped}</{prefix}:t>"
        f"</{prefix}:run>"
    )
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
def _extract_ppr_and_template_run(pbody: str):
    """Pull the paragraph properties and a sample run out of a paragraph body.

    Args:
        pbody: Inner XML of a paragraph.

    Returns:
        A ``(ppr_xml, template_run)`` tuple: the first self-closing ``pPr``
        tag (``""`` when there is none) and the first run matched by
        ``RUN_RE`` (an empty ``hp`` run when there is none).
    """
    ppr_hit = re.search(r'<(?P<prefix>[a-zA-Z0-9_]+):pPr\b[^>]*/>', pbody)
    first_run = RUN_RE.search(pbody)

    ppr_xml = "" if ppr_hit is None else ppr_hit.group(0)
    # The matched run is returned whole, so whatever it contains comes along.
    if first_run is None:
        template_run = "<hp:run><hp:t></hp:t></hp:run>"
    else:
        template_run = first_run.group(0)
    return ppr_xml, template_run
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
def _make_para_from_templates(pprefix: str, pattrs: str, ppr_xml: str, template_run: str, text: str) -> str:
    """Assemble one paragraph from a pPr snippet and a run template.

    The run is produced by ``_clone_run_with_text(template_run, text)`` and
    placed after *ppr_xml* inside a ``<pprefix:p>`` element that carries
    *pattrs* verbatim.
    """
    run_xml = _clone_run_with_text(template_run, text)
    opening = f"<{pprefix}:p{pattrs}>"
    closing = f"</{pprefix}:p>"
    return f"{opening}{ppr_xml}{run_xml}{closing}"
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def _split_lines(val) -> list:
|
| 129 |
-
if val is None:
|
| 130 |
-
|
| 131 |
-
return str(val).replace("\r\n", "\n").split("\n")
|
| 132 |
-
|
| 133 |
|
| 134 |
def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
|
| 135 |
"""
|
| 136 |
-
key๊ฐ ํฌํจ๋ '๋ถ๋ชจ ๋ฌธ๋จ ์ ์ฒด'
|
| 137 |
-
|
| 138 |
"""
|
| 139 |
pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
|
| 140 |
-
tnode_pat = re.compile(
|
| 141 |
-
rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>',
|
| 142 |
-
re.DOTALL,
|
| 143 |
-
)
|
| 144 |
token_str = TOKEN_FMT.format(key=key)
|
| 145 |
|
| 146 |
def para_repl(m):
|
|
@@ -150,28 +107,29 @@ def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
|
|
| 150 |
|
| 151 |
lines = _split_lines(value)
|
| 152 |
pprefix = m.group("pprefix")
|
| 153 |
-
pattrs
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
| 157 |
dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
|
| 158 |
return new_paras
|
| 159 |
|
| 160 |
xml2 = PARA_RE.sub(para_repl, xml)
|
| 161 |
if xml2 != xml:
|
| 162 |
-
dbg["
|
| 163 |
return xml2
|
| 164 |
|
| 165 |
-
|
| 166 |
def _runs_plain(text: str) -> str:
    """Wrap *text* in a bare ``<hp:run><hp:t>`` pair.

    ``None`` becomes an empty string, anything else goes through ``str``,
    and the result is HTML-escaped before being embedded.
    """
    raw = "" if text is None else str(text)
    escaped = html.escape(raw)
    return "<hp:run><hp:t>" + escaped + "</hp:t></hp:run>"
|
| 168 |
|
| 169 |
-
|
| 170 |
def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
| 171 |
changed_any = False
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
multi_key = re.compile(r"^(๋ชฉ๋ก|list
|
| 175 |
for k, v in mapping.items():
|
| 176 |
if multi_key.match(k):
|
| 177 |
xml_new = _replace_para_multiline(xml, k, v, dbg)
|
|
@@ -179,7 +137,7 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
|
| 179 |
xml = xml_new
|
| 180 |
changed_any = True
|
| 181 |
|
| 182 |
-
#
|
| 183 |
for k, v in mapping.items():
|
| 184 |
if multi_key.match(k):
|
| 185 |
continue
|
|
@@ -191,29 +149,27 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
|
| 191 |
xml = xml_new
|
| 192 |
changed_any = True
|
| 193 |
|
| 194 |
-
#
|
| 195 |
tnode_all = re.compile(
|
| 196 |
r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
|
| 197 |
-
re.DOTALL
|
| 198 |
)
|
| 199 |
for k, v in mapping.items():
|
| 200 |
if multi_key.match(k):
|
| 201 |
continue
|
| 202 |
-
|
| 203 |
def repl_tnode(m):
|
| 204 |
text_node = m.group(3)
|
| 205 |
if k not in text_node:
|
| 206 |
return m.group(0)
|
| 207 |
new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
|
| 208 |
return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
|
| 209 |
-
|
| 210 |
xml2 = tnode_all.sub(repl_tnode, xml)
|
| 211 |
if xml2 != xml:
|
| 212 |
dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
|
| 213 |
xml = xml2
|
| 214 |
changed_any = True
|
| 215 |
|
| 216 |
-
#
|
| 217 |
for k, v in mapping.items():
|
| 218 |
if multi_key.match(k):
|
| 219 |
continue
|
|
@@ -227,25 +183,24 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
|
| 227 |
dbg["files_touched"] = True
|
| 228 |
return xml
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
"""
|
| 233 |
-
import time
|
| 234 |
-
|
| 235 |
-
dbg = {"para_hits": {}, "field_hits": {}, "text_hits": {}, "token_hits": {}, "touched_files": []}
|
| 236 |
zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
|
| 237 |
out_buf = io.BytesIO()
|
| 238 |
zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
|
| 239 |
|
|
|
|
| 240 |
now = time.localtime()
|
| 241 |
|
| 242 |
-
# mimetype
|
| 243 |
names = zin.namelist()
|
| 244 |
if "mimetype" in names:
|
| 245 |
zi = zipfile.ZipInfo("mimetype")
|
| 246 |
zi.compress_type = zipfile.ZIP_STORED
|
| 247 |
-
|
| 248 |
-
zi.
|
|
|
|
| 249 |
zi.date_time = now[:6]
|
| 250 |
zout.writestr(zi, zin.read("mimetype"))
|
| 251 |
|
|
@@ -257,29 +212,22 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes,
|
|
| 257 |
try:
|
| 258 |
s = data.decode("utf-8", errors="ignore")
|
| 259 |
before = s
|
| 260 |
-
s = _apply_to_xml(
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
{
|
| 264 |
-
"para_hits": dbg["para_hits"],
|
| 265 |
-
"field_hits": dbg["field_hits"],
|
| 266 |
-
"text_hits": dbg["text_hits"],
|
| 267 |
-
"token_hits": dbg["token_hits"],
|
| 268 |
-
"files_touched": False,
|
| 269 |
-
},
|
| 270 |
-
)
|
| 271 |
if s != before:
|
| 272 |
dbg["touched_files"].append(e.filename)
|
| 273 |
data = s.encode("utf-8")
|
| 274 |
except Exception:
|
| 275 |
pass
|
| 276 |
-
|
|
|
|
| 277 |
zi = zipfile.ZipInfo(e.filename)
|
| 278 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 279 |
-
zi.external_attr = 0o100666 << 16
|
| 280 |
-
zi.create_system = 0
|
| 281 |
-
zi.date_time = now[:6]
|
| 282 |
-
zi.flag_bits = 0
|
| 283 |
zout.writestr(zi, data)
|
| 284 |
|
| 285 |
zout.close()
|
|
@@ -287,155 +235,24 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes,
|
|
| 287 |
zin.close()
|
| 288 |
return out_buf.getvalue(), dbg
|
| 289 |
|
| 290 |
-
|
| 291 |
-
# ====================== ์น์
/ํ์ด์ง ๋ณํฉ (๋จ์ผ HWPX๋ก ์ถ๋ ฅ) ======================
|
| 292 |
-
|
| 293 |
-
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
    """Merge two HWPX archives into a single archive.

    Every non-section entry is copied from the base archive. Entries named
    ``Contents/section*.xml`` are read from both archives, combined with
    ``merge_sections`` and written to the output.

    Args:
        base_hwpx: Bytes of the archive that supplies everything but the
            extra section content.
        additional_hwpx: Bytes of the archive whose section files are merged
            into the base.

    Returns:
        Bytes of the merged archive.
    """
    import time

    stamp = time.localtime()[:6]

    def _entry(name: str, compress_type: int) -> zipfile.ZipInfo:
        """Build a fresh ZipInfo with the fixed attributes used for every entry."""
        zi = zipfile.ZipInfo(name)
        zi.compress_type = compress_type
        zi.external_attr = 0o100666 << 16
        zi.create_system = 0
        zi.date_time = stamp
        zi.flag_bits = 0
        return zi

    def _is_section(name: str) -> bool:
        return name.startswith("Contents/section") and name.endswith(".xml")

    out_buf = io.BytesIO()
    # Context managers close all three archives even if reading or merging
    # raises part-way through.
    with zipfile.ZipFile(io.BytesIO(base_hwpx), "r") as base_zip, \
            zipfile.ZipFile(io.BytesIO(additional_hwpx), "r") as add_zip, \
            zipfile.ZipFile(
                out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6
            ) as out_zip:
        base_names = base_zip.namelist()

        # mimetype goes first and uncompressed.
        if "mimetype" in base_names:
            out_zip.writestr(
                _entry("mimetype", zipfile.ZIP_STORED), base_zip.read("mimetype")
            )

        # Collect section XML; copy every other base entry straight through.
        base_sections = {}
        for fn in base_names:
            if fn == "mimetype":
                continue
            if _is_section(fn):
                base_sections[fn] = base_zip.read(fn).decode("utf-8", errors="ignore")
            else:
                out_zip.writestr(_entry(fn, zipfile.ZIP_DEFLATED), base_zip.read(fn))

        add_sections = {
            fn: add_zip.read(fn).decode("utf-8", errors="ignore")
            for fn in add_zip.namelist()
            if _is_section(fn)
        }

        # Merge the sections and write the result.
        merged_sections = merge_sections(base_sections, add_sections)
        for fn, content in merged_sections.items():
            out_zip.writestr(_entry(fn, zipfile.ZIP_DEFLATED), content.encode("utf-8"))

    return out_buf.getvalue()
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
def merge_sections(base_sections: dict, add_sections: dict) -> dict:
    """Combine two ``{filename: xml}`` mappings without mutating either.

    A filename present in both mappings has its XML combined through
    ``merge_section_content``; a filename found only in *add_sections* is
    added as-is.
    """
    merged = base_sections.copy()
    for fn in add_sections:
        incoming = add_sections[fn]
        merged[fn] = (
            merge_section_content(merged[fn], incoming) if fn in merged else incoming
        )
    return merged
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
# Matches an opening, closing or self-closing paragraph tag with any prefix.
_PARA_TAG_RE = re.compile(r'<(?P<close>/?)(?P<pfx>[a-zA-Z0-9_]+):p\b(?P<attrs>[^>]*)>')


def _top_level_paragraphs(xml: str) -> list:
    """Return each outermost ``<*:p>...</*:p>`` element of *xml*, in order.

    Paragraphs nested inside another paragraph stay inside their parent's
    text instead of being cut out on their own, so every returned string is
    balanced. Unclosed paragraphs are dropped.
    """
    paras = []
    depth = 0
    start = 0
    outer_pfx = None
    for tag in _PARA_TAG_RE.finditer(xml):
        pfx = tag.group("pfx")
        if depth and pfx != outer_pfx:
            # Only tags sharing the outer paragraph's prefix affect nesting.
            continue
        if tag.group("close"):
            if depth:
                depth -= 1
                if depth == 0:
                    paras.append(xml[start:tag.end()])
        elif tag.group("attrs").endswith("/"):
            # Self-closing paragraph: complete by itself.
            if depth == 0:
                paras.append(tag.group(0))
        else:
            if depth == 0:
                start = tag.start()
                outer_pfx = pfx
            depth += 1
    return paras


def merge_section_content(base_xml: str, add_xml: str) -> str:
    """Append the pages and body paragraphs of one section XML to another.

    1) Entries of the added ``<*:pages>`` block (self-closing or regular
       ``page`` elements) are appended to the base ``<*:pages>`` block.
    2) A page-break paragraph followed by the added paragraphs is inserted
       just before the base's ``</PFX:section>`` closing tag, where PFX is
       the prefix of the base's first paragraph.

    Args:
        base_xml: Section XML that receives the content.
        add_xml: Section XML whose pages and paragraphs are appended.

    Returns:
        The merged XML; *base_xml* is returned unchanged for any step whose
        anchors are not found.
    """
    # --- merge the pages lists ---
    pages_block_re = re.compile(
        r'<(?P<pfx>[a-zA-Z0-9_]+):pages\b[^>]*>(?P<body>.*?)</(?P=pfx):pages>',
        re.DOTALL,
    )
    m_base_pages = pages_block_re.search(base_xml)
    m_add_pages = pages_block_re.search(add_xml)
    if m_base_pages and m_add_pages:
        pfx = m_base_pages.group("pfx")
        body_base = m_base_pages.group("body")
        body_add = m_add_pages.group("body")
        add_entries = re.findall(
            rf'<{pfx}:page\b[^>]*/>|<{pfx}:page\b[^>]*>.*?</{pfx}:page>',
            body_add,
            re.DOTALL,
        )
        if add_entries:
            new_body = body_base + "".join(add_entries)
            base_xml = (
                base_xml[: m_base_pages.start("body")]
                + new_body
                + base_xml[m_base_pages.end("body"):]
            )

    # --- merge the body paragraphs ---
    para_re = re.compile(
        r'<(?P<pfx>[a-zA-Z0-9_]+):p\b[^>]*>.*?</(?P=pfx):p>', re.DOTALL
    )
    first_para = para_re.search(base_xml)
    if first_para is None:
        return base_xml
    pfx_in_base = first_para.group("pfx")

    # A non-greedy regex would stop at the first inner closing tag and hand
    # back a truncated, unbalanced paragraph whenever paragraphs are nested;
    # walk the tags with a depth counter instead.
    add_paras = _top_level_paragraphs(add_xml)
    if not add_paras:
        return base_xml

    # NOTE(review): assumes the section root closes as </PFX:section> using
    # the paragraph prefix; when that tag is absent nothing is appended.
    # Confirm against real section files.
    insert_at = base_xml.find(f"</{pfx_in_base}:section>")
    if insert_at == -1:
        return base_xml

    pagebreak_para = (
        f'<{pfx_in_base}:p><{pfx_in_base}:run>'
        f'<{pfx_in_base}:pageBreak/>'
        f'</{pfx_in_base}:run></{pfx_in_base}:p>'
    )
    return (
        base_xml[:insert_at] + pagebreak_para + "".join(add_paras) + base_xml[insert_at:]
    )
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
# ====================== UI ======================
|
| 419 |
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 420 |
-
st.markdown(
|
| 421 |
-
|
| 422 |
-
-
|
| 423 |
-
-
|
| 424 |
-
|
| 425 |
-
"""
|
| 426 |
-
)
|
| 427 |
|
| 428 |
tpl = st.file_uploader("๐ HWPX ํ
ํ๋ฆฟ ์
๋ก๋", type=["hwpx"])
|
| 429 |
n_per_page = st.number_input("ํ
ํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์(ํ ํ์ด์ง N๊ฐ)", 1, 12, 3, 1)
|
| 430 |
-
data = st.file_uploader("๐ ๋ฐ์ดํฐ ์
๋ก๋ (Excel/CSV)", type=["xlsx",
|
| 431 |
|
| 432 |
if tpl and data:
|
| 433 |
tpl_bytes = tpl.read()
|
| 434 |
df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
|
| 435 |
|
| 436 |
if "๋ฐ์ค๋ฒํธ" not in df.columns:
|
| 437 |
-
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
|
| 438 |
-
st.stop()
|
| 439 |
|
| 440 |
st.success("โ
์์น ๋งคํ ์๋ฃ (์๏ฟฝ๏ฟฝ๏ฟฝ ์ธก)")
|
| 441 |
st.dataframe(df.head(10), use_container_width=True)
|
|
@@ -453,81 +270,55 @@ if tpl and data:
|
|
| 453 |
|
| 454 |
# 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ
|
| 455 |
st.subheader("๐งช 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ")
|
| 456 |
-
keys = ["๋ฐ์ค๋ฒํธ",
|
| 457 |
mapping_preview = {}
|
| 458 |
for i in range(int(n_per_page)):
|
| 459 |
if i < len(records):
|
| 460 |
r = records[i]
|
| 461 |
-
mapping_preview.update(
|
| 462 |
-
{
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
}
|
| 472 |
-
)
|
| 473 |
else:
|
| 474 |
-
for k in keys:
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
pd.DataFrame([{"ํค": k, "๊ฐ ์๋ถ๋ถ": str(v)[:120]} for k, v in sorted(mapping_preview.items())]),
|
| 478 |
-
use_container_width=True,
|
| 479 |
-
height=320,
|
| 480 |
-
)
|
| 481 |
|
| 482 |
-
if st.button("๐
|
|
|
|
| 483 |
pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
|
| 484 |
debug_all = []
|
| 485 |
|
| 486 |
-
merged_hwpx: bytes | None = None
|
| 487 |
-
|
| 488 |
for p in range(pages):
|
| 489 |
-
chunk = records[p
|
| 490 |
-
mapping
|
| 491 |
for i in range(int(n_per_page)):
|
| 492 |
if i < len(chunk):
|
| 493 |
r = chunk[i]
|
| 494 |
-
mapping[f"๋ฐ์ค๋ฒํธ{i+1}"] = r.get("๋ฐ์ค๋ฒํธ",
|
| 495 |
-
mapping[f"์ข
๋ฃ์ฐ๋{i+1}"] = r.get("์์ฐ์ฐ๋",
|
| 496 |
-
mapping[f"๋ณด์กด๊ธฐ๊ฐ{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ",
|
| 497 |
-
mapping[f"๋จ์์
๋ฌด{i+1}"] = r.get("๋จ์์
๋ฌด",
|
| 498 |
-
mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ",
|
| 499 |
-
mapping[f"๋ชฉ๋ก{i+1}"]
|
| 500 |
-
title_val = r.get("์ ๋ชฉ",
|
| 501 |
-
mapping[f"์ ๋ชฉ{i+1}"]
|
| 502 |
mapping[f"์
๋ฌด๋ช
{i+1}"] = title_val
|
| 503 |
else:
|
| 504 |
-
for k in keys:
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
# ํ์ผ๋ช
|
| 516 |
-
first_box = records[0].get("๋ฐ์ค๋ฒํธ", "0000") if records else "0000"
|
| 517 |
-
last_box = records[-1].get("๋ฐ์ค๋ฒํธ", "0000") if records else "0000"
|
| 518 |
-
filename = (
|
| 519 |
-
f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
|
| 520 |
-
)
|
| 521 |
-
|
| 522 |
-
st.download_button(
|
| 523 |
-
"โฌ๏ธ ํต๏ฟฝ๏ฟฝ HWPX ๋ค์ด๋ก๋",
|
| 524 |
-
data=merged_hwpx,
|
| 525 |
-
file_name=filename,
|
| 526 |
-
mime="application/vnd.hancom.hwpx",
|
| 527 |
-
)
|
| 528 |
-
st.download_button(
|
| 529 |
-
"โฌ๏ธ ๋๋ฒ๊ทธ(JSON)",
|
| 530 |
-
data=json.dumps(debug_all, ensure_ascii=False, indent=2),
|
| 531 |
-
file_name="debug.json",
|
| 532 |
-
mime="application/json",
|
| 533 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import io, zipfile, re, html, json
|
| 4 |
+
from typing import Dict, Tuple
|
| 5 |
|
| 6 |
+
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ(HWPX) โ ๋ฌธ๋จ ๋จ์ ์์ ์นํ", layout="wide")
|
| 7 |
+
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ โ HWPX ํ๋ยทํ ํฐยทํ
์คํธ ์์ ์นํ(๋ฌธ๋จ ๋จ์)")
|
| 8 |
|
| 9 |
+
# -------------------- ๋ฐ์ดํฐ ์ ํธ --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def _year_range(series: pd.Series) -> str:
    """Return the ``"MIN-MAX"`` year span of a column as zero-padded years.

    Values are parsed numerically. Blanks, unparseable entries and zero in
    any spelling (``0``, ``"0000"``, ``0.0``) count as "no year" and are
    ignored.

    Args:
        series: Year values; may mix ints, floats, strings and missing values.

    Returns:
        ``"YYYY-YYYY"`` built from the smallest and largest remaining year,
        or ``"0000-0000"`` when no usable year is left.
    """
    # Parse first, filter second: a column that contains a missing value is
    # float-typed, so a zero is rendered as "0.0" and would slip past a
    # string comparison against "0"/"0000".
    nums = pd.to_numeric(series.astype(str), errors="coerce").dropna()
    nums = nums[nums != 0].astype(int)
    if nums.empty:
        return "0000-0000"
    return f"{nums.min():04d}-{nums.max():04d}"
|
| 17 |
|
|
|
|
| 18 |
def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 19 |
df = df.copy()
|
| 20 |
df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
|
| 21 |
if "์ ๋ชฉ" in df.columns:
|
| 22 |
df["์ ๋ชฉ"] = df["์ ๋ชฉ"].astype(str)
|
| 23 |
|
| 24 |
+
# ์์ฐ์ฐ๋(๋ฒ์) = ์ข
๋ฃ์ฐ๋ ๊ทธ๋ฃน ๋ฒ์
|
| 25 |
if "์ข
๋ฃ์ฐ๋" in df.columns:
|
| 26 |
yr = df.groupby("๋ฐ์ค๋ฒํธ")["์ข
๋ฃ์ฐ๋"].apply(_year_range).reset_index()
|
| 27 |
yr.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
|
|
|
|
| 32 |
has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
|
| 33 |
lists = []
|
| 34 |
for b, g in df.groupby("๋ฐ์ค๋ฒํธ"):
|
| 35 |
+
lines = [f"- {r['๊ด๋ฆฌ๋ฒํธ']} {r.get('์ ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ ๋ชฉ','')}"
|
| 36 |
+
for _, r in g.iterrows()]
|
|
|
|
|
|
|
| 37 |
lists.append({"๋ฐ์ค๋ฒํธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
|
| 38 |
list_df = pd.DataFrame(lists)
|
| 39 |
|
| 40 |
# ๋ํ ๋ฉํ
|
| 41 |
+
meta_cols = ["๋ฐ์ค๋ฒํธ","์ข
๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์
๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ ๋ชฉ"]
|
| 42 |
meta_exist = [c for c in meta_cols if c in df.columns]
|
| 43 |
+
meta = df.groupby("๋ฐ์ค๋ฒํธ", as_index=False).first()[meta_exist] if meta_exist \
|
| 44 |
+
else pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique()})
|
|
|
|
|
|
|
| 45 |
|
| 46 |
merged = meta.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(yr, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 47 |
return merged
|
| 48 |
|
| 49 |
+
# -------------------- ์นํ ์ ํธ --------------------
|
|
|
|
|
|
|
|
|
|
| 50 |
# Regex template for a fieldBegin ... fieldEnd pair with any namespace prefix.
# ``{name}`` is filled in with str.format; callers should pass an escaped name.
FIELD_PAIR_RE_TMPL = (
    r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
    r'(.*?)'
    r'<(?P=fprefix):fieldEnd\b[^>]*/>'
)

# Token format: ``TOKEN_FMT.format(key="k")`` yields the literal ``{{k}}``.
TOKEN_FMT = "{{{{{key}}}}}"

# Paragraph (<*:p>) search pattern. The ``\b`` after the tag name stops other
# tags whose name merely starts with "p" (e.g. ``pPr``) from being taken as
# a paragraph.
PARA_RE = re.compile(
    r'<(?P<pprefix>[a-zA-Z0-9_]+):p\b(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
    re.DOTALL,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
+
# ์๋ณธ run ์คํ์ผ์ ์ถ์ถํ๋ ํจ์
|
| 64 |
+
def _extract_run_style(body: str, pprefix: str) -> str:
    """Return the first run element of a paragraph body as a style template.

    Args:
        body: Inner XML of a paragraph.
        pprefix: Namespace prefix of the paragraph; the run tags are looked
            up under the same prefix.

    Returns:
        The first ``<pprefix:run>`` element, from its opening tag through
        its closing tag, or a minimal empty run when the body has none.
    """
    tag = re.escape(pprefix)
    # \b keeps a longer tag name that merely starts with "run" from matching.
    run_pattern = re.compile(rf'<{tag}:run\b[^>]*>.*?</{tag}:run>', re.DOTALL)
    match = run_pattern.search(body)
    if match:
        return match.group(0)
    # Fallback must be well-formed: the closing tag takes a single slash.
    return f'<{pprefix}:run><{pprefix}:t></{pprefix}:t></{pprefix}:run>'
|
| 74 |
+
|
| 75 |
+
# ๋ฌธ๋จ ํ๋๋ฅผ ๊ฐ์ ์คํ์ผ๋ก ๋ณต์ ํด์ฃผ๋ ํฌํผ (์คํ์ผ ๋ณด์กด)
|
| 76 |
+
def _make_para_with_style(pprefix: str, pattrs: str, text: str, original_run: str) -> str:
    """Build one paragraph that reuses an existing run as its style template.

    Args:
        pprefix: Namespace prefix for the paragraph, run and text tags.
        pattrs: Attribute string copied verbatim into the ``<pprefix:p>`` tag.
        text: Line of text for the paragraph; ``None`` means empty.
        original_run: XML of the run whose markup should be kept.

    Returns:
        ``<pprefix:p ...>RUN</pprefix:p>`` where RUN is *original_run* with
        its text swapped for the HTML-escaped *text*.
    """
    esc = html.escape("" if text is None else str(text))
    tag = re.escape(pprefix)

    # Replace only the text part of the original run. The replacement is a
    # function, not a template string, so backslashes in the text are never
    # read as regex escapes. \b keeps ":t" from matching "tab", "tbl", etc.
    text_pattern = re.compile(rf'(<{tag}:t\b[^>]*>)[^<]*(</{tag}:t>)')
    seen = 0

    def _fill(m):
        nonlocal seen
        seen += 1
        # The text goes into the first node only; later nodes are emptied so
        # the line is not repeated once per text node.
        return f"{m.group(1)}{esc if seen == 1 else ''}{m.group(2)}"

    new_run, t_hits = text_pattern.subn(_fill, original_run)

    # No text node at all: fall back to a run holding a single text node.
    # The substitution count decides this, so a run whose text already
    # equals the new text is not mistaken for one without a text node.
    if not t_hits:
        fresh_t = f'<{pprefix}:t>{esc}</{pprefix}:t>'
        run_pattern = re.compile(rf'(<{tag}:run\b[^>]*>)(.*?)(</{tag}:run>)', re.DOTALL)
        new_run, run_hits = run_pattern.subn(
            lambda m: f"{m.group(1)}{fresh_t}{m.group(3)}", original_run, count=1
        )
        if not run_hits:
            # Nothing usable in the template; emit a plain run rather than
            # silently dropping the text.
            new_run = f'<{pprefix}:run>{fresh_t}</{pprefix}:run>'

    return f'<{pprefix}:p{pattrs}>{new_run}</{pprefix}:p>'
|
| 89 |
|
| 90 |
def _split_lines(val) -> list:
    """Split a value into its lines.

    Args:
        val: Any value; ``None`` yields a single empty line and everything
            else is converted with ``str`` first.

    Returns:
        List of lines with the line breaks removed. ``\\r\\n``, bare ``\\n``
        and bare ``\\r`` all count as one line break.
    """
    if val is None:
        return [""]
    # Normalise CRLF first so it is not counted as two breaks, then bare CR,
    # which would otherwise stay embedded inside a line.
    return str(val).replace("\r\n", "\n").replace("\r", "\n").split("\n")
|
|
|
|
|
|
|
| 93 |
|
| 94 |
def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
|
| 95 |
"""
|
| 96 |
+
key๊ฐ ํฌํจ๋ '๋ถ๋ชจ ๋ฌธ๋จ ์ ์ฒด'๋ฅผ, ๊ฐ์ ๊ฐ ์ค์ ๋ด์ ์ฌ๋ฌ ๋ฌธ๋จ์ผ๋ก ๊ต์ฒด.
|
| 97 |
+
์๋ณธ ์คํ์ผ์ ์ ์งํ๋ฉด์ ๊ต์ฒด.
|
| 98 |
"""
|
| 99 |
pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
|
| 100 |
+
tnode_pat = re.compile(rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>', re.DOTALL)
|
|
|
|
|
|
|
|
|
|
| 101 |
token_str = TOKEN_FMT.format(key=key)
|
| 102 |
|
| 103 |
def para_repl(m):
|
|
|
|
| 107 |
|
| 108 |
lines = _split_lines(value)
|
| 109 |
pprefix = m.group("pprefix")
|
| 110 |
+
pattrs = m.group("pattrs")
|
| 111 |
+
|
| 112 |
+
# ์๋ณธ run ์คํ์ผ ์ถ์ถ
|
| 113 |
+
original_run = _extract_run_style(body, pprefix)
|
| 114 |
+
|
| 115 |
+
# ๊ฐ ์ค์ ๋ํด ์๋ณธ ์คํ์ผ์ ์ ์งํ๋ฉด์ ์ ๋ฌธ๋จ ์์ฑ
|
| 116 |
+
new_paras = "".join(_make_para_with_style(pprefix, pattrs, ln, original_run) for ln in lines)
|
| 117 |
dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
|
| 118 |
return new_paras
|
| 119 |
|
| 120 |
xml2 = PARA_RE.sub(para_repl, xml)
|
| 121 |
if xml2 != xml:
|
| 122 |
+
dbg["touched"] = True
|
| 123 |
return xml2
|
| 124 |
|
|
|
|
| 125 |
def _runs_plain(text: str) -> str:
    """Wrap *text* in a bare ``<hp:run><hp:t>`` pair.

    ``None`` becomes an empty string, anything else goes through ``str``,
    and the result is HTML-escaped before being embedded.
    """
    raw = "" if text is None else str(text)
    escaped = html.escape(raw)
    return "<hp:run><hp:t>" + escaped + "</hp:t></hp:run>"
|
| 127 |
|
|
|
|
| 128 |
def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
| 129 |
changed_any = False
|
| 130 |
|
| 131 |
+
# 0) ๋ค์ค์ค ํค๋ ๋จผ์ "๋ถ๋ชจ ๋ฌธ๋จ ๊ต์ฒด"๋ก ์ฒ๋ฆฌ (์
๋ฌด๋ช
์ ์ ์ธํ์ฌ ํฐํธ ๋ฌธ์ ํด๊ฒฐ)
|
| 132 |
+
multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ ๋ชฉ)\d+$", re.IGNORECASE)
|
| 133 |
for k, v in mapping.items():
|
| 134 |
if multi_key.match(k):
|
| 135 |
xml_new = _replace_para_multiline(xml, k, v, dbg)
|
|
|
|
| 137 |
xml = xml_new
|
| 138 |
changed_any = True
|
| 139 |
|
| 140 |
+
# 1) ํ๋์(์ธ๋ผ์ธ) ์นํ โ ๋จ์ผ์ค ํค๋ง
|
| 141 |
for k, v in mapping.items():
|
| 142 |
if multi_key.match(k):
|
| 143 |
continue
|
|
|
|
| 149 |
xml = xml_new
|
| 150 |
changed_any = True
|
| 151 |
|
| 152 |
+
# 2) ์์ ํ
์คํธ ์๋ฆฌํ์์(<*:t>ํค</*:t>) ๋ถ๋ถ์นํ โ ๋จ์ผ์ค ํค๋ง
|
| 153 |
tnode_all = re.compile(
|
| 154 |
r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
|
| 155 |
+
re.DOTALL
|
| 156 |
)
|
| 157 |
for k, v in mapping.items():
|
| 158 |
if multi_key.match(k):
|
| 159 |
continue
|
|
|
|
| 160 |
def repl_tnode(m):
|
| 161 |
text_node = m.group(3)
|
| 162 |
if k not in text_node:
|
| 163 |
return m.group(0)
|
| 164 |
new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
|
| 165 |
return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
|
|
|
|
| 166 |
xml2 = tnode_all.sub(repl_tnode, xml)
|
| 167 |
if xml2 != xml:
|
| 168 |
dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
|
| 169 |
xml = xml2
|
| 170 |
changed_any = True
|
| 171 |
|
| 172 |
+
# 3) ํ ํฐ ์นํ โ ๋จ์ผ์ค ํค๋ง
|
| 173 |
for k, v in mapping.items():
|
| 174 |
if multi_key.match(k):
|
| 175 |
continue
|
|
|
|
| 183 |
dbg["files_touched"] = True
|
| 184 |
return xml
|
| 185 |
|
| 186 |
+
def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
|
| 187 |
+
import stat, time
|
| 188 |
+
dbg = {"para_hits":{}, "field_hits":{}, "text_hits":{}, "token_hits":{}, "touched_files": []}
|
|
|
|
|
|
|
|
|
|
| 189 |
zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
|
| 190 |
out_buf = io.BytesIO()
|
| 191 |
zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
|
| 192 |
|
| 193 |
+
# ํ์ฌ ์๊ฐ
|
| 194 |
now = time.localtime()
|
| 195 |
|
| 196 |
+
# mimetype ๋ฌด์์ถ + ๋งจ์
|
| 197 |
names = zin.namelist()
|
| 198 |
if "mimetype" in names:
|
| 199 |
zi = zipfile.ZipInfo("mimetype")
|
| 200 |
zi.compress_type = zipfile.ZIP_STORED
|
| 201 |
+
# ์์ ํ ์๋ก์ด ZipInfo๋ก ์ฝ๊ธฐ์ ์ฉ ๋ฐฉ์ง
|
| 202 |
+
zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ ํ์ผ + ๋ชจ๋ ๊ถํ
|
| 203 |
+
zi.create_system = 0 # DOS/Windows
|
| 204 |
zi.date_time = now[:6]
|
| 205 |
zout.writestr(zi, zin.read("mimetype"))
|
| 206 |
|
|
|
|
| 212 |
try:
|
| 213 |
s = data.decode("utf-8", errors="ignore")
|
| 214 |
before = s
|
| 215 |
+
s = _apply_to_xml(s, mapping, {"para_hits":dbg["para_hits"], "field_hits":dbg["field_hits"],
|
| 216 |
+
"text_hits":dbg["text_hits"], "token_hits":dbg["token_hits"],
|
| 217 |
+
"files_touched":False})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
if s != before:
|
| 219 |
dbg["touched_files"].append(e.filename)
|
| 220 |
data = s.encode("utf-8")
|
| 221 |
except Exception:
|
| 222 |
pass
|
| 223 |
+
|
| 224 |
+
# ์์ ํ ์๋ก์ด ZipInfo ์์ฑ์ผ๋ก ์ฝ๊ธฐ์ ์ฉ ๋ฐฉ์ง
|
| 225 |
zi = zipfile.ZipInfo(e.filename)
|
| 226 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 227 |
+
zi.external_attr = 0o100666 << 16 # ์ผ๋ฐ ํ์ผ + ๋ชจ๋ ๊ถํ
|
| 228 |
+
zi.create_system = 0 # DOS/Windows ์์คํ
|
| 229 |
+
zi.date_time = now[:6] # ํ์ฌ ์๊ฐ
|
| 230 |
+
zi.flag_bits = 0 # ํน๋ณํ ํ๋๊ทธ ์์
|
| 231 |
zout.writestr(zi, data)
|
| 232 |
|
| 233 |
zout.close()
|
|
|
|
| 235 |
zin.close()
|
| 236 |
return out_buf.getvalue(), dbg
|
| 237 |
|
| 238 |
+
# -------------------- UI --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 240 |
+
st.markdown("""
|
| 241 |
+
- **๋ค์ค ์ค(๋ชฉ๋ก/์ ๋ชฉ)์ ๋ถ๋ชจ ๋ฌธ๋จ์ ์ฌ๋ฌ ๋ฌธ๋จ์ผ๋ก ๊ต์ฒด**ํ์ฌ ๊ฒน์นจ ์์ด ํ์ํฉ๋๋ค.
|
| 242 |
+
- **์
๋ฌด๋ช
์ ๋จ์ผ์ค๋ก ์ฒ๋ฆฌ**ํ์ฌ ์๋ณธ ํฐํธ ์คํ์ผ์ ์ ์งํฉ๋๋ค.
|
| 243 |
+
- **์์ฑ๋ HWPX ํ์ผ์ ์ฝ๊ธฐ์ ์ฉ ์์ฑ์ด ํด์ **๋์ด ํธ์ง ๊ฐ๋ฅํฉ๋๋ค.
|
| 244 |
+
""")
|
|
|
|
|
|
|
| 245 |
|
| 246 |
tpl = st.file_uploader("๐ HWPX ํ
ํ๋ฆฟ ์
๋ก๋", type=["hwpx"])
|
| 247 |
n_per_page = st.number_input("ํ
ํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์(ํ ํ์ด์ง N๊ฐ)", 1, 12, 3, 1)
|
| 248 |
+
data = st.file_uploader("๐ ๋ฐ์ดํฐ ์
๋ก๋ (Excel/CSV)", type=["xlsx","xls","csv"])
|
| 249 |
|
| 250 |
if tpl and data:
|
| 251 |
tpl_bytes = tpl.read()
|
| 252 |
df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
|
| 253 |
|
| 254 |
if "๋ฐ์ค๋ฒํธ" not in df.columns:
|
| 255 |
+
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค."); st.stop()
|
|
|
|
| 256 |
|
| 257 |
st.success("โ
์์น ๋งคํ ์๋ฃ (์๏ฟฝ๏ฟฝ๏ฟฝ ์ธก)")
|
| 258 |
st.dataframe(df.head(10), use_container_width=True)
|
|
|
|
| 270 |
|
| 271 |
# 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ
|
| 272 |
st.subheader("๐งช 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ")
|
| 273 |
+
keys = ["๋ฐ์ค๋ฒํธ","์ข
๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์
๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก","์ ๋ชฉ","์
๋ฌด๋ช
"]
|
| 274 |
mapping_preview = {}
|
| 275 |
for i in range(int(n_per_page)):
|
| 276 |
if i < len(records):
|
| 277 |
r = records[i]
|
| 278 |
+
mapping_preview.update({
|
| 279 |
+
f"๋ฐ์ค๋ฒํธ{i+1}": r.get("๋ฐ์ค๋ฒํธ",""),
|
| 280 |
+
f"์ข
๋ฃ์ฐ๋{i+1}": r.get("์์ฐ์ฐ๋",""),
|
| 281 |
+
f"๋ณด์กด๊ธฐ๊ฐ{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ",""),
|
| 282 |
+
f"๋จ์์
๋ฌด{i+1}": r.get("๋จ์์
๋ฌด",""),
|
| 283 |
+
f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ",""),
|
| 284 |
+
f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก",""),
|
| 285 |
+
f"์ ๋ชฉ{i+1}": r.get("์ ๋ชฉ",""),
|
| 286 |
+
f"์
๋ฌด๋ช
{i+1}": r.get("์ ๋ชฉ",""), # ํ
ํ๋ฆฟ์ด '์
๋ฌด๋ช
1'์ ์ฐ๋ ๊ฒฝ์ฐ ๋์
|
| 287 |
+
})
|
|
|
|
|
|
|
| 288 |
else:
|
| 289 |
+
for k in keys: mapping_preview[f"{k}{i+1}"] = ""
|
| 290 |
+
st.dataframe(pd.DataFrame([{"ํค":k, "๊ฐ ์๋ถ๋ถ":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
|
| 291 |
+
use_container_width=True, height=320)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
+
if st.button("๐ ๋ผ๋ฒจ ์์ฑ (ํ์ด์ง๋ณ HWPX ZIP)"):
|
| 294 |
+
mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
|
| 295 |
pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
|
| 296 |
debug_all = []
|
| 297 |
|
|
|
|
|
|
|
| 298 |
for p in range(pages):
|
| 299 |
+
chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
|
| 300 |
+
mapping = {}
|
| 301 |
for i in range(int(n_per_page)):
|
| 302 |
if i < len(chunk):
|
| 303 |
r = chunk[i]
|
| 304 |
+
mapping[f"๋ฐ์ค๋ฒํธ{i+1}"] = r.get("๋ฐ์ค๋ฒํธ","")
|
| 305 |
+
mapping[f"์ข
๋ฃ์ฐ๋{i+1}"] = r.get("์์ฐ์ฐ๋","")
|
| 306 |
+
mapping[f"๋ณด์กด๊ธฐ๊ฐ{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ","")
|
| 307 |
+
mapping[f"๋จ์์
๋ฌด{i+1}"] = r.get("๋จ์์
๋ฌด","")
|
| 308 |
+
mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
|
| 309 |
+
mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก","")
|
| 310 |
+
title_val = r.get("์ ๋ชฉ","")
|
| 311 |
+
mapping[f"์ ๋ชฉ{i+1}"] = title_val
|
| 312 |
mapping[f"์
๋ฌด๋ช
{i+1}"] = title_val
|
| 313 |
else:
|
| 314 |
+
for k in keys: mapping[f"{k}{i+1}"] = ""
|
| 315 |
+
|
| 316 |
+
out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
|
| 317 |
+
debug_all.append({"page": p+1, "stats": dbg})
|
| 318 |
+
name = "_".join([r.get("๋ฐ์ค๋ฒํธ","") for r in chunk]) if chunk else f"empty_{p+1}"
|
| 319 |
+
zout.writestr(f"label_{name}.hwpx", out_hwpx)
|
| 320 |
+
|
| 321 |
+
zout.close(); mem.seek(0)
|
| 322 |
+
st.download_button("โฌ๏ธ ZIP ๋ค์ด๋ก๋", data=mem, file_name="labels_by_page.zip", mime="application/zip")
|
| 323 |
+
st.download_button("โฌ๏ธ ๋๋ฒ๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
|
| 324 |
+
file_name="debug.json", mime="application/json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|