Update app.py
Browse files
app.py
CHANGED
|
@@ -1,27 +1,40 @@
|
|
| 1 |
-
|
| 2 |
-
import
|
| 3 |
-
import
|
|
|
|
|
|
|
|
|
|
| 4 |
from typing import Dict, Tuple
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
| 10 |
def _year_range(series: pd.Series) -> str:
|
| 11 |
s = series.astype(str).fillna("")
|
| 12 |
v = s[~s.isin(["", "0", "0000"])]
|
| 13 |
-
if v.empty:
|
|
|
|
| 14 |
nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
|
| 15 |
-
if nums.empty:
|
|
|
|
| 16 |
return f"{nums.min():04d}-{nums.max():04d}"
|
| 17 |
|
|
|
|
| 18 |
def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 19 |
df = df.copy()
|
| 20 |
df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
|
| 21 |
if "์ ๋ชฉ" in df.columns:
|
| 22 |
df["์ ๋ชฉ"] = df["์ ๋ชฉ"].astype(str)
|
| 23 |
|
| 24 |
-
# ์์ฐ์ฐ๋(๋ฒ์)
|
| 25 |
if "์ข
๋ฃ์ฐ๋" in df.columns:
|
| 26 |
yr = df.groupby("๋ฐ์ค๋ฒํธ")["์ข
๋ฃ์ฐ๋"].apply(_year_range).reset_index()
|
| 27 |
yr.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
|
|
@@ -32,72 +45,102 @@ def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 32 |
has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
|
| 33 |
lists = []
|
| 34 |
for b, g in df.groupby("๋ฐ์ค๋ฒํธ"):
|
| 35 |
-
lines = [
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
lists.append({"๋ฐ์ค๋ฒํธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
|
| 38 |
list_df = pd.DataFrame(lists)
|
| 39 |
|
| 40 |
# ๋ํ ๋ฉํ
|
| 41 |
-
meta_cols = ["๋ฐ์ค๋ฒํธ","์ข
๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์
๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ ๋ชฉ"]
|
| 42 |
meta_exist = [c for c in meta_cols if c in df.columns]
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
|
| 46 |
merged = meta.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(yr, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 47 |
return merged
|
| 48 |
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
FIELD_PAIR_RE_TMPL = (
|
| 51 |
r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
|
| 52 |
r'(.*?)'
|
| 53 |
r'<(?P=fprefix):fieldEnd\b[^>]*/>'
|
| 54 |
)
|
|
|
|
| 55 |
TOKEN_FMT = "{{{{{key}}}}}"
|
| 56 |
|
| 57 |
-
# ๋ฌธ๋จ
|
| 58 |
PARA_RE = re.compile(
|
| 59 |
r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
|
| 60 |
-
re.DOTALL
|
| 61 |
)
|
| 62 |
|
| 63 |
-
#
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
)
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
def _split_lines(val) -> list:
|
| 91 |
-
if val is None:
|
| 92 |
-
|
|
|
|
|
|
|
| 93 |
|
| 94 |
def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
|
| 95 |
"""
|
| 96 |
-
key๊ฐ ํฌํจ๋ '๋ถ๋ชจ ๋ฌธ๋จ ์ ์ฒด'
|
| 97 |
-
|
| 98 |
"""
|
| 99 |
pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
|
| 100 |
-
tnode_pat = re.compile(
|
|
|
|
|
|
|
|
|
|
| 101 |
token_str = TOKEN_FMT.format(key=key)
|
| 102 |
|
| 103 |
def para_repl(m):
|
|
@@ -107,29 +150,28 @@ def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
|
|
| 107 |
|
| 108 |
lines = _split_lines(value)
|
| 109 |
pprefix = m.group("pprefix")
|
| 110 |
-
pattrs
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
# ๊ฐ ์ค์ ๋ํด ์๋ณธ ์คํ์ผ์ ์ ์งํ๋ฉด์ ์ ๋ฌธ๋จ ์์ฑ
|
| 116 |
-
new_paras = "".join(_make_para_with_style(pprefix, pattrs, ln, original_run) for ln in lines)
|
| 117 |
dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
|
| 118 |
return new_paras
|
| 119 |
|
| 120 |
xml2 = PARA_RE.sub(para_repl, xml)
|
| 121 |
if xml2 != xml:
|
| 122 |
-
dbg["
|
| 123 |
return xml2
|
| 124 |
|
|
|
|
| 125 |
def _runs_plain(text: str) -> str:
|
| 126 |
return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
|
| 127 |
|
|
|
|
| 128 |
def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
| 129 |
changed_any = False
|
| 130 |
|
| 131 |
-
#
|
| 132 |
-
multi_key = re.compile(r"^(๋ชฉ๋ก|list
|
| 133 |
for k, v in mapping.items():
|
| 134 |
if multi_key.match(k):
|
| 135 |
xml_new = _replace_para_multiline(xml, k, v, dbg)
|
|
@@ -137,7 +179,7 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
|
| 137 |
xml = xml_new
|
| 138 |
changed_any = True
|
| 139 |
|
| 140 |
-
#
|
| 141 |
for k, v in mapping.items():
|
| 142 |
if multi_key.match(k):
|
| 143 |
continue
|
|
@@ -149,27 +191,29 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
|
| 149 |
xml = xml_new
|
| 150 |
changed_any = True
|
| 151 |
|
| 152 |
-
#
|
| 153 |
tnode_all = re.compile(
|
| 154 |
r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
|
| 155 |
-
re.DOTALL
|
| 156 |
)
|
| 157 |
for k, v in mapping.items():
|
| 158 |
if multi_key.match(k):
|
| 159 |
continue
|
|
|
|
| 160 |
def repl_tnode(m):
|
| 161 |
text_node = m.group(3)
|
| 162 |
if k not in text_node:
|
| 163 |
return m.group(0)
|
| 164 |
new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
|
| 165 |
return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
|
|
|
|
| 166 |
xml2 = tnode_all.sub(repl_tnode, xml)
|
| 167 |
if xml2 != xml:
|
| 168 |
dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
|
| 169 |
xml = xml2
|
| 170 |
changed_any = True
|
| 171 |
|
| 172 |
-
#
|
| 173 |
for k, v in mapping.items():
|
| 174 |
if multi_key.match(k):
|
| 175 |
continue
|
|
@@ -183,24 +227,25 @@ def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
|
| 183 |
dbg["files_touched"] = True
|
| 184 |
return xml
|
| 185 |
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
| 189 |
zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
|
| 190 |
out_buf = io.BytesIO()
|
| 191 |
zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
|
| 192 |
|
| 193 |
-
# ํ์ฌ ์๊ฐ
|
| 194 |
now = time.localtime()
|
| 195 |
|
| 196 |
-
# mimetype ๋ฌด์์ถ + ๋งจ์
|
| 197 |
names = zin.namelist()
|
| 198 |
if "mimetype" in names:
|
| 199 |
zi = zipfile.ZipInfo("mimetype")
|
| 200 |
zi.compress_type = zipfile.ZIP_STORED
|
| 201 |
-
|
| 202 |
-
zi.
|
| 203 |
-
zi.create_system = 0 # DOS/Windows
|
| 204 |
zi.date_time = now[:6]
|
| 205 |
zout.writestr(zi, zin.read("mimetype"))
|
| 206 |
|
|
@@ -212,22 +257,29 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
|
|
| 212 |
try:
|
| 213 |
s = data.decode("utf-8", errors="ignore")
|
| 214 |
before = s
|
| 215 |
-
s = _apply_to_xml(
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
if s != before:
|
| 219 |
dbg["touched_files"].append(e.filename)
|
| 220 |
data = s.encode("utf-8")
|
| 221 |
except Exception:
|
| 222 |
pass
|
| 223 |
-
|
| 224 |
-
# ์์ ํ ์๋ก์ด ZipInfo ์์ฑ์ผ๋ก ์ฝ๊ธฐ์ ์ฉ ๋ฐฉ์ง
|
| 225 |
zi = zipfile.ZipInfo(e.filename)
|
| 226 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 227 |
-
zi.external_attr = 0o100666 << 16
|
| 228 |
-
zi.create_system = 0
|
| 229 |
-
zi.date_time = now[:6]
|
| 230 |
-
zi.flag_bits = 0
|
| 231 |
zout.writestr(zi, data)
|
| 232 |
|
| 233 |
zout.close()
|
|
@@ -235,200 +287,155 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
|
|
| 235 |
zin.close()
|
| 236 |
return out_buf.getvalue(), dbg
|
| 237 |
|
|
|
|
|
|
|
|
|
|
| 238 |
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
| 239 |
-
"""HWPX
|
| 240 |
import time
|
| 241 |
-
|
| 242 |
base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
|
| 243 |
add_zip = zipfile.ZipFile(io.BytesIO(additional_hwpx), "r")
|
| 244 |
-
|
| 245 |
out_buf = io.BytesIO()
|
| 246 |
out_zip = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
|
| 247 |
-
|
| 248 |
now = time.localtime()
|
| 249 |
-
|
| 250 |
-
# mimetype
|
| 251 |
if "mimetype" in base_zip.namelist():
|
| 252 |
zi = zipfile.ZipInfo("mimetype")
|
| 253 |
zi.compress_type = zipfile.ZIP_STORED
|
| 254 |
zi.external_attr = 0o100666 << 16
|
| 255 |
zi.create_system = 0
|
| 256 |
zi.date_time = now[:6]
|
| 257 |
-
zi.flag_bits = 0
|
| 258 |
out_zip.writestr(zi, base_zip.read("mimetype"))
|
| 259 |
-
|
| 260 |
-
#
|
| 261 |
-
base_sections = {}
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
for filename in base_zip.namelist():
|
| 265 |
-
if filename == "mimetype":
|
| 266 |
continue
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 270 |
-
base_sections[filename] = data.decode("utf-8", errors="ignore")
|
| 271 |
else:
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
add_sections[
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
zi
|
| 290 |
-
zi.create_system = 0
|
| 291 |
-
zi.date_time = now[:6]
|
| 292 |
-
zi.flag_bits = 0
|
| 293 |
-
out_zip.writestr(zi, data)
|
| 294 |
-
|
| 295 |
-
# ๋ฒ ์ด์ค ์น์
๋ค ๋ณต์ฌ
|
| 296 |
-
for filename, content in base_sections.items():
|
| 297 |
-
zi = zipfile.ZipInfo(filename)
|
| 298 |
-
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 299 |
-
zi.external_attr = 0o100666 << 16
|
| 300 |
-
zi.create_system = 0
|
| 301 |
-
zi.date_time = now[:6]
|
| 302 |
-
zi.flag_bits = 0
|
| 303 |
-
out_zip.writestr(zi, content.encode("utf-8"))
|
| 304 |
-
|
| 305 |
-
# ์๋ก์ด ์น์
๋ค ์ถ๊ฐ
|
| 306 |
-
for filename, content in add_sections.items():
|
| 307 |
-
zi = zipfile.ZipInfo(filename)
|
| 308 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 309 |
zi.external_attr = 0o100666 << 16
|
| 310 |
zi.create_system = 0
|
| 311 |
zi.date_time = now[:6]
|
| 312 |
zi.flag_bits = 0
|
| 313 |
out_zip.writestr(zi, content.encode("utf-8"))
|
| 314 |
-
|
| 315 |
-
# BodyText ์
๋ฐ์ดํธ (์ ์น์
์ฐธ์กฐ ์ถ๊ฐ)
|
| 316 |
-
if "Contents/bodytext.xml" in base_files:
|
| 317 |
-
bodytext = base_files["Contents/bodytext.xml"].decode("utf-8", errors="ignore")
|
| 318 |
-
updated_bodytext = add_sections_to_bodytext(bodytext, list(add_sections.keys()))
|
| 319 |
-
|
| 320 |
-
zi = zipfile.ZipInfo("Contents/bodytext.xml")
|
| 321 |
-
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 322 |
-
zi.external_attr = 0o100666 << 16
|
| 323 |
-
zi.create_system = 0
|
| 324 |
-
zi.date_time = now[:6]
|
| 325 |
-
zi.flag_bits = 0
|
| 326 |
-
out_zip.writestr(zi, updated_bodytext.encode("utf-8"))
|
| 327 |
-
|
| 328 |
base_zip.close()
|
| 329 |
add_zip.close()
|
| 330 |
out_zip.close()
|
| 331 |
out_buf.seek(0)
|
| 332 |
-
|
| 333 |
return out_buf.getvalue()
|
| 334 |
|
| 335 |
-
def add_sections_to_bodytext(bodytext: str, new_section_files: list) -> str:
|
| 336 |
-
"""BodyText์ ์ ์น์
์ฐธ์กฐ ์ถ๊ฐ"""
|
| 337 |
-
# ๋ง์ง๋ง ์น์
๋ค์ ์ ์น์
๋ค ์ถ๊ฐ
|
| 338 |
-
# </hml:body> ํ๊ทธ ์์ ์ ์น์
์ฐธ์กฐ ์ฝ์
|
| 339 |
-
|
| 340 |
-
section_refs = []
|
| 341 |
-
for section_file in new_section_files:
|
| 342 |
-
# section1.xml -> 1 ์ถ์ถ
|
| 343 |
-
section_num = section_file.split("section")[1].split(".xml")[0]
|
| 344 |
-
section_ref = f'<hml:secDef><hml:secPtr hml:hRef="../Contents/section{section_num}.xml#0"/></hml:secDef>'
|
| 345 |
-
section_refs.append(section_ref)
|
| 346 |
-
|
| 347 |
-
if section_refs:
|
| 348 |
-
# </hml:body> ์์ ์ฝ์
|
| 349 |
-
body_close_pattern = re.compile(r'(</hml:body>)')
|
| 350 |
-
new_sections_xml = ''.join(section_refs)
|
| 351 |
-
bodytext = body_close_pattern.sub(new_sections_xml + r'\1', bodytext)
|
| 352 |
-
|
| 353 |
-
return bodytext
|
| 354 |
-
|
| 355 |
-
def update_page_id(base_xml: str, new_page: str) -> str:
|
| 356 |
-
"""ํ์ด์ง ID๋ฅผ ์ค๋ณต๋์ง ์๊ฒ ์
๋ฐ์ดํธ (๋ ์ด์ ์ฌ์ฉํ์ง ์์)"""
|
| 357 |
-
return new_page
|
| 358 |
-
|
| 359 |
-
def add_page_to_section(base_xml: str, add_xml: str) -> str:
|
| 360 |
-
"""์น์
์ ์ ํ์ด์ง ์ถ๊ฐ (๋ ์ด์ ์ฌ์ฉํ์ง ์์)"""
|
| 361 |
-
return base_xml
|
| 362 |
-
|
| 363 |
-
def merge_section_xml_list(xml_list: list) -> str:
|
| 364 |
-
"""์ฌ๋ฌ ์น์
XML์ ํ๋๋ก ๋ณํฉ (์ฌ์ฉํ์ง ์์ง๋ง ํธํ์ฑ ์ ์ง)"""
|
| 365 |
-
if len(xml_list) <= 1:
|
| 366 |
-
return xml_list[0] if xml_list else ""
|
| 367 |
-
|
| 368 |
-
base_xml = xml_list[0]
|
| 369 |
-
for additional_xml in xml_list[1:]:
|
| 370 |
-
base_xml = add_page_to_section(base_xml, additional_xml)
|
| 371 |
-
|
| 372 |
-
return base_xml
|
| 373 |
|
| 374 |
def merge_sections(base_sections: dict, add_sections: dict) -> dict:
|
| 375 |
-
"""์น์
XML๋ค์ ๋ณํฉ"""
|
| 376 |
merged = base_sections.copy()
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
# ๊ธฐ์กด ์น์
์ ํ์ด์ง ์ถ๊ฐ
|
| 381 |
-
merged[filename] = merge_section_content(merged[filename], add_content)
|
| 382 |
else:
|
| 383 |
-
|
| 384 |
-
merged[filename] = add_content
|
| 385 |
-
|
| 386 |
return merged
|
| 387 |
|
|
|
|
| 388 |
def merge_section_content(base_xml: str, add_xml: str) -> str:
|
| 389 |
-
"""
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
)
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 414 |
-
st.markdown(
|
| 415 |
-
|
| 416 |
-
-
|
| 417 |
-
-
|
| 418 |
-
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
|
| 422 |
tpl = st.file_uploader("๐ HWPX ํ
ํ๋ฆฟ ์
๋ก๋", type=["hwpx"])
|
| 423 |
n_per_page = st.number_input("ํ
ํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์(ํ ํ์ด์ง N๊ฐ)", 1, 12, 3, 1)
|
| 424 |
-
data = st.file_uploader("๐ ๋ฐ์ดํฐ ์
๋ก๋ (Excel/CSV)", type=["xlsx","xls","csv"])
|
| 425 |
|
| 426 |
if tpl and data:
|
| 427 |
tpl_bytes = tpl.read()
|
| 428 |
df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
|
| 429 |
|
| 430 |
if "๋ฐ์ค๋ฒํธ" not in df.columns:
|
| 431 |
-
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
|
|
|
|
| 432 |
|
| 433 |
st.success("โ
์์น ๋งคํ ์๋ฃ (์์
์ธก)")
|
| 434 |
st.dataframe(df.head(10), use_container_width=True)
|
|
@@ -446,66 +453,81 @@ if tpl and data:
|
|
| 446 |
|
| 447 |
# 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ
|
| 448 |
st.subheader("๐งช 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ")
|
| 449 |
-
keys = ["๋ฐ์ค๋ฒํธ","์ข
๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์
๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก","์ ๋ชฉ","์
๋ฌด๋ช
"]
|
| 450 |
mapping_preview = {}
|
| 451 |
for i in range(int(n_per_page)):
|
| 452 |
if i < len(records):
|
| 453 |
r = records[i]
|
| 454 |
-
mapping_preview.update(
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
|
|
|
|
|
|
| 464 |
else:
|
| 465 |
-
for k in keys:
|
| 466 |
-
|
| 467 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
|
| 469 |
-
if st.button("๐ ํตํฉ
|
| 470 |
pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
|
| 471 |
debug_all = []
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
for p in range(pages):
|
| 477 |
-
chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
|
| 478 |
-
mapping = {}
|
| 479 |
for i in range(int(n_per_page)):
|
| 480 |
if i < len(chunk):
|
| 481 |
r = chunk[i]
|
| 482 |
-
mapping[f"๋ฐ์ค๋ฒํธ{i+1}"] = r.get("๋ฐ์ค๋ฒํธ","")
|
| 483 |
-
mapping[f"์ข
๋ฃ์ฐ๋{i+1}"] = r.get("์์ฐ์ฐ๋","")
|
| 484 |
-
mapping[f"๋ณด์กด๊ธฐ๊ฐ{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ","")
|
| 485 |
-
mapping[f"๋จ์์
๋ฌด{i+1}"] = r.get("๋จ์์
๋ฌด","")
|
| 486 |
-
mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
|
| 487 |
-
mapping[f"๋ชฉ๋ก{i+1}"]
|
| 488 |
-
title_val = r.get("์ ๋ชฉ","")
|
| 489 |
-
mapping[f"์ ๋ชฉ{i+1}"]
|
| 490 |
mapping[f"์
๋ฌด๋ช
{i+1}"] = title_val
|
| 491 |
else:
|
| 492 |
-
for k in keys:
|
|
|
|
| 493 |
|
| 494 |
if p == 0:
|
| 495 |
-
# ์ฒซ ํ์ด์ง: ํ
ํ๋ฆฟ ๊ธฐ๋ฐ์ผ๋ก ์์ฑ
|
| 496 |
merged_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
|
| 497 |
else:
|
| 498 |
-
# ๋ ๋ฒ์งธ ํ์ด์ง๋ถํฐ: ๊ธฐ์กด HWPX์ ํ์ด์ง ์ถ๊ฐ
|
| 499 |
page_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
|
| 500 |
merged_hwpx = merge_hwpx_pages(merged_hwpx, page_hwpx)
|
| 501 |
-
|
| 502 |
-
debug_all.append({"page": p+1, "stats": dbg})
|
| 503 |
|
| 504 |
-
|
|
|
|
|
|
|
| 505 |
first_box = records[0].get("๋ฐ์ค๋ฒํธ", "0000") if records else "0000"
|
| 506 |
last_box = records[-1].get("๋ฐ์ค๋ฒํธ", "0000") if records else "0000"
|
| 507 |
-
filename =
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import io
|
| 3 |
+
import json
|
| 4 |
+
import html
|
| 5 |
+
import re
|
| 6 |
+
import zipfile
|
| 7 |
from typing import Dict, Tuple
|
| 8 |
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import streamlit as st
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ====================== Streamlit ======================
|
| 14 |
+
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ(HWPX) โ ํตํฉ ํ์ผ ์ถ๋ ฅ", layout="wide")
|
| 15 |
+
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ โ HWPX ํ๋ยทํ ํฐยท๋ฌธ๋จ ์์ ์นํ + ๋คํ์ด์ง ํตํฉ ์ถ๋ ฅ")
|
| 16 |
|
| 17 |
+
|
| 18 |
+
# ====================== ๋ฐ์ดํฐ ์ ํธ ======================
|
| 19 |
def _year_range(series: pd.Series) -> str:
|
| 20 |
s = series.astype(str).fillna("")
|
| 21 |
v = s[~s.isin(["", "0", "0000"])]
|
| 22 |
+
if v.empty:
|
| 23 |
+
return "0000-0000"
|
| 24 |
nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
|
| 25 |
+
if nums.empty:
|
| 26 |
+
return "0000-0000"
|
| 27 |
return f"{nums.min():04d}-{nums.max():04d}"
|
| 28 |
|
| 29 |
+
|
| 30 |
def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
| 31 |
+
"""๋ฐ์ค๋ฒํธ ๊ธฐ์ค ๋ํ ๋ฉํ + ๋ชฉ๋ก(์ฌ๋ฌ ์ค) + ์์ฐ์ฐ๋ ๋ฒ์ ์์ฑ"""
|
| 32 |
df = df.copy()
|
| 33 |
df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
|
| 34 |
if "์ ๋ชฉ" in df.columns:
|
| 35 |
df["์ ๋ชฉ"] = df["์ ๋ชฉ"].astype(str)
|
| 36 |
|
| 37 |
+
# ์์ฐ์ฐ๋(๋ฒ์)
|
| 38 |
if "์ข
๋ฃ์ฐ๋" in df.columns:
|
| 39 |
yr = df.groupby("๋ฐ์ค๋ฒํธ")["์ข
๋ฃ์ฐ๋"].apply(_year_range).reset_index()
|
| 40 |
yr.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
|
|
|
|
| 45 |
has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
|
| 46 |
lists = []
|
| 47 |
for b, g in df.groupby("๋ฐ์ค๋ฒํธ"):
|
| 48 |
+
lines = [
|
| 49 |
+
f"- {r['๊ด๋ฆฌ๋ฒํธ']} {r.get('์ ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ ๋ชฉ','')}"
|
| 50 |
+
for _, r in g.iterrows()
|
| 51 |
+
]
|
| 52 |
lists.append({"๋ฐ์ค๋ฒํธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
|
| 53 |
list_df = pd.DataFrame(lists)
|
| 54 |
|
| 55 |
# ๋ํ ๋ฉํ
|
| 56 |
+
meta_cols = ["๋ฐ์ค๋ฒํธ", "์ข
๋ฃ์ฐ๋", "๋ณด์กด๊ธฐ๊ฐ", "๋จ์์
๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "์ ๋ชฉ"]
|
| 57 |
meta_exist = [c for c in meta_cols if c in df.columns]
|
| 58 |
+
if meta_exist:
|
| 59 |
+
meta = df.groupby("๋ฐ์ค๋ฒํธ", as_index=False).first()[meta_exist]
|
| 60 |
+
else:
|
| 61 |
+
meta = pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique()})
|
| 62 |
|
| 63 |
merged = meta.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(yr, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 64 |
return merged
|
| 65 |
|
| 66 |
+
|
| 67 |
+
# ====================== ์นํ ์ ํธ (์ธ๋ผ์ธ/๋ฌธ๋จ) ======================
|
| 68 |
+
|
| 69 |
+
# fieldBegin/fieldEnd ์ (์ ๋์ด ์์ผ๋์นด๋)
|
| 70 |
FIELD_PAIR_RE_TMPL = (
|
| 71 |
r'<(?P<fprefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
|
| 72 |
r'(.*?)'
|
| 73 |
r'<(?P=fprefix):fieldEnd\b[^>]*/>'
|
| 74 |
)
|
| 75 |
+
# ํ ํฐ ํฌ๋งท
|
| 76 |
TOKEN_FMT = "{{{{{key}}}}}"
|
| 77 |
|
| 78 |
+
# ๋ฌธ๋จ ํ์์ฉ
|
| 79 |
PARA_RE = re.compile(
|
| 80 |
r'<(?P<pprefix>[a-zA-Z0-9_]+):p(?P<pattrs>[^>]*)>(?P<pbody>.*?)</(?P=pprefix):p>',
|
| 81 |
+
re.DOTALL,
|
| 82 |
)
|
| 83 |
|
| 84 |
+
# run / t ๏ฟฝ๏ฟฝ๏ฟฝ๋ ์ถ์ถ์ฉ
|
| 85 |
+
RUN_RE = re.compile(
|
| 86 |
+
r'<(?P<prefix>[a-zA-Z0-9_]+):run(?P<rattrs>[^>]*)>(?P<body>.*?)</(?P=prefix):run>',
|
| 87 |
+
re.DOTALL,
|
| 88 |
+
)
|
| 89 |
+
TP_RE = re.compile(
|
| 90 |
+
r'<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>(?P<text>.*?)</(?P=prefix):t>',
|
| 91 |
+
re.DOTALL,
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _clone_run_with_text(run_xml: str, text: str) -> str:
|
| 96 |
+
"""๊ธฐ์กด run์ rPr/์์ฑ ๋ณด์กด, t ๋ด์ฉ๋ง ๊ต์ฒด"""
|
| 97 |
+
def _repl_t(m):
|
| 98 |
+
return f"<{m.group('prefix')}:t>{html.escape(text)}</{m.group('prefix')}:t>"
|
| 99 |
+
|
| 100 |
+
if TP_RE.search(run_xml):
|
| 101 |
+
return TP_RE.sub(_repl_t, run_xml, count=1)
|
| 102 |
+
# t ๋
ธ๋ ์์ผ๋ฉด ๊ธฐ๋ณธ ์ฝ์
|
| 103 |
+
m = RUN_RE.search(run_xml)
|
| 104 |
+
if not m:
|
| 105 |
+
return f"<hp:run><hp:t>{html.escape(text)}</hp:t></hp:run>"
|
| 106 |
+
prefix = m.group("prefix")
|
| 107 |
+
return f"<{prefix}:run><{prefix}:t>{html.escape(text)}</{prefix}:t></{prefix}:run>"
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _extract_ppr_and_template_run(pbody: str):
|
| 111 |
+
"""๋ฌธ๋จ pPr(์์ผ๋ฉด)๊ณผ ์ฒซ ๋ฒ์งธ run ์ํ์ ์ถ์ถ"""
|
| 112 |
+
ppr_match = re.search(r'<(?P<prefix>[a-zA-Z0-9_]+):pPr\b[^>]*/>', pbody)
|
| 113 |
+
ppr_xml = ppr_match.group(0) if ppr_match else ""
|
| 114 |
+
|
| 115 |
+
run_match = RUN_RE.search(pbody)
|
| 116 |
+
if run_match:
|
| 117 |
+
template_run = run_match.group(0) # rPr ํฌํจ
|
| 118 |
+
else:
|
| 119 |
+
template_run = "<hp:run><hp:t></hp:t></hp:run>"
|
| 120 |
+
return ppr_xml, template_run
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def _make_para_from_templates(pprefix: str, pattrs: str, ppr_xml: str, template_run: str, text: str) -> str:
|
| 124 |
+
cloned_run = _clone_run_with_text(template_run, text)
|
| 125 |
+
return f"<{pprefix}:p{pattrs}>{ppr_xml}{cloned_run}</{pprefix}:p>"
|
| 126 |
+
|
| 127 |
|
| 128 |
def _split_lines(val) -> list:
|
| 129 |
+
if val is None:
|
| 130 |
+
return [""]
|
| 131 |
+
return str(val).replace("\r\n", "\n").split("\n")
|
| 132 |
+
|
| 133 |
|
| 134 |
def _replace_para_multiline(xml: str, key: str, value: str, dbg: dict) -> str:
|
| 135 |
"""
|
| 136 |
+
key๊ฐ ํฌํจ๋ '๋ถ๋ชจ ๋ฌธ๋จ ์ ์ฒด'๋ฅผ ๊ฐ์ ๊ฐ ์ค์ ๋ด์ ๋ค์ ๋ฌธ๋จ์ผ๋ก ๊ต์ฒด.
|
| 137 |
+
์ ๋ฌธ๋จ pPr/rPr ์คํ์ผ ์ ์ง.
|
| 138 |
"""
|
| 139 |
pair_pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(key)), re.DOTALL)
|
| 140 |
+
tnode_pat = re.compile(
|
| 141 |
+
rf'<(?P<p>[a-zA-Z0-9_]+):t[^>]*>[^<]*{re.escape(key)}[^<]*</(?P=p):t>',
|
| 142 |
+
re.DOTALL,
|
| 143 |
+
)
|
| 144 |
token_str = TOKEN_FMT.format(key=key)
|
| 145 |
|
| 146 |
def para_repl(m):
|
|
|
|
| 150 |
|
| 151 |
lines = _split_lines(value)
|
| 152 |
pprefix = m.group("pprefix")
|
| 153 |
+
pattrs = m.group("pattrs")
|
| 154 |
+
ppr_xml, template_run = _extract_ppr_and_template_run(body)
|
| 155 |
+
|
| 156 |
+
new_paras = "".join(_make_para_from_templates(pprefix, pattrs, ppr_xml, template_run, ln) for ln in lines)
|
|
|
|
|
|
|
|
|
|
| 157 |
dbg["para_hits"][key] = dbg["para_hits"].get(key, 0) + 1
|
| 158 |
return new_paras
|
| 159 |
|
| 160 |
xml2 = PARA_RE.sub(para_repl, xml)
|
| 161 |
if xml2 != xml:
|
| 162 |
+
dbg["files_touched"] = True
|
| 163 |
return xml2
|
| 164 |
|
| 165 |
+
|
| 166 |
def _runs_plain(text: str) -> str:
|
| 167 |
return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
|
| 168 |
|
| 169 |
+
|
| 170 |
def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
| 171 |
changed_any = False
|
| 172 |
|
| 173 |
+
# (A) ๋ค์ค ์ค ํค๋ "๋ฌธ๋จ ๊ต์ฒด"๋ก ๋จผ์ ์ฒ๋ฆฌ (๋ชฉ๋ก/์ ๋ชฉ/์
๋ฌด๋ช
๋ชจ๋ ์ค๋ฐ๊ฟ ๊ฐ์ )
|
| 174 |
+
multi_key = re.compile(r"^(๋ชฉ๋ก|list|์ ๋ชฉ|์
๋ฌด๋ช
)\d+$", re.IGNORECASE)
|
| 175 |
for k, v in mapping.items():
|
| 176 |
if multi_key.match(k):
|
| 177 |
xml_new = _replace_para_multiline(xml, k, v, dbg)
|
|
|
|
| 179 |
xml = xml_new
|
| 180 |
changed_any = True
|
| 181 |
|
| 182 |
+
# (B) ์ธ๋ผ์ธ ํ๋์ ์นํ โ ๋จ์ผ ์ค๋ง
|
| 183 |
for k, v in mapping.items():
|
| 184 |
if multi_key.match(k):
|
| 185 |
continue
|
|
|
|
| 191 |
xml = xml_new
|
| 192 |
changed_any = True
|
| 193 |
|
| 194 |
+
# (C) ์์ ํ
์คํธ ์๋ฆฌํ์์(<*:t>ํค</*:t>) ์นํ โ ๋จ์ผ ์ค๋ง
|
| 195 |
tnode_all = re.compile(
|
| 196 |
r'(<(?P<prefix>[a-zA-Z0-9_]+):t[^>]*>)([^<]*?)</(?P=prefix):t>',
|
| 197 |
+
re.DOTALL,
|
| 198 |
)
|
| 199 |
for k, v in mapping.items():
|
| 200 |
if multi_key.match(k):
|
| 201 |
continue
|
| 202 |
+
|
| 203 |
def repl_tnode(m):
|
| 204 |
text_node = m.group(3)
|
| 205 |
if k not in text_node:
|
| 206 |
return m.group(0)
|
| 207 |
new_text = html.escape(text_node.replace(k, "" if v is None else str(v)))
|
| 208 |
return f"{m.group(1)}{new_text}</{m.group('prefix')}:t>"
|
| 209 |
+
|
| 210 |
xml2 = tnode_all.sub(repl_tnode, xml)
|
| 211 |
if xml2 != xml:
|
| 212 |
dbg["text_hits"][k] = dbg["text_hits"].get(k, 0) + 1
|
| 213 |
xml = xml2
|
| 214 |
changed_any = True
|
| 215 |
|
| 216 |
+
# (D) ํ ํฐ ์นํ โ ๋จ์ผ ์ค๋ง
|
| 217 |
for k, v in mapping.items():
|
| 218 |
if multi_key.match(k):
|
| 219 |
continue
|
|
|
|
| 227 |
dbg["files_touched"] = True
|
| 228 |
return xml
|
| 229 |
|
| 230 |
+
|
| 231 |
+
def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str, str]) -> Tuple[bytes, dict]:
|
| 232 |
+
"""HWPX(zip) ๋ด๋ถ ๋ชจ๋ XML์ ์นํ ์ ์ฉ"""
|
| 233 |
+
import time
|
| 234 |
+
|
| 235 |
+
dbg = {"para_hits": {}, "field_hits": {}, "text_hits": {}, "token_hits": {}, "touched_files": []}
|
| 236 |
zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
|
| 237 |
out_buf = io.BytesIO()
|
| 238 |
zout = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
|
| 239 |
|
|
|
|
| 240 |
now = time.localtime()
|
| 241 |
|
| 242 |
+
# mimetype: ๋ฌด์์ถ + ๋งจ์
|
| 243 |
names = zin.namelist()
|
| 244 |
if "mimetype" in names:
|
| 245 |
zi = zipfile.ZipInfo("mimetype")
|
| 246 |
zi.compress_type = zipfile.ZIP_STORED
|
| 247 |
+
zi.external_attr = 0o100666 << 16
|
| 248 |
+
zi.create_system = 0
|
|
|
|
| 249 |
zi.date_time = now[:6]
|
| 250 |
zout.writestr(zi, zin.read("mimetype"))
|
| 251 |
|
|
|
|
| 257 |
try:
|
| 258 |
s = data.decode("utf-8", errors="ignore")
|
| 259 |
before = s
|
| 260 |
+
s = _apply_to_xml(
|
| 261 |
+
s,
|
| 262 |
+
mapping,
|
| 263 |
+
{
|
| 264 |
+
"para_hits": dbg["para_hits"],
|
| 265 |
+
"field_hits": dbg["field_hits"],
|
| 266 |
+
"text_hits": dbg["text_hits"],
|
| 267 |
+
"token_hits": dbg["token_hits"],
|
| 268 |
+
"files_touched": False,
|
| 269 |
+
},
|
| 270 |
+
)
|
| 271 |
if s != before:
|
| 272 |
dbg["touched_files"].append(e.filename)
|
| 273 |
data = s.encode("utf-8")
|
| 274 |
except Exception:
|
| 275 |
pass
|
| 276 |
+
|
|
|
|
| 277 |
zi = zipfile.ZipInfo(e.filename)
|
| 278 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 279 |
+
zi.external_attr = 0o100666 << 16
|
| 280 |
+
zi.create_system = 0
|
| 281 |
+
zi.date_time = now[:6]
|
| 282 |
+
zi.flag_bits = 0
|
| 283 |
zout.writestr(zi, data)
|
| 284 |
|
| 285 |
zout.close()
|
|
|
|
| 287 |
zin.close()
|
| 288 |
return out_buf.getvalue(), dbg
|
| 289 |
|
| 290 |
+
|
| 291 |
+
# ====================== ์น์
/ํ์ด์ง ๋ณํฉ (๋จ์ผ HWPX๋ก ์ถ๋ ฅ) ======================
|
| 292 |
+
|
| 293 |
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
| 294 |
+
"""๋ HWPX๋ฅผ 1๊ฐ๋ก ๋ณํฉ: pages ๋ชฉ๋ก๊ณผ ๋ณธ๋ฌธ ๋ฌธ๋จ๊น์ง ํฉ์นจ"""
|
| 295 |
import time
|
| 296 |
+
|
| 297 |
base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
|
| 298 |
add_zip = zipfile.ZipFile(io.BytesIO(additional_hwpx), "r")
|
| 299 |
+
|
| 300 |
out_buf = io.BytesIO()
|
| 301 |
out_zip = zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6)
|
|
|
|
| 302 |
now = time.localtime()
|
| 303 |
+
|
| 304 |
+
# mimetype
|
| 305 |
if "mimetype" in base_zip.namelist():
|
| 306 |
zi = zipfile.ZipInfo("mimetype")
|
| 307 |
zi.compress_type = zipfile.ZIP_STORED
|
| 308 |
zi.external_attr = 0o100666 << 16
|
| 309 |
zi.create_system = 0
|
| 310 |
zi.date_time = now[:6]
|
|
|
|
| 311 |
out_zip.writestr(zi, base_zip.read("mimetype"))
|
| 312 |
+
|
| 313 |
+
# ์น์
XML ์์ง
|
| 314 |
+
base_sections, add_sections = {}, {}
|
| 315 |
+
for fn in base_zip.namelist():
|
| 316 |
+
if fn == "mimetype":
|
|
|
|
|
|
|
| 317 |
continue
|
| 318 |
+
if fn.startswith("Contents/section") and fn.endswith(".xml"):
|
| 319 |
+
base_sections[fn] = base_zip.read(fn).decode("utf-8", errors="ignore")
|
|
|
|
|
|
|
| 320 |
else:
|
| 321 |
+
zi = zipfile.ZipInfo(fn)
|
| 322 |
+
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 323 |
+
zi.external_attr = 0o100666 << 16
|
| 324 |
+
zi.create_system = 0
|
| 325 |
+
zi.date_time = now[:6]
|
| 326 |
+
zi.flag_bits = 0
|
| 327 |
+
out_zip.writestr(zi, base_zip.read(fn))
|
| 328 |
+
|
| 329 |
+
for fn in add_zip.namelist():
|
| 330 |
+
if fn.startswith("Contents/section") and fn.endswith(".xml"):
|
| 331 |
+
add_sections[fn] = add_zip.read(fn).decode("utf-8", errors="ignore")
|
| 332 |
+
|
| 333 |
+
# ์น์
๋ณํฉ
|
| 334 |
+
merged_sections = merge_sections(base_sections, add_sections)
|
| 335 |
+
|
| 336 |
+
# ๊ฒฐ๊ณผ ๊ธฐ๋ก
|
| 337 |
+
for fn, content in merged_sections.items():
|
| 338 |
+
zi = zipfile.ZipInfo(fn)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 340 |
zi.external_attr = 0o100666 << 16
|
| 341 |
zi.create_system = 0
|
| 342 |
zi.date_time = now[:6]
|
| 343 |
zi.flag_bits = 0
|
| 344 |
out_zip.writestr(zi, content.encode("utf-8"))
|
| 345 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
base_zip.close()
|
| 347 |
add_zip.close()
|
| 348 |
out_zip.close()
|
| 349 |
out_buf.seek(0)
|
|
|
|
| 350 |
return out_buf.getvalue()
|
| 351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
def merge_sections(base_sections: dict, add_sections: dict) -> dict:
|
|
|
|
| 354 |
merged = base_sections.copy()
|
| 355 |
+
for fn, add_xml in add_sections.items():
|
| 356 |
+
if fn in merged:
|
| 357 |
+
merged[fn] = merge_section_content(merged[fn], add_xml)
|
|
|
|
|
|
|
| 358 |
else:
|
| 359 |
+
merged[fn] = add_xml
|
|
|
|
|
|
|
| 360 |
return merged
|
| 361 |
|
| 362 |
+
|
| 363 |
def merge_section_content(base_xml: str, add_xml: str) -> str:
    """Merge one HWPX section XML string into another.

    Two passes:
      1) Append the page entries found inside *add_xml*'s ``<*:pages>``
         block (both self-closing and open/close forms) to the matching
         block in *base_xml*.
      2) Append *add_xml*'s body paragraphs (``<*:p>...</*:p>``) just
         before *base_xml*'s ``</*:section>``, preceded by a paragraph
         containing a pageBreak so the added content starts on a new page.

    Returns the merged XML string; *base_xml* is returned unchanged when
    neither pass finds anything to merge.
    """
    # --- 1) merge <*:pages> lists -------------------------------------
    pages_block_re = re.compile(
        r'<(?P<pfx>[a-zA-Z0-9_]+):pages\b[^>]*>(?P<body>.*?)</(?P=pfx):pages>',
        re.DOTALL,
    )
    m_base_pages = pages_block_re.search(base_xml)
    m_add_pages = pages_block_re.search(add_xml)
    if m_base_pages and m_add_pages:
        pfx = m_base_pages.group("pfx")
        body_base = m_base_pages.group("body")
        body_add = m_add_pages.group("body")
        # BUGFIX: extract page entries with the *add* document's own
        # namespace prefix. The two documents are not guaranteed to use
        # the same prefix, and searching body_add with the base prefix
        # silently dropped every page entry in that case.
        add_pfx = m_add_pages.group("pfx")
        add_entries = re.findall(
            rf'<{add_pfx}:page\b[^>]*/>|<{add_pfx}:page\b[^>]*>.*?</{add_pfx}:page>',
            body_add,
            re.DOTALL,
        )
        if add_pfx != pfx:
            # Normalize the copied entries to the base document's prefix.
            add_entries = [
                e.replace(f'<{add_pfx}:', f'<{pfx}:').replace(f'</{add_pfx}:', f'</{pfx}:')
                for e in add_entries
            ]
        if add_entries:
            new_body = body_base + "".join(add_entries)
            base_xml = (
                base_xml[: m_base_pages.start("body")]
                + new_body
                + base_xml[m_base_pages.end("body") :]
            )

    # --- 2) merge body paragraphs -------------------------------------
    para_re = re.compile(
        r'<(?P<pfx>[a-zA-Z0-9_]+):p\b[^>]*>.*?</(?P=pfx):p>', re.DOTALL
    )
    # Take the namespace prefix from the first paragraph of the base body.
    m0 = para_re.search(base_xml)
    pfx_in_base = m0.group("pfx") if m0 else None

    add_paras = [m.group(0) for m in para_re.finditer(add_xml)]
    if add_paras and pfx_in_base:
        # A standalone paragraph holding only a page break, so the merged
        # paragraphs start on a fresh page.
        pagebreak_para = (
            f'<{pfx_in_base}:p><{pfx_in_base}:run>'
            f'<{pfx_in_base}:pageBreak/>'
            f'</{pfx_in_base}:run></{pfx_in_base}:p>'
        )
        # Insert just before the section's closing tag; if no closing tag
        # is found, leave the body untouched rather than guess a position.
        m_end = re.search(rf'</{pfx_in_base}:section>', base_xml)
        if m_end:
            insert_at = m_end.start()
            base_xml = (
                base_xml[:insert_at]
                + pagebreak_para
                + "".join(add_paras)
                + base_xml[insert_at:]
            )
    return base_xml
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
# ====================== UI ======================
|
| 419 |
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 420 |
+
st.markdown(
|
| 421 |
+
"""
|
| 422 |
+
- **๋ค์ค ์ค(๋ชฉ๋ก/์ ๋ชฉ/์
๋ฌด๋ช
)** ์ ์ ๋ฌธ๋จ ์คํ์ผ์ ์ ์งํ ์ฑ **๋ถ๋ชจ ๋ฌธ๋จ์ ์ค ์๋งํผ ๋ณต์ **ํ์ฌ ๊ฒน์นจ ์์ด ํ์ํฉ๋๋ค.
|
| 423 |
+
- ๋ฐ์ค๊ฐ ๋ง์๋ **๋ง์ง๋ง์ ํ ๊ฐ์ HWPX ํ์ผ**๋ก ํตํฉํด ๋ด๋ ค์ค๋๋ค.
|
| 424 |
+
- ํ
ํ๋ฆฟ์ ๋ฐ๋์ **.HWPX** ์ฌ์ผ ํฉ๋๋ค. (.HWP ๋ถ๊ฐ)
|
| 425 |
+
"""
|
| 426 |
+
)
|
| 427 |
|
| 428 |
tpl = st.file_uploader("๐ HWPX ํ
ํ๋ฆฟ ์
๋ก๋", type=["hwpx"])
|
| 429 |
n_per_page = st.number_input("ํ
ํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์(ํ ํ์ด์ง N๊ฐ)", 1, 12, 3, 1)
|
| 430 |
+
data = st.file_uploader("๐ ๋ฐ์ดํฐ ์
๋ก๋ (Excel/CSV)", type=["xlsx", "xls", "csv"])
|
| 431 |
|
| 432 |
if tpl and data:
|
| 433 |
tpl_bytes = tpl.read()
|
| 434 |
df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
|
| 435 |
|
| 436 |
if "๋ฐ์ค๋ฒํธ" not in df.columns:
|
| 437 |
+
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
|
| 438 |
+
st.stop()
|
| 439 |
|
| 440 |
st.success("โ
์์น ๋งคํ ์๋ฃ (์์
์ธก)")
|
| 441 |
st.dataframe(df.head(10), use_container_width=True)
|
|
|
|
| 453 |
|
| 454 |
# 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ
|
| 455 |
st.subheader("๐งช 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ")
|
| 456 |
+
keys = ["๋ฐ์ค๋ฒํธ", "์ข
๋ฃ์ฐ๋", "๋ณด์กด๊ธฐ๊ฐ", "๋จ์์
๋ฌด", "๊ธฐ๋ก๋ฌผ์ฒ ", "๋ชฉ๋ก", "์ ๋ชฉ", "์
๋ฌด๋ช
"]
|
| 457 |
mapping_preview = {}
|
| 458 |
for i in range(int(n_per_page)):
|
| 459 |
if i < len(records):
|
| 460 |
r = records[i]
|
| 461 |
+
mapping_preview.update(
|
| 462 |
+
{
|
| 463 |
+
f"๋ฐ์ค๋ฒํธ{i+1}": r.get("๋ฐ์ค๋ฒํธ", ""),
|
| 464 |
+
f"์ข
๋ฃ์ฐ๋{i+1}": r.get("์์ฐ์ฐ๋", ""),
|
| 465 |
+
f"๋ณด์กด๊ธฐ๊ฐ{i+1}": r.get("๋ณด์กด๊ธฐ๊ฐ", ""),
|
| 466 |
+
f"๋จ์์
๋ฌด{i+1}": r.get("๋จ์์
๋ฌด", ""),
|
| 467 |
+
f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}": r.get("๊ธฐ๋ก๋ฌผ์ฒ ", ""),
|
| 468 |
+
f"๋ชฉ๋ก{i+1}": r.get("๋ชฉ๋ก", ""),
|
| 469 |
+
f"์ ๋ชฉ{i+1}": r.get("์ ๋ชฉ", ""),
|
| 470 |
+
f"์
๋ฌด๋ช
{i+1}": r.get("์ ๋ชฉ", ""), # ํ
ํ๋ฆฟ์ด '์
๋ฌด๋ช
X'์ ์ฌ์ฉํ ์ ์์ด ๋์ ๋งคํ
|
| 471 |
+
}
|
| 472 |
+
)
|
| 473 |
else:
|
| 474 |
+
for k in keys:
|
| 475 |
+
mapping_preview[f"{k}{i+1}"] = ""
|
| 476 |
+
st.dataframe(
|
| 477 |
+
pd.DataFrame([{"ํค": k, "๊ฐ ์๋ถ๋ถ": str(v)[:120]} for k, v in sorted(mapping_preview.items())]),
|
| 478 |
+
use_container_width=True,
|
| 479 |
+
height=320,
|
| 480 |
+
)
|
| 481 |
|
| 482 |
+
if st.button("๐ ํตํฉ HWPX ์์ฑ (ํ ํ์ผ๋ก ๋ค์ด๋ก๋)"):
|
| 483 |
pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
|
| 484 |
debug_all = []
|
| 485 |
+
|
| 486 |
+
merged_hwpx: bytes | None = None
|
| 487 |
+
|
|
|
|
| 488 |
for p in range(pages):
|
| 489 |
+
chunk = records[p * int(n_per_page) : (p + 1) * int(n_per_page)]
|
| 490 |
+
mapping: Dict[str, str] = {}
|
| 491 |
for i in range(int(n_per_page)):
|
| 492 |
if i < len(chunk):
|
| 493 |
r = chunk[i]
|
| 494 |
+
mapping[f"๋ฐ์ค๋ฒํธ{i+1}"] = r.get("๋ฐ์ค๋ฒํธ", "")
|
| 495 |
+
mapping[f"์ข
๋ฃ์ฐ๋{i+1}"] = r.get("์์ฐ์ฐ๋", "")
|
| 496 |
+
mapping[f"๋ณด์กด๊ธฐ๊ฐ{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ", "")
|
| 497 |
+
mapping[f"๋จ์์
๋ฌด{i+1}"] = r.get("๋จ์์
๋ฌด", "")
|
| 498 |
+
mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ", "")
|
| 499 |
+
mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก", "")
|
| 500 |
+
title_val = r.get("์ ๋ชฉ", "")
|
| 501 |
+
mapping[f"์ ๋ชฉ{i+1}"] = title_val
|
| 502 |
mapping[f"์
๋ฌด๋ช
{i+1}"] = title_val
|
| 503 |
else:
|
| 504 |
+
for k in keys:
|
| 505 |
+
mapping[f"{k}{i+1}"] = ""
|
| 506 |
|
| 507 |
if p == 0:
|
|
|
|
| 508 |
merged_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
|
| 509 |
else:
|
|
|
|
| 510 |
page_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
|
| 511 |
merged_hwpx = merge_hwpx_pages(merged_hwpx, page_hwpx)
|
|
|
|
|
|
|
| 512 |
|
| 513 |
+
debug_all.append({"page": p + 1, "stats": dbg})
|
| 514 |
+
|
| 515 |
+
# ํ์ผ๋ช
|
| 516 |
first_box = records[0].get("๋ฐ์ค๋ฒํธ", "0000") if records else "0000"
|
| 517 |
last_box = records[-1].get("๋ฐ์ค๋ฒํธ", "0000") if records else "0000"
|
| 518 |
+
filename = (
|
| 519 |
+
f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
|
| 520 |
+
)
|
| 521 |
+
|
| 522 |
+
st.download_button(
|
| 523 |
+
"โฌ๏ธ ํตํฉ HWPX ๋ค์ด๋ก๋",
|
| 524 |
+
data=merged_hwpx,
|
| 525 |
+
file_name=filename,
|
| 526 |
+
mime="application/vnd.hancom.hwpx",
|
| 527 |
+
)
|
| 528 |
+
st.download_button(
|
| 529 |
+
"โฌ๏ธ ๋๋ฒ๊ทธ(JSON)",
|
| 530 |
+
data=json.dumps(debug_all, ensure_ascii=False, indent=2),
|
| 531 |
+
file_name="debug.json",
|
| 532 |
+
mime="application/json",
|
| 533 |
+
)
|