Update app.py
Browse files
app.py
CHANGED
|
@@ -236,7 +236,7 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
|
|
| 236 |
return out_buf.getvalue(), dbg
|
| 237 |
|
| 238 |
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
| 239 |
-
"""
|
| 240 |
import time
|
| 241 |
|
| 242 |
base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
|
|
@@ -257,15 +257,15 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
|
| 257 |
zi.flag_bits = 0
|
| 258 |
out_zip.writestr(zi, base_zip.read("mimetype"))
|
| 259 |
|
| 260 |
-
#
|
| 261 |
section_files = {}
|
| 262 |
|
| 263 |
-
# ๋ฒ ์ด์ค์์ ๋ชจ๋ ํ์ผ
|
| 264 |
for filename in base_zip.namelist():
|
| 265 |
if filename == "mimetype":
|
| 266 |
continue
|
| 267 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 268 |
-
section_files[filename] =
|
| 269 |
else:
|
| 270 |
zi = zipfile.ZipInfo(filename)
|
| 271 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
|
@@ -275,29 +275,22 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
|
| 275 |
zi.flag_bits = 0
|
| 276 |
out_zip.writestr(zi, base_zip.read(filename))
|
| 277 |
|
| 278 |
-
# ์ถ๊ฐ ํ์ผ์์ ์น์
|
| 279 |
for filename in add_zip.namelist():
|
| 280 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 281 |
-
|
| 282 |
if filename in section_files:
|
| 283 |
-
section_files[filename]
|
| 284 |
-
else:
|
| 285 |
-
section_files[filename] = [content]
|
| 286 |
|
| 287 |
-
# ์น์
ํ์ผ๋ค
|
| 288 |
-
for filename,
|
| 289 |
-
if len(contents) == 1:
|
| 290 |
-
merged_content = contents[0]
|
| 291 |
-
else:
|
| 292 |
-
merged_content = merge_section_xml_list(contents)
|
| 293 |
-
|
| 294 |
zi = zipfile.ZipInfo(filename)
|
| 295 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 296 |
zi.external_attr = 0o100666 << 16
|
| 297 |
zi.create_system = 0
|
| 298 |
zi.date_time = now[:6]
|
| 299 |
zi.flag_bits = 0
|
| 300 |
-
out_zip.writestr(zi,
|
| 301 |
|
| 302 |
base_zip.close()
|
| 303 |
add_zip.close()
|
|
@@ -306,33 +299,67 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
|
| 306 |
|
| 307 |
return out_buf.getvalue()
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
def merge_section_xml_list(xml_list: list) -> str:
    """Merge several section XML strings into a single section XML string.

    The first entry is the base document. For every following entry, all
    ``<prefix:page ...>...</prefix:page>`` elements are collected and inserted
    immediately before each ``</prefix:pages>`` closing tag of the base.

    Args:
        xml_list: Section XML strings; the first one is the merge target.

    Returns:
        The merged XML. An empty list yields ``""`` and a single-element list
        yields that element unchanged. Entries without any page element are
        skipped.
    """
    if not xml_list:
        return ""

    # Compiled once: both patterns are loop-invariant.
    page_pattern = re.compile(
        r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
        re.DOTALL
    )
    pages_close_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')

    base_xml = xml_list[0]
    for additional_xml in xml_list[1:]:
        # finditer + group(0) yields the full page text; findall would only
        # return the named "prefix" group.
        pages_str = ''.join(
            match.group(0) for match in page_pattern.finditer(additional_xml)
        )
        if not pages_str:
            continue

        # A callable replacement inserts the page text literally. Passing it
        # as a template string would make re.sub interpret any backslash in
        # the page content as an escape or group reference.
        base_xml = pages_close_pattern.sub(
            lambda match, pages=pages_str: pages + match.group(1),
            base_xml,
        )

    return base_xml
|
| 338 |
|
|
@@ -377,6 +404,8 @@ def merge_section_content(base_xml: str, add_xml: str) -> str:
|
|
| 377 |
# -------------------- UI --------------------
|
| 378 |
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 379 |
st.markdown("""
|
|
|
|
|
|
|
| 380 |
- **๋ค์ค ์ค(๋ชฉ๋ก/์ ๋ชฉ)์ ๋ถ๋ชจ ๋ฌธ๋จ์ ์ฌ๋ฌ ๋ฌธ๋จ์ผ๋ก ๊ต์ฒด**ํ์ฌ ๊ฒน์นจ ์์ด ํ์ํฉ๋๋ค.
|
| 381 |
- **์
๋ฌด๋ช
์ ๋จ์ผ์ค๋ก ์ฒ๋ฆฌ**ํ์ฌ ์๋ณธ ํฐํธ ์คํ์ผ์ ์ ์งํฉ๋๋ค.
|
| 382 |
- **์์ฑ๋ HWPX ํ์ผ์ ์ฝ๊ธฐ์ ์ฉ ์์ฑ์ด ํด์ **๋์ด ํธ์ง ๊ฐ๋ฅํฉ๋๋ค.
|
|
|
|
| 236 |
return out_buf.getvalue(), dbg
|
| 237 |
|
| 238 |
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
| 239 |
+
"""HWPX์ ์ ํ์ด์ง ์ถ๊ฐ (ํ
ํ๋ฆฟ ๋ณต์ ๋ฐฉ์)"""
|
| 240 |
import time
|
| 241 |
|
| 242 |
base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
|
|
|
|
| 257 |
zi.flag_bits = 0
|
| 258 |
out_zip.writestr(zi, base_zip.read("mimetype"))
|
| 259 |
|
| 260 |
+
# ์น์
ํ์ผ๋ค์ ๋ฐ๋ก ์ฒ๋ฆฌ
|
| 261 |
section_files = {}
|
| 262 |
|
| 263 |
+
# ๋ฒ ์ด์ค์์ ๋ชจ๋ ํ์ผ ์ฒ๋ฆฌ
|
| 264 |
for filename in base_zip.namelist():
|
| 265 |
if filename == "mimetype":
|
| 266 |
continue
|
| 267 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 268 |
+
section_files[filename] = base_zip.read(filename).decode("utf-8", errors="ignore")
|
| 269 |
else:
|
| 270 |
zi = zipfile.ZipInfo(filename)
|
| 271 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
|
|
|
| 275 |
zi.flag_bits = 0
|
| 276 |
out_zip.writestr(zi, base_zip.read(filename))
|
| 277 |
|
| 278 |
+
# ์ถ๊ฐ ํ์ผ์์ ํ์ด์ง ์ถ์ถํด์ ๊ธฐ์กด ์น์
์ ์ถ๊ฐ
|
| 279 |
for filename in add_zip.namelist():
|
| 280 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 281 |
+
add_content = add_zip.read(filename).decode("utf-8", errors="ignore")
|
| 282 |
if filename in section_files:
|
| 283 |
+
section_files[filename] = add_page_to_section(section_files[filename], add_content)
|
|
|
|
|
|
|
| 284 |
|
| 285 |
+
# ์์ ๋ ์น์
ํ์ผ๋ค ์ ์ฅ
|
| 286 |
+
for filename, content in section_files.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
zi = zipfile.ZipInfo(filename)
|
| 288 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 289 |
zi.external_attr = 0o100666 << 16
|
| 290 |
zi.create_system = 0
|
| 291 |
zi.date_time = now[:6]
|
| 292 |
zi.flag_bits = 0
|
| 293 |
+
out_zip.writestr(zi, content.encode("utf-8"))
|
| 294 |
|
| 295 |
base_zip.close()
|
| 296 |
add_zip.close()
|
|
|
|
| 299 |
|
| 300 |
return out_buf.getvalue()
|
| 301 |
|
| 302 |
+
def add_page_to_section(base_xml: str, add_xml: str) -> str:
    """Add the first page found in ``add_xml`` to the section ``base_xml``.

    The first ``<prefix:page ...>...</prefix:page>`` element of ``add_xml`` is
    passed through ``update_page_id`` (to avoid a duplicate page id) and then
    inserted immediately before each ``</prefix:pages>`` closing tag found in
    ``base_xml``.

    Args:
        base_xml: Section XML that receives the new page.
        add_xml: Section XML the new page is extracted from.

    Returns:
        The section XML with the page inserted. ``base_xml`` is returned
        unchanged when ``add_xml`` contains no page element or when
        ``base_xml`` has no ``</prefix:pages>`` closing tag.
    """
    # Extract the first page from the XML being added.
    page_pattern = re.compile(
        r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
        re.DOTALL
    )
    page_match = page_pattern.search(add_xml)
    if not page_match:
        return base_xml

    # Update the page id so it does not duplicate one already in the base.
    new_page = update_page_id(base_xml, page_match.group(0))

    # Insert the page in front of the closing </prefix:pages> tag. A callable
    # replacement is used so the page text is inserted literally; as a
    # template string, any backslash in the page content would be interpreted
    # by re.sub as an escape or group reference.
    pages_close_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
    return pages_close_pattern.sub(
        lambda match: new_page + match.group(1),
        base_xml,
    )
|
| 326 |
+
|
| 327 |
+
def update_page_id(base_xml: str, new_page: str) -> str:
    """Return ``new_page`` with a page id that does not duplicate one in ``base_xml``.

    Page ids are read from the ``id="..."`` attribute of ``<prefix:page ...>``
    opening tags. When the id of ``new_page`` is already used in ``base_xml``,
    it is replaced by ``"<id>_<n>"`` with the smallest ``n >= 2`` that is not
    in use.

    Args:
        base_xml: Section XML whose existing page ids must not be duplicated.
        new_page: XML text of the page about to be inserted.

    Returns:
        ``new_page`` with its page id rewritten if needed. It is returned
        unchanged when its page tag carries no ``id`` attribute or when the
        id does not collide with any page id in ``base_xml``.
    """
    id_pattern = re.compile(r'<[a-zA-Z0-9_]+:page\b[^>]*\bid="([^"]*)"')

    # Locate the id of the new page.
    new_page_id_match = id_pattern.search(new_page)
    if not new_page_id_match:
        return new_page
    original_id = new_page_id_match.group(1)

    # Collect the ids of the pages already present in the base document.
    existing_ids = {match.group(1) for match in id_pattern.finditer(base_xml)}
    if original_id not in existing_ids:
        # No collision: keep the id as-is.
        return new_page

    # Generate a new id that is not in use.
    counter = 2
    new_id = f"{original_id}_{counter}"
    while new_id in existing_ids:
        counter += 1
        new_id = f"{original_id}_{counter}"

    # Splice the new id into the matched attribute value only. A global
    # str.replace on 'id="..."' would also rewrite nested elements that
    # happen to share the same id value and attributes whose name merely
    # ends in "id".
    value_start, value_end = new_page_id_match.span(1)
    return new_page[:value_start] + new_id + new_page[value_end:]
|
| 354 |
+
|
| 355 |
def merge_section_xml_list(xml_list: list) -> str:
    """Merge several section XML strings into one (unused, kept for compatibility).

    The first entry is the merge target; every later entry is folded into it,
    in order, through ``add_page_to_section``.

    Args:
        xml_list: Section XML strings to merge.

    Returns:
        The merged XML; ``""`` for an empty list, or the sole element when the
        list holds exactly one entry.
    """
    if not xml_list:
        return ""

    merged, *remaining = xml_list
    for extra_xml in remaining:
        merged = add_page_to_section(merged, extra_xml)

    return merged
|
| 365 |
|
|
|
|
| 404 |
# -------------------- UI --------------------
|
| 405 |
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 406 |
st.markdown("""
|
| 407 |
+
- **ํ
ํ๋ฆฟ์ 1ํ์ด์ง์ N๊ฐ ๋ผ๋ฒจ**์ด ์๋ ํ ํํ๋ฅผ ์ฌ์ฉํฉ๋๋ค.
|
| 408 |
+
- **๋ฐ์ค ์๊ฐ N๊ฐ๋ฅผ ์ด๊ณผํ๋ฉด ์ ํ์ด์ง๊ฐ ์๋ ์ถ๊ฐ**๋ฉ๋๋ค.
|
| 409 |
- **๋ค์ค ์ค(๋ชฉ๋ก/์ ๋ชฉ)์ ๋ถ๋ชจ ๋ฌธ๋จ์ ์ฌ๋ฌ ๋ฌธ๋จ์ผ๋ก ๊ต์ฒด**ํ์ฌ ๊ฒน์นจ ์์ด ํ์ํฉ๋๋ค.
|
| 410 |
- **์
๋ฌด๋ช
์ ๋จ์ผ์ค๋ก ์ฒ๋ฆฌ**ํ์ฌ ์๋ณธ ํฐํธ ์คํ์ผ์ ์ ์งํฉ๋๋ค.
|
| 411 |
- **์์ฑ๋ HWPX ํ์ผ์ ์ฝ๊ธฐ์ ์ฉ ์์ฑ์ด ํด์ **๋์ด ํธ์ง ๊ฐ๋ฅํฉ๋๋ค.
|