Update app.py
Browse files
app.py
CHANGED
|
@@ -236,7 +236,7 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
|
|
| 236 |
return out_buf.getvalue(), dbg
|
| 237 |
|
| 238 |
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
| 239 |
-
"""HWPX
|
| 240 |
import time
|
| 241 |
|
| 242 |
base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
|
|
@@ -257,33 +257,43 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
|
| 257 |
zi.flag_bits = 0
|
| 258 |
out_zip.writestr(zi, base_zip.read("mimetype"))
|
| 259 |
|
| 260 |
-
#
|
| 261 |
-
|
|
|
|
| 262 |
|
| 263 |
-
# ๋ฒ ์ด์ค์์ ๋ชจ๋ ํ์ผ ์ฒ๋ฆฌ
|
| 264 |
for filename in base_zip.namelist():
|
| 265 |
if filename == "mimetype":
|
| 266 |
continue
|
|
|
|
|
|
|
| 267 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 268 |
-
|
| 269 |
else:
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
zi.flag_bits = 0
|
| 276 |
-
out_zip.writestr(zi, base_zip.read(filename))
|
| 277 |
|
| 278 |
-
# ์ถ๊ฐ ํ์ผ์์ ํ์ด์ง ์ถ์ถํด์ ๊ธฐ์กด ์น์
์ ์ถ๊ฐ
|
| 279 |
for filename in add_zip.namelist():
|
| 280 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
|
|
|
| 284 |
|
| 285 |
-
#
|
| 286 |
-
for filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
zi = zipfile.ZipInfo(filename)
|
| 288 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 289 |
zi.external_attr = 0o100666 << 16
|
|
@@ -292,6 +302,29 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
|
| 292 |
zi.flag_bits = 0
|
| 293 |
out_zip.writestr(zi, content.encode("utf-8"))
|
| 294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
base_zip.close()
|
| 296 |
add_zip.close()
|
| 297 |
out_zip.close()
|
|
@@ -299,58 +332,33 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
|
| 299 |
|
| 300 |
return out_buf.getvalue()
|
| 301 |
|
| 302 |
-
def
|
| 303 |
-
"""
|
| 304 |
-
#
|
| 305 |
-
|
| 306 |
-
r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
|
| 307 |
-
re.DOTALL
|
| 308 |
-
)
|
| 309 |
-
|
| 310 |
-
page_match = page_pattern.search(add_xml)
|
| 311 |
-
if not page_match:
|
| 312 |
-
return base_xml
|
| 313 |
-
|
| 314 |
-
new_page = page_match.group(0)
|
| 315 |
|
| 316 |
-
|
| 317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
-
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
|
| 323 |
-
result = pages_close_pattern.sub(new_page + r'\1', base_xml)
|
| 324 |
-
|
| 325 |
-
return result
|
| 326 |
|
| 327 |
def update_page_id(base_xml: str, new_page: str) -> str:
|
| 328 |
-
"""ํ์ด์ง ID๋ฅผ ์ค๋ณต๋์ง ์๊ฒ ์
๋ฐ์ดํธ"""
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
existing_ids.add(match.group(1))
|
| 335 |
-
|
| 336 |
-
# ์ ํ์ด์ง์ ID ์ฐพ๊ธฐ
|
| 337 |
-
new_page_id_match = id_pattern.search(new_page)
|
| 338 |
-
if not new_page_id_match:
|
| 339 |
-
return new_page
|
| 340 |
-
|
| 341 |
-
original_id = new_page_id_match.group(1)
|
| 342 |
-
|
| 343 |
-
# ์ค๋ณต๋์ง ์๋ ์ ID ์์ฑ
|
| 344 |
-
counter = 2
|
| 345 |
-
new_id = f"{original_id}_{counter}"
|
| 346 |
-
while new_id in existing_ids:
|
| 347 |
-
counter += 1
|
| 348 |
-
new_id = f"{original_id}_{counter}"
|
| 349 |
-
|
| 350 |
-
# ID ๊ต์ฒด
|
| 351 |
-
updated_page = new_page.replace(f'id="{original_id}"', f'id="{new_id}"')
|
| 352 |
-
|
| 353 |
-
return updated_page
|
| 354 |
|
| 355 |
def merge_section_xml_list(xml_list: list) -> str:
|
| 356 |
"""์ฌ๋ฌ ์น์
XML์ ํ๋๋ก ๋ณํฉ (์ฌ์ฉํ์ง ์์ง๋ง ํธํ์ฑ ์ ์ง)"""
|
|
|
|
| 236 |
return out_buf.getvalue(), dbg
|
| 237 |
|
| 238 |
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
|
| 239 |
+
"""HWPX ํ์ผ๋ค์ ์น์
๋จ์๋ก ๋ณํฉ (COM InsertFile๊ณผ ์ ์ฌํ ๋ฐฉ์)"""
|
| 240 |
import time
|
| 241 |
|
| 242 |
base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
|
|
|
|
| 257 |
zi.flag_bits = 0
|
| 258 |
out_zip.writestr(zi, base_zip.read("mimetype"))
|
| 259 |
|
| 260 |
+
# Process the base files
|
| 261 |
+
base_sections = {}
|
| 262 |
+
base_files = {}
|
| 263 |
|
|
|
|
| 264 |
for filename in base_zip.namelist():
|
| 265 |
if filename == "mimetype":
|
| 266 |
continue
|
| 267 |
+
data = base_zip.read(filename)
|
| 268 |
+
|
| 269 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 270 |
+
base_sections[filename] = data.decode("utf-8", errors="ignore")
|
| 271 |
else:
|
| 272 |
+
base_files[filename] = data
|
| 273 |
+
|
| 274 |
+
# Collect the sections of the additional file
|
| 275 |
+
add_sections = {}
|
| 276 |
+
next_section_num = len(base_sections) + 1
|
|
|
|
|
|
|
| 277 |
|
|
|
|
| 278 |
for filename in add_zip.namelist():
|
| 279 |
if filename.startswith("Contents/section") and filename.endswith(".xml"):
|
| 280 |
+
# Rename to the next free section number
|
| 281 |
+
new_filename = f"Contents/section{next_section_num}.xml"
|
| 282 |
+
add_sections[new_filename] = add_zip.read(filename).decode("utf-8", errors="ignore")
|
| 283 |
+
next_section_num += 1
|
| 284 |
|
| 285 |
+
# Copy all base files
|
| 286 |
+
for filename, data in base_files.items():
|
| 287 |
+
zi = zipfile.ZipInfo(filename)
|
| 288 |
+
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 289 |
+
zi.external_attr = 0o100666 << 16
|
| 290 |
+
zi.create_system = 0
|
| 291 |
+
zi.date_time = now[:6]
|
| 292 |
+
zi.flag_bits = 0
|
| 293 |
+
out_zip.writestr(zi, data)
|
| 294 |
+
|
| 295 |
+
# Copy the base sections
|
| 296 |
+
for filename, content in base_sections.items():
|
| 297 |
zi = zipfile.ZipInfo(filename)
|
| 298 |
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 299 |
zi.external_attr = 0o100666 << 16
|
|
|
|
| 302 |
zi.flag_bits = 0
|
| 303 |
out_zip.writestr(zi, content.encode("utf-8"))
|
| 304 |
|
| 305 |
+
# Add the new sections
|
| 306 |
+
for filename, content in add_sections.items():
|
| 307 |
+
zi = zipfile.ZipInfo(filename)
|
| 308 |
+
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 309 |
+
zi.external_attr = 0o100666 << 16
|
| 310 |
+
zi.create_system = 0
|
| 311 |
+
zi.date_time = now[:6]
|
| 312 |
+
zi.flag_bits = 0
|
| 313 |
+
out_zip.writestr(zi, content.encode("utf-8"))
|
| 314 |
+
|
| 315 |
+
# Update BodyText (add references to the new sections)
|
| 316 |
+
if "Contents/bodytext.xml" in base_files:
|
| 317 |
+
bodytext = base_files["Contents/bodytext.xml"].decode("utf-8", errors="ignore")
|
| 318 |
+
updated_bodytext = add_sections_to_bodytext(bodytext, list(add_sections.keys()))
|
| 319 |
+
|
| 320 |
+
zi = zipfile.ZipInfo("Contents/bodytext.xml")
|
| 321 |
+
zi.compress_type = zipfile.ZIP_DEFLATED
|
| 322 |
+
zi.external_attr = 0o100666 << 16
|
| 323 |
+
zi.create_system = 0
|
| 324 |
+
zi.date_time = now[:6]
|
| 325 |
+
zi.flag_bits = 0
|
| 326 |
+
out_zip.writestr(zi, updated_bodytext.encode("utf-8"))
|
| 327 |
+
|
| 328 |
base_zip.close()
|
| 329 |
add_zip.close()
|
| 330 |
out_zip.close()
|
|
|
|
| 332 |
|
| 333 |
return out_buf.getvalue()
|
| 334 |
|
| 335 |
+
def add_sections_to_bodytext(bodytext: str, new_section_files: list) -> str:
    """Insert references to newly added section files into the body text XML.

    One ``<hml:secDef>`` element pointing at each new section file is built,
    and the whole batch is inserted immediately before the last
    ``</hml:body>`` closing tag.

    Args:
        bodytext: XML text to update.
        new_section_files: Archive paths of the new sections, for example
            ``"Contents/section2.xml"``.

    Returns:
        The updated XML, or ``bodytext`` unchanged when there is nothing to
        add or when no ``</hml:body>`` closing tag is present.

    Raises:
        IndexError: If a file name does not contain ``"section"``.
    """
    closing_tag = "</hml:body>"

    section_refs = []
    for section_file in new_section_files:
        # "Contents/section1.xml" -> "1". Only the final path component is
        # inspected so a directory name containing "section" cannot skew it.
        basename = section_file.rsplit("/", 1)[-1]
        section_num = basename.split("section")[1].split(".xml")[0]
        section_refs.append(
            f'<hml:secDef><hml:secPtr hml:hRef="../Contents/section{section_num}.xml#0"/></hml:secDef>'
        )

    if not section_refs:
        return bodytext

    # Plain string splicing instead of re.sub: the inserted text is never
    # interpreted as a regex replacement template, and only the last closing
    # tag is touched rather than every occurrence.
    head, tag, tail = bodytext.rpartition(closing_tag)
    if not tag:
        return bodytext

    new_sections_xml = "".join(section_refs)
    return head + new_sections_xml + tag + tail
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
def update_page_id(base_xml: str, new_page: str) -> str:
    """Return ``new_page`` unchanged (deprecated; no longer used).

    Formerly rewrote the page ID so that it would not duplicate an existing
    one. The function is now a no-op.

    Args:
        base_xml: Ignored.
        new_page: Page XML fragment.

    Returns:
        ``new_page`` exactly as passed in.
    """
    return new_page
|
| 358 |
+
|
| 359 |
+
def add_page_to_section(base_xml: str, add_xml: str) -> str:
    """Return ``base_xml`` unchanged (deprecated; no longer used).

    Formerly added a new page to a section. The function is now a no-op.

    Args:
        base_xml: Section XML.
        add_xml: Ignored.

    Returns:
        ``base_xml`` exactly as passed in.
    """
    return base_xml
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
def merge_section_xml_list(xml_list: list) -> str:
|
| 364 |
"""์ฌ๋ฌ ์น์
XML์ ํ๋๋ก ๋ณํฉ (์ฌ์ฉํ์ง ์์ง๋ง ํธํ์ฑ ์ ์ง)"""
|