dohyune committed on
Commit
3902f45
·
verified ·
1 Parent(s): ad0a525

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -65
app.py CHANGED
@@ -236,7 +236,7 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
236
  return out_buf.getvalue(), dbg
237
 
238
  def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
239
- """HWPX์— ์ƒˆ ํŽ˜์ด์ง€ ์ถ”๊ฐ€ (ํ…œํ”Œ๋ฆฟ ๋ณต์ œ ๋ฐฉ์‹)"""
240
  import time
241
 
242
  base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
@@ -257,33 +257,43 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
257
  zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
 
260
- # 섹션 파일들을 따로 처리
261
- section_files = {}
 
262
 
263
- # 베이스에서 모든 파일 처리
264
  for filename in base_zip.namelist():
265
  if filename == "mimetype":
266
  continue
 
 
267
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
268
- section_files[filename] = base_zip.read(filename).decode("utf-8", errors="ignore")
269
  else:
270
- zi = zipfile.ZipInfo(filename)
271
- zi.compress_type = zipfile.ZIP_DEFLATED
272
- zi.external_attr = 0o100666 << 16
273
- zi.create_system = 0
274
- zi.date_time = now[:6]
275
- zi.flag_bits = 0
276
- out_zip.writestr(zi, base_zip.read(filename))
277
 
278
- # 추가 파일에서 페이지 추출해서 기존 섹션에 추가
279
  for filename in add_zip.namelist():
280
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
281
- add_content = add_zip.read(filename).decode("utf-8", errors="ignore")
282
- if filename in section_files:
283
- section_files[filename] = add_page_to_section(section_files[filename], add_content)
 
284
 
285
- # 수정된 섹션 파일들 저장
286
- for filename, content in section_files.items():
 
 
 
 
 
 
 
 
 
 
287
  zi = zipfile.ZipInfo(filename)
288
  zi.compress_type = zipfile.ZIP_DEFLATED
289
  zi.external_attr = 0o100666 << 16
@@ -292,6 +302,29 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
292
  zi.flag_bits = 0
293
  out_zip.writestr(zi, content.encode("utf-8"))
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  base_zip.close()
296
  add_zip.close()
297
  out_zip.close()
@@ -299,58 +332,33 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
299
 
300
  return out_buf.getvalue()
301
 
302
- def add_page_to_section(base_xml: str, add_xml: str) -> str:
303
- """섹션에 새 페이지 추가"""
304
- # ์ถ”๊ฐ€ํ•  XML์—์„œ ์ฒซ ๋ฒˆ์งธ ํŽ˜์ด์ง€ ์ถ”์ถœ
305
- page_pattern = re.compile(
306
- r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
307
- re.DOTALL
308
- )
309
-
310
- page_match = page_pattern.search(add_xml)
311
- if not page_match:
312
- return base_xml
313
-
314
- new_page = page_match.group(0)
315
 
316
- # ๋ฒ ์ด์Šค XML์—์„œ </prefix:pages> ํƒœ๊ทธ ์ฐพ์•„์„œ ๊ทธ ์•ž์— ์ƒˆ ํŽ˜์ด์ง€ ์‚ฝ์ž…
317
- pages_close_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
 
 
 
 
318
 
319
- # 페이지 ID 업데이트 (중복 방지)
320
- new_page = update_page_id(base_xml, new_page)
 
 
 
321
 
322
- # 페이지 삽입
323
- result = pages_close_pattern.sub(new_page + r'\1', base_xml)
324
-
325
- return result
326
 
327
  def update_page_id(base_xml: str, new_page: str) -> str:
328
- """ํŽ˜์ด์ง€ ID๋ฅผ ์ค‘๋ณต๋˜์ง€ ์•Š๊ฒŒ ์—…๋ฐ์ดํŠธ"""
329
- # 기존 페이지들의 ID 추출
330
- existing_ids = set()
331
- id_pattern = re.compile(r'<[a-zA-Z0-9_]+:page\b[^>]*\bid="([^"]*)"')
332
-
333
- for match in id_pattern.finditer(base_xml):
334
- existing_ids.add(match.group(1))
335
-
336
- # 새 페이지의 ID 찾기
337
- new_page_id_match = id_pattern.search(new_page)
338
- if not new_page_id_match:
339
- return new_page
340
-
341
- original_id = new_page_id_match.group(1)
342
-
343
- # 중복되지 않는 새 ID 생성
344
- counter = 2
345
- new_id = f"{original_id}_{counter}"
346
- while new_id in existing_ids:
347
- counter += 1
348
- new_id = f"{original_id}_{counter}"
349
-
350
- # ID 교체
351
- updated_page = new_page.replace(f'id="{original_id}"', f'id="{new_id}"')
352
-
353
- return updated_page
354
 
355
  def merge_section_xml_list(xml_list: list) -> str:
356
  """์—ฌ๋Ÿฌ ์„น์…˜ XML์„ ํ•˜๋‚˜๋กœ ๋ณ‘ํ•ฉ (์‚ฌ์šฉํ•˜์ง€ ์•Š์ง€๋งŒ ํ˜ธํ™˜์„ฑ ์œ ์ง€)"""
 
236
  return out_buf.getvalue(), dbg
237
 
238
  def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
239
+ """HWPX ํŒŒ์ผ๋“ค์„ ์„น์…˜ ๋‹จ์œ„๋กœ ๋ณ‘ํ•ฉ (COM InsertFile๊ณผ ์œ ์‚ฌํ•œ ๋ฐฉ์‹)"""
240
  import time
241
 
242
  base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
 
257
  zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
 
260
+ # 베이스 파일들 처리
261
+ base_sections = {}
262
+ base_files = {}
263
 
 
264
  for filename in base_zip.namelist():
265
  if filename == "mimetype":
266
  continue
267
+ data = base_zip.read(filename)
268
+
269
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
270
+ base_sections[filename] = data.decode("utf-8", errors="ignore")
271
  else:
272
+ base_files[filename] = data
273
+
274
+ # 추가 파일의 섹션들 수집
275
+ add_sections = {}
276
+ next_section_num = len(base_sections) + 1
 
 
277
 
 
278
  for filename in add_zip.namelist():
279
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
280
+ # 새로운 섹션 번호로 변경
281
+ new_filename = f"Contents/section{next_section_num}.xml"
282
+ add_sections[new_filename] = add_zip.read(filename).decode("utf-8", errors="ignore")
283
+ next_section_num += 1
284
 
285
+ # 모든 베이스 파일들 복사
286
+ for filename, data in base_files.items():
287
+ zi = zipfile.ZipInfo(filename)
288
+ zi.compress_type = zipfile.ZIP_DEFLATED
289
+ zi.external_attr = 0o100666 << 16
290
+ zi.create_system = 0
291
+ zi.date_time = now[:6]
292
+ zi.flag_bits = 0
293
+ out_zip.writestr(zi, data)
294
+
295
+ # 베이스 섹션들 복사
296
+ for filename, content in base_sections.items():
297
  zi = zipfile.ZipInfo(filename)
298
  zi.compress_type = zipfile.ZIP_DEFLATED
299
  zi.external_attr = 0o100666 << 16
 
302
  zi.flag_bits = 0
303
  out_zip.writestr(zi, content.encode("utf-8"))
304
 
305
+ # 새로운 섹션들 추가
306
+ for filename, content in add_sections.items():
307
+ zi = zipfile.ZipInfo(filename)
308
+ zi.compress_type = zipfile.ZIP_DEFLATED
309
+ zi.external_attr = 0o100666 << 16
310
+ zi.create_system = 0
311
+ zi.date_time = now[:6]
312
+ zi.flag_bits = 0
313
+ out_zip.writestr(zi, content.encode("utf-8"))
314
+
315
+ # BodyText 업데이트 (새 섹션 참조 추가)
316
+ if "Contents/bodytext.xml" in base_files:
317
+ bodytext = base_files["Contents/bodytext.xml"].decode("utf-8", errors="ignore")
318
+ updated_bodytext = add_sections_to_bodytext(bodytext, list(add_sections.keys()))
319
+
320
+ zi = zipfile.ZipInfo("Contents/bodytext.xml")
321
+ zi.compress_type = zipfile.ZIP_DEFLATED
322
+ zi.external_attr = 0o100666 << 16
323
+ zi.create_system = 0
324
+ zi.date_time = now[:6]
325
+ zi.flag_bits = 0
326
+ out_zip.writestr(zi, updated_bodytext.encode("utf-8"))
327
+
328
  base_zip.close()
329
  add_zip.close()
330
  out_zip.close()
 
332
 
333
  return out_buf.getvalue()
334
 
335
+ def add_sections_to_bodytext(bodytext: str, new_section_files: list) -> str:
336
+ """BodyText์— ์ƒˆ ์„น์…˜ ์ฐธ์กฐ ์ถ”๊ฐ€"""
337
+ # 마지막 섹션 뒤에 새 섹션들 추가
338
+ # </hml:body> 태그 앞에 새 섹션 참조 삽입
 
 
 
 
 
 
 
 
 
339
 
340
+ section_refs = []
341
+ for section_file in new_section_files:
342
+ # section1.xml -> 1 추출
343
+ section_num = section_file.split("section")[1].split(".xml")[0]
344
+ section_ref = f'<hml:secDef><hml:secPtr hml:hRef="../Contents/section{section_num}.xml#0"/></hml:secDef>'
345
+ section_refs.append(section_ref)
346
 
347
+ if section_refs:
348
+ # </hml:body> 앞에 삽입
349
+ body_close_pattern = re.compile(r'(</hml:body>)')
350
+ new_sections_xml = ''.join(section_refs)
351
+ bodytext = body_close_pattern.sub(new_sections_xml + r'\1', bodytext)
352
 
353
+ return bodytext
 
 
 
354
 
355
  def update_page_id(base_xml: str, new_page: str) -> str:
356
+ """ํŽ˜์ด์ง€ ID๋ฅผ ์ค‘๋ณต๋˜์ง€ ์•Š๊ฒŒ ์—…๋ฐ์ดํŠธ (๋” ์ด์ƒ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Œ)"""
357
+ return new_page
358
+
359
+ def add_page_to_section(base_xml: str, add_xml: str) -> str:
360
+ """섹션에 새 페이지 추가 (더 이상 사용하지 않음)"""
361
+ return base_xml
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
  def merge_section_xml_list(xml_list: list) -> str:
364
  """์—ฌ๋Ÿฌ ์„น์…˜ XML์„ ํ•˜๋‚˜๋กœ ๋ณ‘ํ•ฉ (์‚ฌ์šฉํ•˜์ง€ ์•Š์ง€๋งŒ ํ˜ธํ™˜์„ฑ ์œ ์ง€)"""