dohyune committed on
Commit
15cd71d
·
verified ·
1 Parent(s): 0e4c56c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -24
app.py CHANGED
@@ -257,17 +257,16 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
257
  zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
 
260
- # ๋ฒ ์ด์Šค HWPX์—์„œ ์„น์…˜ ํŒŒ์ผ๋“ค ์ฝ๊ธฐ
261
- base_sections = {}
262
- add_sections = {}
263
 
 
264
  for filename in base_zip.namelist():
265
  if filename == "mimetype":
266
  continue
267
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
268
- base_sections[filename] = base_zip.read(filename).decode("utf-8", errors="ignore")
269
  else:
270
- # ๋‹ค๋ฅธ ํŒŒ์ผ๋“ค์€ ๊ทธ๋Œ€๋กœ ๋ณต์‚ฌ
271
  zi = zipfile.ZipInfo(filename)
272
  zi.compress_type = zipfile.ZIP_DEFLATED
273
  zi.external_attr = 0o100666 << 16
@@ -276,23 +275,29 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
276
  zi.flag_bits = 0
277
  out_zip.writestr(zi, base_zip.read(filename))
278
 
279
- # ์ถ”๊ฐ€ํ•  HWPX์—์„œ ์„น์…˜ ํŒŒ์ผ๋“ค ์ฝ๊ธฐ
280
  for filename in add_zip.namelist():
281
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
282
- add_sections[filename] = add_zip.read(filename).decode("utf-8", errors="ignore")
283
-
284
- # ์„น์…˜ ๋ณ‘ํ•ฉ ์ฒ˜๋ฆฌ
285
- merged_sections = merge_sections(base_sections, add_sections)
 
286
 
287
- # ๋ณ‘ํ•ฉ๋œ ์„น์…˜๋“ค์„ ZIP์— ์“ฐ๊ธฐ
288
- for filename, content in merged_sections.items():
 
 
 
 
 
289
  zi = zipfile.ZipInfo(filename)
290
  zi.compress_type = zipfile.ZIP_DEFLATED
291
  zi.external_attr = 0o100666 << 16
292
  zi.create_system = 0
293
  zi.date_time = now[:6]
294
  zi.flag_bits = 0
295
- out_zip.writestr(zi, content.encode("utf-8"))
296
 
297
  base_zip.close()
298
  add_zip.close()
@@ -301,6 +306,36 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
301
 
302
  return out_buf.getvalue()
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  def merge_sections(base_sections: dict, add_sections: dict) -> dict:
305
  """์„น์…˜ XML๋“ค์„ ๋ณ‘ํ•ฉ"""
306
  merged = base_sections.copy()
@@ -317,22 +352,25 @@ def merge_sections(base_sections: dict, add_sections: dict) -> dict:
317
 
318
  def merge_section_content(base_xml: str, add_xml: str) -> str:
319
  """๋‹จ์ผ ์„น์…˜ XML ๋‚ด์šฉ์„ ๋ณ‘ํ•ฉ"""
320
- # ์ถ”๊ฐ€ํ•  XML์—์„œ ํŽ˜์ด์ง€๋“ค ์ถ”์ถœ
321
- page_pattern = re.compile(r'(<(?P<prefix>[a-zA-Z0-9_]+):page[^>]*>.*?</(?P=prefix):page>)', re.DOTALL)
322
- add_pages = page_pattern.findall(add_xml)
 
 
323
 
324
- if not add_pages:
 
 
325
  return base_xml
326
 
327
- # ๋ฒ ์ด์Šค XML์˜ ๋งˆ์ง€๋ง‰ ํŽ˜์ด์ง€ ๋’ค์— ์ƒˆ ํŽ˜์ด์ง€๋“ค ์‚ฝ์ž…
328
- # </hp:pages> ํƒœ๊ทธ ์•ž์— ์‚ฝ์ž…
329
- pages_end_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
330
 
331
- # ์ถ”๊ฐ€ํ•  ํŽ˜์ด์ง€๋“ค์„ ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜
332
- pages_to_add = ''.join([page[0] for page in add_pages])
 
333
 
334
- # ์‚ฝ์ž…
335
- merged_xml = pages_end_pattern.sub(pages_to_add + r'\1', base_xml)
336
 
337
  return merged_xml
338
 
 
257
  zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
 
260
+ # ๋ชจ๋“  ํŒŒ์ผ์„ ์ฒ˜๋ฆฌ - ์„น์…˜ ํŒŒ์ผ๋“ค์€ ๋ณ„๋„ ๋ณ‘ํ•ฉ
261
+ section_files = {}
 
262
 
263
+ # ๋ฒ ์ด์Šค์—์„œ ๋ชจ๋“  ํŒŒ์ผ ์ˆ˜์ง‘
264
  for filename in base_zip.namelist():
265
  if filename == "mimetype":
266
  continue
267
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
268
+ section_files[filename] = [base_zip.read(filename).decode("utf-8", errors="ignore")]
269
  else:
 
270
  zi = zipfile.ZipInfo(filename)
271
  zi.compress_type = zipfile.ZIP_DEFLATED
272
  zi.external_attr = 0o100666 << 16
 
275
  zi.flag_bits = 0
276
  out_zip.writestr(zi, base_zip.read(filename))
277
 
278
+ # ์ถ”๊ฐ€ ํŒŒ์ผ์—์„œ ์„น์…˜๋งŒ ์ˆ˜์ง‘
279
  for filename in add_zip.namelist():
280
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
281
+ content = add_zip.read(filename).decode("utf-8", errors="ignore")
282
+ if filename in section_files:
283
+ section_files[filename].append(content)
284
+ else:
285
+ section_files[filename] = [content]
286
 
287
+ # ์„น์…˜ ํŒŒ์ผ๋“ค ๋ณ‘ํ•ฉํ•ด์„œ ์“ฐ๊ธฐ
288
+ for filename, contents in section_files.items():
289
+ if len(contents) == 1:
290
+ merged_content = contents[0]
291
+ else:
292
+ merged_content = merge_section_xml_list(contents)
293
+
294
  zi = zipfile.ZipInfo(filename)
295
  zi.compress_type = zipfile.ZIP_DEFLATED
296
  zi.external_attr = 0o100666 << 16
297
  zi.create_system = 0
298
  zi.date_time = now[:6]
299
  zi.flag_bits = 0
300
+ out_zip.writestr(zi, merged_content.encode("utf-8"))
301
 
302
  base_zip.close()
303
  add_zip.close()
 
306
 
307
  return out_buf.getvalue()
308
 
309
def merge_section_xml_list(xml_list: list) -> str:
    """Merge several section XML strings into one.

    The first entry is treated as the base document. Every
    ``<prefix:page ...>...</prefix:page>`` element found in each later entry
    is inserted immediately before the ``</prefix:pages>`` closing tag(s) of
    the base, in list order.

    Args:
        xml_list: Section XML strings; index 0 is the base document.

    Returns:
        The merged XML string. An empty list yields ``""``. Entries without
        any page element are skipped, and a base without a ``</...:pages>``
        closing tag is returned unchanged.
    """
    if not xml_list:
        return ""

    # Both patterns are loop-invariant, so build them once.
    # ``\b`` keeps ``<hp:pages>`` from being mistaken for a ``<hp:page>`` tag.
    page_pattern = re.compile(
        r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
        re.DOTALL,
    )
    pages_close_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')

    base_xml = xml_list[0]
    for additional_xml in xml_list[1:]:
        pages_str = ''.join(
            match.group(0) for match in page_pattern.finditer(additional_xml)
        )
        if not pages_str:
            continue

        # Use a callable replacement so the page text is inserted verbatim.
        # Passing it as a replacement template would make ``re`` interpret
        # any backslash in the content as an escape or group reference.
        # NOTE(review): every ``</...:pages>`` occurrence receives the pages;
        # confirm a section can only ever contain one such closing tag.
        base_xml = pages_close_pattern.sub(
            lambda closing, pages=pages_str: pages + closing.group(1),
            base_xml,
        )

    return base_xml
338
+
339
  def merge_sections(base_sections: dict, add_sections: dict) -> dict:
340
  """์„น์…˜ XML๋“ค์„ ๋ณ‘ํ•ฉ"""
341
  merged = base_sections.copy()
 
352
 
353
  def merge_section_content(base_xml: str, add_xml: str) -> str:
354
  """๋‹จ์ผ ์„น์…˜ XML ๋‚ด์šฉ์„ ๋ณ‘ํ•ฉ"""
355
+ # ์ถ”๊ฐ€ํ•  XML์—์„œ ํŽ˜์ด์ง€๋“ค ์ถ”์ถœ - ๋” ์ •ํ™•ํ•œ ํŒจํ„ด
356
+ page_pattern = re.compile(
357
+ r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
358
+ re.DOTALL
359
+ )
360
 
361
+ # ํŽ˜์ด์ง€ ๋งค์นญ
362
+ page_matches = list(page_pattern.finditer(add_xml))
363
+ if not page_matches:
364
  return base_xml
365
 
366
+ # ์ถ”๊ฐ€ํ•  ํŽ˜์ด์ง€๋“ค
367
+ pages_to_add = [match.group(0) for match in page_matches]
 
368
 
369
+ # ๋ฒ ์ด์Šค XML์˜ </hp:pages> ๋˜๋Š” </hml:pages> ํƒœ๊ทธ ์•ž์— ์‚ฝ์ž…
370
+ pages_end_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
371
+ pages_str = ''.join(pages_to_add)
372
 
373
+ merged_xml = pages_end_pattern.sub(pages_str + r'\1', base_xml)
 
374
 
375
  return merged_xml
376