dohyune committed on
Commit
ad0a525
·
verified ·
1 Parent(s): 15cd71d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -38
app.py CHANGED
@@ -236,7 +236,7 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
236
  return out_buf.getvalue(), dbg
237
 
238
  def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
239
- """๋‘ ๊ฐœ์˜ HWPX ํŒŒ์ผ์„ ๋ณ‘ํ•ฉํ•˜์—ฌ ํ•˜๋‚˜์˜ HWPX๋กœ ๋งŒ๋“ฆ"""
240
  import time
241
 
242
  base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
@@ -257,15 +257,15 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
257
  zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
 
260
- # ๋ชจ๋“  ํŒŒ์ผ์„ ์ฒ˜๋ฆฌ - ์„น์…˜ ํŒŒ์ผ๋“ค์€ ๋ณ„๋„ ๋ณ‘ํ•ฉ
261
  section_files = {}
262
 
263
- # ๋ฒ ์ด์Šค์—์„œ ๋ชจ๋“  ํŒŒ์ผ ์ˆ˜์ง‘
264
  for filename in base_zip.namelist():
265
  if filename == "mimetype":
266
  continue
267
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
268
- section_files[filename] = [base_zip.read(filename).decode("utf-8", errors="ignore")]
269
  else:
270
  zi = zipfile.ZipInfo(filename)
271
  zi.compress_type = zipfile.ZIP_DEFLATED
@@ -275,29 +275,22 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
275
  zi.flag_bits = 0
276
  out_zip.writestr(zi, base_zip.read(filename))
277
 
278
- # ์ถ”๊ฐ€ ํŒŒ์ผ์—์„œ ์„น์…˜๋งŒ ์ˆ˜์ง‘
279
  for filename in add_zip.namelist():
280
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
281
- content = add_zip.read(filename).decode("utf-8", errors="ignore")
282
  if filename in section_files:
283
- section_files[filename].append(content)
284
- else:
285
- section_files[filename] = [content]
286
 
287
- # ์„น์…˜ ํŒŒ์ผ๋“ค ๋ณ‘ํ•ฉํ•ด์„œ ์“ฐ๊ธฐ
288
- for filename, contents in section_files.items():
289
- if len(contents) == 1:
290
- merged_content = contents[0]
291
- else:
292
- merged_content = merge_section_xml_list(contents)
293
-
294
  zi = zipfile.ZipInfo(filename)
295
  zi.compress_type = zipfile.ZIP_DEFLATED
296
  zi.external_attr = 0o100666 << 16
297
  zi.create_system = 0
298
  zi.date_time = now[:6]
299
  zi.flag_bits = 0
300
- out_zip.writestr(zi, merged_content.encode("utf-8"))
301
 
302
  base_zip.close()
303
  add_zip.close()
@@ -306,33 +299,67 @@ def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
306
 
307
  return out_buf.getvalue()
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  def merge_section_xml_list(xml_list: list) -> str:
310
- """์—ฌ๋Ÿฌ ์„น์…˜ XML์„ ํ•˜๋‚˜๋กœ ๋ณ‘ํ•ฉ"""
311
  if len(xml_list) <= 1:
312
  return xml_list[0] if xml_list else ""
313
 
314
  base_xml = xml_list[0]
315
-
316
  for additional_xml in xml_list[1:]:
317
- # ๊ฐ XML์—์„œ ํŽ˜์ด์ง€ ์ถ”์ถœ
318
- page_pattern = re.compile(
319
- r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
320
- re.DOTALL
321
- )
322
-
323
- add_pages = page_pattern.findall(additional_xml)
324
- if not add_pages:
325
- continue
326
-
327
- # ํŽ˜์ด์ง€ ์ „์ฒด ๋งค์น˜
328
- page_matches = page_pattern.finditer(additional_xml)
329
- pages_to_add = [match.group(0) for match in page_matches]
330
-
331
- if pages_to_add:
332
- # ๋ฒ ์ด์Šค XML์˜ </prefix:pages> ํƒœ๊ทธ ์ฐพ์•„์„œ ๊ทธ ์•ž์— ์‚ฝ์ž…
333
- pages_close_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
334
- pages_str = ''.join(pages_to_add)
335
- base_xml = pages_close_pattern.sub(pages_str + r'\1', base_xml)
336
 
337
  return base_xml
338
 
@@ -377,6 +404,8 @@ def merge_section_content(base_xml: str, add_xml: str) -> str:
377
  # -------------------- UI --------------------
378
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
379
  st.markdown("""
 
 
380
  - **๋‹ค์ค‘ ์ค„(๋ชฉ๋ก/์ œ๋ชฉ)์€ ๋ถ€๋ชจ ๋ฌธ๋‹จ์„ ์—ฌ๋Ÿฌ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด**ํ•˜์—ฌ ๊ฒน์นจ ์—†์ด ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.
381
  - **์—…๋ฌด๋ช…์€ ๋‹จ์ผ์ค„๋กœ ์ฒ˜๋ฆฌ**ํ•˜์—ฌ ์›๋ณธ ํฐํŠธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค.
382
  - **์ƒ์„ฑ๋œ HWPX ํŒŒ์ผ์˜ ์ฝ๊ธฐ์ „์šฉ ์†์„ฑ์ด ํ•ด์ œ**๋˜์–ด ํŽธ์ง‘ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.
 
236
  return out_buf.getvalue(), dbg
237
 
238
  def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
239
+ """HWPX์— ์ƒˆ ํŽ˜์ด์ง€ ์ถ”๊ฐ€ (ํ…œํ”Œ๋ฆฟ ๋ณต์ œ ๋ฐฉ์‹)"""
240
  import time
241
 
242
  base_zip = zipfile.ZipFile(io.BytesIO(base_hwpx), "r")
 
257
  zi.flag_bits = 0
258
  out_zip.writestr(zi, base_zip.read("mimetype"))
259
 
260
+ # ์„น์…˜ ํŒŒ์ผ๋“ค์„ ๋”ฐ๋กœ ์ฒ˜๋ฆฌ
261
  section_files = {}
262
 
263
+ # ๋ฒ ์ด์Šค์—์„œ ๋ชจ๋“  ํŒŒ์ผ ์ฒ˜๋ฆฌ
264
  for filename in base_zip.namelist():
265
  if filename == "mimetype":
266
  continue
267
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
268
+ section_files[filename] = base_zip.read(filename).decode("utf-8", errors="ignore")
269
  else:
270
  zi = zipfile.ZipInfo(filename)
271
  zi.compress_type = zipfile.ZIP_DEFLATED
 
275
  zi.flag_bits = 0
276
  out_zip.writestr(zi, base_zip.read(filename))
277
 
278
+ # ์ถ”๊ฐ€ ํŒŒ์ผ์—์„œ ํŽ˜์ด์ง€ ์ถ”์ถœํ•ด์„œ ๊ธฐ์กด ์„น์…˜์— ์ถ”๊ฐ€
279
  for filename in add_zip.namelist():
280
  if filename.startswith("Contents/section") and filename.endswith(".xml"):
281
+ add_content = add_zip.read(filename).decode("utf-8", errors="ignore")
282
  if filename in section_files:
283
+ section_files[filename] = add_page_to_section(section_files[filename], add_content)
 
 
284
 
285
+ # ์ˆ˜์ •๋œ ์„น์…˜ ํŒŒ์ผ๋“ค ์ €์žฅ
286
+ for filename, content in section_files.items():
 
 
 
 
 
287
  zi = zipfile.ZipInfo(filename)
288
  zi.compress_type = zipfile.ZIP_DEFLATED
289
  zi.external_attr = 0o100666 << 16
290
  zi.create_system = 0
291
  zi.date_time = now[:6]
292
  zi.flag_bits = 0
293
+ out_zip.writestr(zi, content.encode("utf-8"))
294
 
295
  base_zip.close()
296
  add_zip.close()
 
299
 
300
  return out_buf.getvalue()
301
 
302
def add_page_to_section(base_xml: str, add_xml: str) -> str:
    """Add the first page element of ``add_xml`` to ``base_xml``.

    The first ``<prefix:page ...>...</prefix:page>`` element found in
    ``add_xml`` is extracted, its ``id`` attribute is rewritten by
    ``update_page_id`` and the resulting element is inserted immediately
    before each ``</prefix:pages>`` closing tag found in ``base_xml``.

    Args:
        base_xml: Section XML that receives the new page.
        add_xml: Section XML from which the first page element is taken.

    Returns:
        ``base_xml`` with the page inserted. ``base_xml`` is returned
        unchanged when ``add_xml`` contains no page element or when
        ``base_xml`` contains no ``</prefix:pages>`` closing tag.
    """
    page_pattern = re.compile(
        r'<(?P<prefix>[a-zA-Z0-9_]+):page\b[^>]*>.*?</(?P=prefix):page>',
        re.DOTALL,
    )

    page_match = page_pattern.search(add_xml)
    if page_match is None:
        return base_xml

    # Rewrite the page id relative to the ids already present in the base.
    new_page = update_page_id(base_xml, page_match.group(0))

    pages_close_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')

    # A callable replacement inserts the page XML literally. Using the XML as
    # a replacement template would make ``re`` treat backslashes in the page
    # content as escapes or group references (error or corrupted output).
    return pages_close_pattern.sub(
        lambda match: new_page + match.group(1),
        base_xml,
    )
326
+
327
def update_page_id(base_xml: str, new_page: str) -> str:
    """Give ``new_page`` an id that differs from the page ids in ``base_xml``.

    The id is read from the first ``<prefix:page ... id="...">`` opening tag
    in ``new_page``. The new id is ``"<original>_<n>"``, where ``n`` starts at
    2 and is incremented until the candidate is not among the page ids
    collected from ``base_xml``. A suffix is always appended, even when the
    original id does not collide.

    Args:
        base_xml: XML whose page opening tags supply the ids already in use.
        new_page: XML of the page element whose id should be rewritten.

    Returns:
        ``new_page`` with only the page opening tag's id value replaced, or
        ``new_page`` unchanged when no page id attribute is found.
    """
    id_pattern = re.compile(r'<[a-zA-Z0-9_]+:page\b[^>]*\bid="([^"]*)"')

    existing_ids = {match.group(1) for match in id_pattern.finditer(base_xml)}

    id_match = id_pattern.search(new_page)
    if id_match is None:
        return new_page

    original_id = id_match.group(1)

    counter = 2
    new_id = f"{original_id}_{counter}"
    while new_id in existing_ids:
        counter += 1
        new_id = f"{original_id}_{counter}"

    # Replace only the matched attribute value in the page's opening tag.
    # A plain ``str.replace`` on 'id="<original>"' would also rewrite nested
    # elements that share the same id value and any attribute whose name
    # merely ends in "id".
    start, end = id_match.span(1)
    return new_page[:start] + new_id + new_page[end:]
354
+
355
  def merge_section_xml_list(xml_list: list) -> str:
356
+ """์—ฌ๋Ÿฌ ์„น์…˜ XML์„ ํ•˜๋‚˜๋กœ ๋ณ‘ํ•ฉ (์‚ฌ์šฉํ•˜์ง€ ์•Š์ง€๋งŒ ํ˜ธํ™˜์„ฑ ์œ ์ง€)"""
357
  if len(xml_list) <= 1:
358
  return xml_list[0] if xml_list else ""
359
 
360
  base_xml = xml_list[0]
 
361
  for additional_xml in xml_list[1:]:
362
+ base_xml = add_page_to_section(base_xml, additional_xml)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
  return base_xml
365
 
 
404
  # -------------------- UI --------------------
405
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
406
  st.markdown("""
407
+ - **ํ…œํ”Œ๋ฆฟ์€ 1ํŽ˜์ด์ง€์— N๊ฐœ ๋ผ๋ฒจ**์ด ์žˆ๋Š” ํ‘œ ํ˜•ํƒœ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
408
+ - **๋ฐ•์Šค ์ˆ˜๊ฐ€ N๊ฐœ๋ฅผ ์ดˆ๊ณผํ•˜๋ฉด ์ƒˆ ํŽ˜์ด์ง€๊ฐ€ ์ž๋™ ์ถ”๊ฐ€**๋ฉ๋‹ˆ๋‹ค.
409
  - **๋‹ค์ค‘ ์ค„(๋ชฉ๋ก/์ œ๋ชฉ)์€ ๋ถ€๋ชจ ๋ฌธ๋‹จ์„ ์—ฌ๋Ÿฌ ๋ฌธ๋‹จ์œผ๋กœ ๊ต์ฒด**ํ•˜์—ฌ ๊ฒน์นจ ์—†์ด ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.
410
  - **์—…๋ฌด๋ช…์€ ๋‹จ์ผ์ค„๋กœ ์ฒ˜๋ฆฌ**ํ•˜์—ฌ ์›๋ณธ ํฐํŠธ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค.
411
  - **์ƒ์„ฑ๋œ HWPX ํŒŒ์ผ์˜ ์ฝ๊ธฐ์ „์šฉ ์†์„ฑ์ด ํ•ด์ œ**๋˜์–ด ํŽธ์ง‘ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.