dohyune committed on
Commit
0e4c56c
·
verified ·
1 Parent(s): dd891b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -8
app.py CHANGED
@@ -235,6 +235,107 @@ def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, d
235
  zin.close()
236
  return out_buf.getvalue(), dbg
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  # -------------------- UI --------------------
239
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
240
  st.markdown("""
@@ -290,11 +391,13 @@ if tpl and data:
290
  st.dataframe(pd.DataFrame([{"ํ‚ค":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
291
  use_container_width=True, height=320)
292
 
293
- if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ํŽ˜์ด์ง€๋ณ„ HWPX ZIP)"):
294
- mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
295
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
296
  debug_all = []
297
-
 
 
 
298
  for p in range(pages):
299
  chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
300
  mapping = {}
@@ -313,12 +416,21 @@ if tpl and data:
313
  else:
314
  for k in keys: mapping[f"{k}{i+1}"] = ""
315
 
316
- out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
 
 
 
 
 
 
 
317
  debug_all.append({"page": p+1, "stats": dbg})
318
- name = "_".join([r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","") for r in chunk]) if chunk else f"empty_{p+1}"
319
- zout.writestr(f"label_{name}.hwpx", out_hwpx)
320
 
321
- zout.close(); mem.seek(0)
322
- st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem, file_name="labels_by_page.zip", mime="application/zip")
 
 
 
 
323
  st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
324
  file_name="debug.json", mime="application/json")
 
235
  zin.close()
236
  return out_buf.getvalue(), dbg
237
 
238
def merge_hwpx_pages(base_hwpx: bytes, additional_hwpx: bytes) -> bytes:
    """Merge two HWPX files into a single HWPX.

    Non-section entries are copied verbatim from *base_hwpx*; the
    ``Contents/section*.xml`` entries of both archives are combined via
    :func:`merge_sections` and written back.

    Args:
        base_hwpx: Bytes of the HWPX (ZIP container) used as the base.
        additional_hwpx: Bytes of the HWPX whose pages are appended.

    Returns:
        Bytes of the merged HWPX archive.
    """
    import time

    now = time.localtime()

    def _entry(name: str, compress_type: int) -> zipfile.ZipInfo:
        # Build a ZipInfo with the fixed attributes the original writer
        # used: regular file rw-rw-rw-, MS-DOS create system, current
        # local time, no extra flag bits.
        zi = zipfile.ZipInfo(name)
        zi.compress_type = compress_type
        zi.external_attr = 0o100666 << 16
        zi.create_system = 0
        zi.date_time = now[:6]
        zi.flag_bits = 0
        return zi

    out_buf = io.BytesIO()
    # Context managers guarantee all three archive handles are closed even
    # if reading/merging raises (the original leaked them on error).
    with zipfile.ZipFile(io.BytesIO(base_hwpx), "r") as base_zip, \
         zipfile.ZipFile(io.BytesIO(additional_hwpx), "r") as add_zip, \
         zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED,
                         compresslevel=6) as out_zip:

        # "mimetype" must be the first entry and stored uncompressed
        # (OPC/ODF-style container convention).
        if "mimetype" in base_zip.namelist():
            out_zip.writestr(_entry("mimetype", zipfile.ZIP_STORED),
                             base_zip.read("mimetype"))

        # Collect section XML from the base; copy every other entry as-is.
        base_sections = {}
        for filename in base_zip.namelist():
            if filename == "mimetype":
                continue
            if filename.startswith("Contents/section") and filename.endswith(".xml"):
                base_sections[filename] = base_zip.read(filename).decode("utf-8", errors="ignore")
            else:
                out_zip.writestr(_entry(filename, zipfile.ZIP_DEFLATED),
                                 base_zip.read(filename))

        # Collect section XML from the archive being appended.
        add_sections = {
            filename: add_zip.read(filename).decode("utf-8", errors="ignore")
            for filename in add_zip.namelist()
            if filename.startswith("Contents/section") and filename.endswith(".xml")
        }

        # Merge and write the combined sections.
        merged_sections = merge_sections(base_sections, add_sections)
        for filename, content in merged_sections.items():
            out_zip.writestr(_entry(filename, zipfile.ZIP_DEFLATED),
                             content.encode("utf-8"))

    out_buf.seek(0)
    return out_buf.getvalue()
304
def merge_sections(base_sections: dict, add_sections: dict) -> dict:
    """Combine two mappings of section filename -> section XML text.

    Sections present in both mappings are merged with
    :func:`merge_section_content`; sections only in *add_sections* are
    carried over unchanged. Neither input dict is mutated.
    """
    result = dict(base_sections)
    for name, incoming in add_sections.items():
        result[name] = (
            merge_section_content(result[name], incoming)
            if name in result
            else incoming
        )
    return result
317
+
318
+ def merge_section_content(base_xml: str, add_xml: str) -> str:
319
+ """๋‹จ์ผ ์„น์…˜ XML ๋‚ด์šฉ์„ ๋ณ‘ํ•ฉ"""
320
+ # ์ถ”๊ฐ€ํ•  XML์—์„œ ํŽ˜์ด์ง€๋“ค ์ถ”์ถœ
321
+ page_pattern = re.compile(r'(<(?P<prefix>[a-zA-Z0-9_]+):page[^>]*>.*?</(?P=prefix):page>)', re.DOTALL)
322
+ add_pages = page_pattern.findall(add_xml)
323
+
324
+ if not add_pages:
325
+ return base_xml
326
+
327
+ # ๋ฒ ์ด์Šค XML์˜ ๋งˆ์ง€๋ง‰ ํŽ˜์ด์ง€ ๋’ค์— ์ƒˆ ํŽ˜์ด์ง€๋“ค ์‚ฝ์ž…
328
+ # </hp:pages> ํƒœ๊ทธ ์•ž์— ์‚ฝ์ž…
329
+ pages_end_pattern = re.compile(r'(</[a-zA-Z0-9_]+:pages>)')
330
+
331
+ # ์ถ”๊ฐ€ํ•  ํŽ˜์ด์ง€๋“ค์„ ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜
332
+ pages_to_add = ''.join([page[0] for page in add_pages])
333
+
334
+ # ์‚ฝ์ž…
335
+ merged_xml = pages_end_pattern.sub(pages_to_add + r'\1', base_xml)
336
+
337
+ return merged_xml
338
+
339
  # -------------------- UI --------------------
340
  with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
341
  st.markdown("""
 
391
  st.dataframe(pd.DataFrame([{"ํ‚ค":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
392
  use_container_width=True, height=320)
393
 
394
+ if st.button("๐Ÿš€ ํ†ตํ•ฉ ๋ผ๋ฒจ ์ƒ์„ฑ (๋‹จ์ผ HWPX ํŒŒ์ผ)"):
 
395
  pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
396
  debug_all = []
397
+
398
+ # ์ฒซ ํŽ˜์ด์ง€๋กœ ์‹œ์ž‘
399
+ merged_hwpx = None
400
+
401
  for p in range(pages):
402
  chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
403
  mapping = {}
 
416
  else:
417
  for k in keys: mapping[f"{k}{i+1}"] = ""
418
 
419
+ if p == 0:
420
+ # ์ฒซ ํŽ˜์ด์ง€: ํ…œํ”Œ๋ฆฟ ๊ธฐ๋ฐ˜์œผ๋กœ ์ƒ์„ฑ
421
+ merged_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
422
+ else:
423
+ # ๋‘ ๋ฒˆ์งธ ํŽ˜์ด์ง€๋ถ€ํ„ฐ: ๊ธฐ์กด HWPX์— ํŽ˜์ด์ง€ ์ถ”๊ฐ€
424
+ page_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
425
+ merged_hwpx = merge_hwpx_pages(merged_hwpx, page_hwpx)
426
+
427
  debug_all.append({"page": p+1, "stats": dbg})
 
 
428
 
429
+ # ๋ฐ•์Šค๋ฒˆํ˜ธ ๋ฒ”์œ„๋กœ ํŒŒ์ผ๋ช… ์ƒ์„ฑ
430
+ first_box = records[0].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
431
+ last_box = records[-1].get("๋ฐ•์Šค๋ฒˆํ˜ธ", "0000") if records else "0000"
432
+ filename = f"labels_{first_box}to{last_box}.hwpx" if first_box != last_box else f"labels_{first_box}.hwpx"
433
+
434
+ st.download_button("โฌ‡๏ธ ํ†ตํ•ฉ HWPX ๋‹ค์šด๋กœ๋“œ", data=merged_hwpx, file_name=filename, mime="application/zip")
435
  st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
436
  file_name="debug.json", mime="application/json")