dohyune commited on
Commit
57565d7
·
verified ·
1 Parent(s): fbfae11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -220
app.py CHANGED
@@ -1,240 +1,212 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from reportlab.pdfgen import canvas
4
- from reportlab.pdfbase import pdfmetrics
5
- from reportlab.pdfbase.ttfonts import TTFont
6
- from reportlab.lib.pagesizes import A4
7
- from reportlab.lib.units import mm
8
- from io import BytesIO
9
- import math
10
-
11
- st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ PDF ์ถœ๋ ฅ๊ธฐ", layout="wide")
12
- st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ PDF ์ถœ๋ ฅ๊ธฐ (๋ผ๋ฒจ ๊ทœ๊ฒฉ ์ปค์Šคํ…€ / ํ•œ๊ตญ์–ด ํฐํŠธ ์—…๋กœ๋“œ)")
13
-
14
- with st.expander("์‚ฌ์šฉ ๋ฐฉ๋ฒ•", expanded=True):
15
- st.markdown("""
16
- 1. **์—‘์…€/CSV ์—…๋กœ๋“œ** โ†’ ํ•„์ˆ˜ ์ปฌ๋Ÿผ: `๋ฐ•์Šค๋ฒˆํ˜ธ` / ๊ถŒ์žฅ: `์ข…๋ฃŒ์—ฐ๋„`, `๋ณด์กด๊ธฐ๊ฐ„`, `๋‹จ์œ„์—…๋ฌด`, `๊ธฐ๋ก๋ฌผ์ฒ `, `์ œ๋ชฉ`, `๊ด€๋ฆฌ๋ฒˆํ˜ธ`
17
- 2. (์„ ํƒ) **TTF ํฐํŠธ ์—…๋กœ๋“œ**(์˜ˆ: ๋‚˜๋ˆ”๊ณ ๋”•, ๋ณธ๊ณ ๋”•, ๋ง‘์€ ๊ณ ๋”• ๋“ฑ). ์—…๋กœ๋“œ ์•ˆ ํ•˜๋ฉด ๊ธฐ๋ณธ ํฐํŠธ ์‚ฌ์šฉ(์˜๋ฌธ ์œ„์ฃผ).
18
- 3. **๋ผ๋ฒจ ๊ทœ๊ฒฉ**(ํŽ˜์ด์ง€ ์—ฌ๋ฐฑ, ๋ผ๋ฒจ ๊ฐ€๋กœ/์„ธ๋กœ, ํ–‰/์—ด, ๋ผ๋ฒจ ๊ฐ„๊ฒฉ)์„ ์ž…๋ ฅ.
19
- 4. **ํ…์ŠคํŠธ ๋ฐฐ์น˜**(๋ผ๋ฒจ ์•ˆ์ชฝ ํŒจ๋”ฉ, ํฐํŠธ ํฌ๊ธฐ, ์ค„ ๊ฐ„๊ฒฉ ๋“ฑ) ์กฐ์ •.
20
- 5. **PDF ์ƒ์„ฑ** โ†’ ๋ผ๋ฒจ ์šฉ์ง€(Formtec ๋“ฑ)์— ์ธ์‡„.
21
- """)
22
-
23
- # -----------------
24
- # ๋ฐ์ดํ„ฐ ๋กœ๋“œ
25
- # -----------------
26
- file = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
27
- df = None
28
- if file:
29
- if file.name.lower().endswith(".csv"):
30
- df = pd.read_csv(file)
31
- else:
32
- df = pd.read_excel(file)
33
-
34
- # ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๊ฒ€์‚ฌ
35
- if df is not None and "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
36
- st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
37
- st.stop()
38
-
39
- # -----------------
40
- # ํฐํŠธ ์„ค์ •
41
- # -----------------
42
- st.subheader("๐Ÿ”ค ํฐํŠธ ์„ค์ •")
43
- font_file = st.file_uploader("ํ•œ๊ตญ์–ด ํฐํŠธ(TTF) ์—…๋กœ๋“œ (์˜ˆ: NanumGothic.ttf / MalgunGothic.ttf)", type=["ttf"])
44
- font_name = "BaseFont"
45
- if font_file:
46
- try:
47
- font_bytes = font_file.read()
48
- # ๋ฉ”๋ชจ๋ฆฌ ๋“ฑ๋ก: ReportLab์€ ํŒŒ์ผ ๊ฒฝ๋กœ๊ฐ€ ํ•„์š” โ†’ ์ž„์‹œ ํŒŒ์ผ ๋งŒ๋“ค๊ธฐ๋ณด๋‹ค ๋ฉ”๋ชจ๋ฆฌ ๋ ˆ์ง€์Šคํ„ฐ ํŠธ๋ฆญ
49
- # ํ•˜์ง€๋งŒ TTFont๋Š” ํŒŒ์ผ ๊ฒฝ๋กœ ์š”๊ตฌ โ†’ ์ž„์‹œํŒŒ์ผ ์ €์žฅ
50
- import tempfile
51
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".ttf")
52
- tmp.write(font_bytes); tmp.flush()
53
- pdfmetrics.registerFont(TTFont("UserKorean", tmp.name))
54
- font_name = "UserKorean"
55
- st.success("โœ… ํฐํŠธ ๋“ฑ๋ก ์™„๋ฃŒ: UserKorean")
56
- except Exception as e:
57
- st.warning(f"ํฐํŠธ ๋“ฑ๋ก ์‹คํŒจ. ๊ธฐ๋ณธ ํฐํŠธ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. (์‚ฌ์œ : {e})")
58
- else:
59
- # ๋‚ด์žฅ ๊ธฐ๋ณธ ํฐํŠธ (์˜๋ฌธ ์ค‘์‹ฌ)
60
- font_name = "Helvetica"
61
-
62
- # -----------------
63
- # ๋ผ๋ฒจ/ํŽ˜์ด์ง€ ๋ ˆ์ด์•„์›ƒ
64
- # -----------------
65
- st.subheader("๐Ÿ“ ๋ผ๋ฒจ ๊ทœ๊ฒฉ (mm ๋‹จ์œ„)")
66
- colA, colB, colC = st.columns(3)
67
- with colA:
68
- page_size = st.selectbox("ํŽ˜์ด์ง€ ํฌ๊ธฐ", ["A4"], index=0)
69
- with colB:
70
- margin_left = st.number_input("์™ผ์ชฝ ์—ฌ๋ฐฑ(mm)", 5.0, 50.0, 10.0, 0.5)
71
- margin_top = st.number_input("์ƒ๋‹จ ์—ฌ๋ฐฑ(mm)", 5.0, 50.0, 10.0, 0.5)
72
- with colC:
73
- rows = st.number_input("ํ–‰ ์ˆ˜", 1, 20, 10, 1)
74
- cols = st.number_input("์—ด ์ˆ˜", 1, 10, 3, 1)
75
-
76
- colD, colE, colF = st.columns(3)
77
- with colD:
78
- label_w = st.number_input("๋ผ๋ฒจ ๊ฐ€๋กœ(mm)", 20.0, 210.0, 70.0, 0.5)
79
- with colE:
80
- label_h = st.number_input("๋ผ๋ฒจ ์„ธ๋กœ(mm)", 10.0, 297.0, 25.0, 0.5)
81
- with colF:
82
- gap_x = st.number_input("๊ฐ€๋กœ ๊ฐ„๊ฒฉ(mm)", 0.0, 20.0, 3.0, 0.5)
83
- gap_y = st.number_input("์„ธ๋กœ ๊ฐ„๊ฒฉ(mm)", 0.0, 20.0, 3.0, 0.5)
84
-
85
- # -----------------
86
- # ๋ผ๋ฒจ ๋‚ด๋ถ€ ํ…์ŠคํŠธ ๋ฐฐ์น˜
87
- # -----------------
88
- st.subheader("๐Ÿงฑ ๋ผ๋ฒจ ๋‚ด๋ถ€ ๋ ˆ์ด์•„์›ƒ")
89
- col1, col2, col3 = st.columns(3)
90
- with col1:
91
- pad_x = st.number_input("๋‚ด๋ถ€ ํŒจ๋”ฉ X(mm)", 0.0, 20.0, 2.0, 0.5)
92
- pad_y = st.number_input("๋‚ด๋ถ€ ํŒจ๋”ฉ Y(mm)", 0.0, 20.0, 2.0, 0.5)
93
- with col2:
94
- fs_big = st.number_input("ํฐํŠธ ํฌ๊ธฐ(ํฐ ์ œ๋ชฉ)", 6, 40, 16, 1)
95
- fs_mid = st.number_input("ํฐํŠธ ํฌ๊ธฐ(์ค‘๊ฐ„)", 6, 40, 11, 1)
96
- with col3:
97
- fs_small = st.number_input("ํฐํŠธ ํฌ๊ธฐ(์ž‘๊ฒŒ/๋ชฉ๋ก)", 6, 20, 9, 1)
98
- line_gap = st.number_input("์ค„ ๊ฐ„๊ฒฉ(๋ฐฐ์ˆ˜)", 0.8, 2.0, 1.2, 0.1)
99
-
100
- st.caption("๐Ÿ’ก Formtec 3203 ๋น„์Šทํ•œ ์„ค์ • ์˜ˆ์‹œ: ๊ฐ€๋กœ 70, ์„ธ๋กœ 25, ์—ด 3, ํ–‰ 10, ์—ฌ๋ฐฑ 10/10, ๊ฐ„๊ฒฉ 3/3 (ํ”„๋ฆฐํ„ฐ๋งˆ๋‹ค ์•ฝ๊ฐ„ ์กฐ์ •)")
101
-
102
- # -----------------
103
- # ํ…์ŠคํŠธ ์ƒ์„ฑ ํ•จ์ˆ˜
104
- # -----------------
105
- def year_range(series):
106
  s = series.astype(str).fillna("")
107
  v = s[~s.isin(["", "0", "0000"])]
108
- if len(v) == 0:
109
- return "0000-0000"
110
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
111
- if len(nums) == 0:
112
- return "0000-0000"
113
  return f"{nums.min():04d}-{nums.max():04d}"
114
 
115
- def build_records(df: pd.DataFrame):
116
  df = df.copy()
117
  df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
118
- # ์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„)
 
 
 
119
  if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
120
- yr = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(year_range).reset_index()
121
  yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
122
  else:
123
  yr = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique(), "์ƒ์‚ฐ์—ฐ๋„": "0000-0000"})
124
- # ๋ชฉ๋ก
 
125
  has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
126
- list_rows = []
127
- for box, g in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
128
  lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ œ๋ชฉ','')}"
129
  for _, r in g.iterrows()]
130
- list_rows.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": box, "๋ชฉ๋ก": "\n".join(lines)})
131
- list_df = pd.DataFrame(list_rows)
 
132
  # ๋Œ€ํ‘œ ๋ฉ”ํƒ€
133
- cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
134
- meta_exist = [c for c in cols if c in df.columns]
135
- meta = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist] if meta_exist else pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})
 
 
136
  merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
137
- return merged.sort_values("๋ฐ•์Šค๋ฒˆํ˜ธ").to_dict(orient="records")
138
-
139
- def draw_label(c: canvas.Canvas, x, y, w, h, rec, font_name, fs_big, fs_mid, fs_small, line_gap):
140
- """
141
- ์ขŒํ‘œ๊ณ„: reportlab์€ ์ขŒํ•˜๋‹จ์ด ์›์ .
142
- x,y = ๋ผ๋ฒจ ์ขŒํ•˜๋‹จ. w,h = ๋ผ๋ฒจ ํฌ๊ธฐ.
143
- """
144
- # ์—ฌ๋ฐฑ
145
- inner_x = x + pad_x * mm
146
- inner_y = y + pad_y * mm
147
- inner_w = w - 2 * pad_x * mm
148
- inner_h = h - 2 * pad_y * mm
149
-
150
- # ์ƒ๋‹จ ๊ตต์€ ์ค„: ๋ฐ•์Šค๋ฒˆํ˜ธ
151
- c.setFont(font_name, fs_big)
152
- boxno = rec.get("๋ฐ•์Šค๋ฒˆํ˜ธ", "")
153
- c.drawString(inner_x, inner_y + inner_h - fs_big*1.1, f"{boxno}")
154
-
155
- # 2ํ–‰: (์ƒ์‚ฐ์—ฐ๋„/๋ณด์กด๊ธฐ๊ฐ„)
156
- c.setFont(font_name, fs_mid)
157
- prod = rec.get("์ƒ์‚ฐ์—ฐ๋„","")
158
- keep = rec.get("๋ณด์กด๊ธฐ๊ฐ„","") or ""
159
- line_y = inner_y + inner_h - fs_big*1.1 - fs_mid*1.5
160
- c.drawString(inner_x, line_y, f"{prod} {keep}")
161
-
162
- # 3ํ–‰: ๋‹จ์œ„์—…๋ฌด / ๊ธฐ๋ก๋ฌผ์ฒ  (์žˆ์œผ๋ฉด)
163
- line_y -= fs_mid * 1.2
164
- unit = rec.get("๋‹จ์œ„์—…๋ฌด","") or ""
165
- series = rec.get("๊ธฐ๋ก๋ฌผ์ฒ ","") or ""
166
- if unit or series:
167
- c.setFont(font_name, fs_mid)
168
- c.drawString(inner_x, line_y, f"{unit} {series}")
169
- line_y -= fs_mid * 1.0
170
-
171
- # ๋ชฉ๋ก(์—ฌ๋Ÿฌ ์ค„, ์ž‘์€ ๊ธ€์”จ)
172
- c.setFont(font_name, fs_small)
173
- list_text = rec.get("๋ชฉ๋ก","") or ""
174
- for ln in list_text.split("\n"):
175
- if line_y < inner_y + fs_small * 1.2: # ๋ผ๋ฒจ ํ•˜๋‹จ ๋„˜์–ด๊ฐ€๋ฉด ์ค‘๋‹จ
176
- break
177
- c.drawString(inner_x, line_y, ln)
178
- line_y -= fs_small * line_gap
179
-
180
- def make_pdf(records):
181
- buffer = BytesIO()
182
- if page_size == "A4":
183
- pw, ph = A4
184
- else:
185
- pw, ph = A4
186
-
187
- c = canvas.Canvas(buffer, pagesize=(pw, ph))
188
- c.setAuthor("BoxLabel")
189
- c.setTitle("Box Labels")
190
-
191
- pdfmetrics.getFont(font_name) # ensure registered
192
-
193
- # ์ขŒํ‘œ/ํฌ๊ธฐ(mm โ†’ pt)
194
- L = margin_left * mm
195
- T = margin_top * mm
196
- W = label_w * mm
197
- H = label_h * mm
198
- GX = gap_x * mm
199
- GY = gap_y * mm
200
-
201
- per_page = int(rows * cols)
202
- total_pages = math.ceil(len(records) / per_page) if records else 1
203
-
204
- idx = 0
205
- for p in range(total_pages):
206
- for r in range(int(rows)):
207
- for ccol in range(int(cols)):
208
- if idx >= len(records):
209
- break
210
- # ์ขŒํ‘œ ๊ณ„์‚ฐ (์ขŒํ•˜๋‹จ ์›์ ์ด๋ฏ€๋กœ ์ƒ๋‹จ์—์„œ ๋‚ด๋ ค์˜ค๊ฒŒ Y๋ฅผ ์กฐ์ •)
211
- x = L + ccol * (W + GX)
212
- y_top = ph - T - r * (H + GY)
213
- y = y_top - H
214
- draw_label(c, x, y, W, H, records[idx], font_name, fs_big, fs_mid, fs_small, line_gap)
215
- idx += 1
216
- if idx >= len(records):
217
- break
218
- c.showPage()
219
- c.save()
220
- buffer.seek(0)
221
- return buffer
222
-
223
- # -----------------
224
- # ๋ฉ”์ธ ๋™์ž‘
225
- # -----------------
226
- if df is not None:
227
- # ๋ฏธ๋ฆฌ๋ณด๊ธฐ
228
- st.subheader("๐Ÿ“‹ ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
229
- st.dataframe(df.head(10), use_container_width=True)
 
 
 
 
230
 
231
- records = build_records(df)
232
- st.write(f"์ด **{len(records)}**๊ฐœ ๋ฐ•์Šค๊ฐ€ ๊ฐ์ง€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
233
- default_sel = [r["๋ฐ•์Šค๋ฒˆํ˜ธ"] for r in records]
234
- sel = st.multiselect("์ƒ์„ฑํ•  ๋ฐ•์Šค๋ฒˆํ˜ธ ์„ ํƒ (๋น„์šฐ๋ฉด ์ „์ฒด)", options=default_sel)
235
- if sel:
236
- records = [r for r in records if r["๋ฐ•์Šค๋ฒˆํ˜ธ"] in set(sel)]
 
 
 
 
237
 
238
- if st.button("๐Ÿš€ PDF ์ƒ์„ฑ"):
239
- pdf = make_pdf(records)
240
- st.download_button("โฌ‡๏ธ PDF ๋‹ค์šด๋กœ๋“œ", data=pdf.getvalue(), file_name="box_labels.pdf", mime="application/pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import io, zipfile, re, html, json
4
+ from typing import Dict, Tuple, Optional
5
+
6
+ st.set_page_config(page_title="๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ(HWPX) โ€” ํ•„๋“œ ์™„์ „ ์น˜ํ™˜", layout="wide")
7
+ st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ โ€” HWPX ํ•„๋“œ ์™„์ „ ์น˜ํ™˜(๋ชจ๋“  XML / ์ ‘๋‘์–ด ์™€์ผ๋“œ์นด๋“œ)")
8
+
9
+ # ================== ๋ฐ์ดํ„ฐ ์œ ํ‹ธ ==================
10
+ def _year_range(series: pd.Series) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  s = series.astype(str).fillna("")
12
  v = s[~s.isin(["", "0", "0000"])]
13
+ if v.empty: return "0000-0000"
 
14
  nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
15
+ if nums.empty: return "0000-0000"
 
16
  return f"{nums.min():04d}-{nums.max():04d}"
17
 
18
def build_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the raw sheet into one summary row per box number.

    Each output row carries: representative metadata (first row of the box
    wins), a CRLF-joined item list, and the production-year range derived
    from the end-year column when that column exists.
    """
    work = df.copy()
    work["๋ฐ•์Šค๋ฒˆํ˜ธ"] = work["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
    if "์ œ๋ชฉ" in work.columns:
        work["์ œ๋ชฉ"] = work["์ œ๋ชฉ"].astype(str)

    # Production-year range per box: computed from the end-year column,
    # otherwise the "0000-0000" sentinel for every box.
    if "์ข…๋ฃŒ์—ฐ๋„" in work.columns:
        yr = work.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(_year_range).reset_index()
        yr.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
    else:
        yr = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": work["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique(), "์ƒ์‚ฐ์—ฐ๋„": "0000-0000"})

    # Per-box item list (management number + title when available), CRLF-joined
    # because the downstream consumer is an HWP-family document.
    has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in work.columns
    lists = []
    for box_no, group in work.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
        if has_mgmt:
            lines = [f"- {r['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {r.get('์ œ๋ชฉ','')}" for _, r in group.iterrows()]
        else:
            lines = [f"- {r.get('์ œ๋ชฉ','')}" for _, r in group.iterrows()]
        lists.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": box_no, "๋ชฉ๋ก": "\r\n".join(lines)})
    list_df = pd.DataFrame(lists)

    # Representative metadata: first row of each box, restricted to the
    # well-known columns that actually exist in the upload.
    meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
    meta_exist = [c for c in meta_cols if c in work.columns]
    if meta_exist:
        meta = work.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist]
    else:
        meta = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": work["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique()})

    merged = meta.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left").merge(yr, on="๋ฐ•์Šค๋ฒˆํ˜ธ", how="left")
    return merged
48
+
49
# ================== Substitution utilities ==================
# Prefix wildcard: accept not only <hp:..> but any namespace prefix
# (<hwp:..>, <h:..>, ...). {name} is filled in via str.format with the
# (re.escape-d) field name; the backreference (?P=prefix) forces the
# fieldEnd marker to use the same prefix as its fieldBegin.
FIELD_PAIR_RE_TMPL = (
    r'<(?P<prefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
    r'(.*?)'
    r'<(?P=prefix):fieldEnd\b[^>]*/>'
)

# Fallback-path token: TOKEN_FMT.format(key=k) yields the literal "{{k}}"
# (the doubled braces collapse to single literal braces under str.format).
TOKEN_FMT = "{{{{{key}}}}}"
59
+
60
+ def _run_for_plain(text: str) -> str:
61
+ return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
62
+
63
+ def _run_for_list(text: str) -> str:
64
+ if text is None: return ""
65
+ lines = str(text).replace("\r\n","\n").split("\n")
66
+ parts = []
67
+ for i, ln in enumerate(lines):
68
+ if i>0: parts.append("<hp:lineBreak/>")
69
+ parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
70
+ return "".join(parts)
71
+
72
def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
    """Substitute *mapping* values into one XML document string.

    Two passes:
      1) replace every fieldBegin(name=key) .. fieldEnd block (any namespace
         prefix, via FIELD_PAIR_RE_TMPL) with run elements for the value;
      2) fallback: replace any literal "{{key}}" tokens that remain.

    Keys matching "๋ชฉ๋ก<N>"/"list<N>" are rendered as multi-line run lists,
    everything else as a single run. Hit counts are accumulated into
    dbg["field_hits"] / dbg["token_hits"], and dbg["files_touched"] flips
    True when anything changed. Returns the (possibly modified) XML string.
    """
    changed_any = False
    # Hoisted: decides whether a key denotes a multi-line "list" slot.
    list_key = re.compile(r"^(๋ชฉ๋ก|list)\d+$", re.IGNORECASE)

    # 1) Full replacement of field pairs (all prefixes, all XML parts).
    for k, v in mapping.items():
        is_list = bool(list_key.match(k))
        replacement = _run_for_list(v) if is_list else _run_for_plain(v)

        pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
        # BUGFIX: use a callable repl. Passing `replacement` as a string made
        # re interpret backslashes / "\g<...>" sequences in the value as
        # replacement-template escapes, which could raise re.error or corrupt
        # the output for values containing "\".
        xml_new, n = pat.subn(lambda _m: replacement, xml)
        if n:
            dbg["field_hits"][k] = dbg["field_hits"].get(k, 0) + n
            xml = xml_new
            changed_any = True

    # 2) Fallback path: literal {{key}} tokens still present in the document.
    for k, v in mapping.items():
        tok = TOKEN_FMT.format(key=k)
        if tok in xml:
            if list_key.match(k):
                rep = _run_for_list(v)
            else:
                rep = html.escape("" if v is None else str(v))
            xml = xml.replace(tok, rep)  # str.replace is literal — no escape issue here
            dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
            changed_any = True

    if changed_any:
        dbg["files_touched"] = True
    return xml
99
+
100
def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
    """Apply *mapping* to every XML member of an HWPX (zip) archive.

    Returns the rebuilt archive bytes plus a debug dict with per-key hit
    counts and the names of the member files that actually changed.
    """
    dbg = {"field_hits": {}, "token_hits": {}, "touched_files": []}
    src = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
    sink = io.BytesIO()
    dst = zipfile.ZipFile(sink, "w")

    # "mimetype" must be the first entry and stay uncompressed.
    if "mimetype" in src.namelist():
        first = zipfile.ZipInfo("mimetype")
        first.compress_type = zipfile.ZIP_STORED
        dst.writestr(first, src.read("mimetype"))

    for entry in src.infolist():
        if entry.filename == "mimetype":
            continue  # already emitted above
        payload = src.read(entry.filename)
        if entry.filename.lower().endswith(".xml"):
            try:
                text = payload.decode("utf-8", errors="ignore")
                rewritten = _apply_to_xml(
                    text,
                    mapping,
                    {"field_hits": dbg["field_hits"],
                     "token_hits": dbg["token_hits"],
                     "files_touched": False},
                )
                # Only re-encode members that were actually modified; untouched
                # members keep their original bytes verbatim.
                if rewritten != text:
                    dbg["touched_files"].append(entry.filename)
                    payload = rewritten.encode("utf-8")
            except Exception:
                pass  # best-effort: keep original bytes on any decode/substitution failure
        info = zipfile.ZipInfo(entry.filename)
        info.compress_type = zipfile.ZIP_DEFLATED
        dst.writestr(info, payload)

    dst.close()
    sink.seek(0)
    src.close()
    return sink.getvalue(), dbg
131
+
132
# ================== UI ==================
# Main Streamlit flow: upload a HWPX template + an Excel/CSV sheet, preview the
# per-slot mapping, then emit one filled HWPX per page inside a ZIP.
with st.expander("์‚ฌ์šฉ๋ฒ•", expanded=True):
    st.markdown("""
- ํ…œํ”Œ๋ฆฟ์€ **HWPX(ํ•œ๊ธ€)**์ด๋ฉฐ, ํ•„๋“œ์ปจํŠธ๋กค `name="๋ฐ•์Šค๋ฒˆํ˜ธ1"` ๋“ฑ์˜ ์ด๋ฆ„์„ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
- ์ด ์•ฑ์€ ZIP ๋‚ด๋ถ€์˜ **๋ชจ๋“  XML**์„ ํ›‘์œผ๋ฉฐ, ์ ‘๋‘์–ด๊ฐ€ ๋ฌด์—‡์ด๋“ (`<hp:...>`, `<hwp:...>` ๋“ฑ)
**`fieldBegin(name=ํ‚ค)` ~ `fieldEnd` ์‚ฌ์ด ์ „์ฒด ๋ธ”๋ก์„ ๊ฐ’ run๋“ค๋กœ ๊ต์ฒด**ํ•ฉ๋‹ˆ๋‹ค.
- ํ† ํฐ(`{{๋ฐ•์Šค๋ฒˆํ˜ธ1}}`)์ด ๋‚จ์•„ ์žˆ์œผ๋ฉด **๋ฐฑ์—… ๊ฒฝ๋กœ**๋กœ ๊ทธ ์ž๋ฆฌ๋„ ์น˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
""")

# Inputs: template file, number of label slots per page, and the data sheet.
tpl = st.file_uploader("๐Ÿ“„ HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
n_per_page = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜(ํ•œ ํŽ˜์ด์ง€ N๊ฐœ)", 1, 12, 3, 1)
data = st.file_uploader("๐Ÿ“Š ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])

if tpl and data:
    tpl_bytes = tpl.read()
    # CSV vs Excel is decided purely by file extension.
    df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)

    # The box-number column is mandatory; abort the script run without it.
    if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df.columns:
        st.error("โŒ ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        st.stop()

    st.success("โœ… ์œ„์น˜ ๋งคํ•‘ ์™„๋ฃŒ (์—‘์…€ ์ธก)")
    st.dataframe(df.head(10), use_container_width=True)

    # One aggregated row per box; box numbers shown zero-padded to 4 digits.
    merged = build_rows(df)
    boxes = merged["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4).unique().tolist()

    st.subheader("๐Ÿ”Ž ์—…๋กœ๋“œ๋œ ๋ฐ•์Šค๋ฒˆํ˜ธ ๋ชฉ๋ก")
    st.write(f"์ด **{len(boxes)}**๊ฐœ")
    st.dataframe(pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": boxes}), use_container_width=True, height=240)

    # Empty selection means "all boxes".
    sel = st.multiselect("์ƒ์„ฑํ•  ๋ฐ•์Šค๋ฒˆํ˜ธ ์„ ํƒ (๋น„์šฐ๋ฉด ์ „์ฒด)", options=boxes)
    work = merged[merged["๋ฐ•์Šค๋ฒˆํ˜ธ"].isin(sel)] if sel else merged
    records = work.sort_values("๋ฐ•์Šค๋ฒˆํ˜ธ").to_dict(orient="records")

    # First-page mapping preview: shows the slot keys (e.g. "๋ฐ•์Šค๋ฒˆํ˜ธ1") and the
    # values that would be substituted; unfilled slots map to "".
    st.subheader("๐Ÿงช 1ํŽ˜์ด์ง€ ๋งคํ•‘ ํ”„๋ฆฌ๋ทฐ")
    keys = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]
    mapping_preview = {}
    for i in range(int(n_per_page)):
        if i < len(records):
            r = records[i]
            for k in keys:
                # The "์ข…๋ฃŒ์—ฐ๋„" slot is deliberately fed the computed "์ƒ์‚ฐ์—ฐ๋„" range.
                mapping_preview[f"{k}{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","") if k=="์ข…๋ฃŒ์—ฐ๋„" else r.get(k,"")
        else:
            for k in keys:
                mapping_preview[f"{k}{i+1}"] = ""
    st.dataframe(pd.DataFrame([{"ํ‚ค":k, "๊ฐ’ ์•ž๋ถ€๋ถ„":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
                 use_container_width=True, height=320)

    if st.button("๐Ÿš€ ๋ผ๋ฒจ ์ƒ์„ฑ (ํŽ˜์ด์ง€๋ณ„ HWPX ZIP)"):
        mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
        # Ceiling division: number of template copies needed for all records.
        pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
        debug_all = []

        for p in range(pages):
            chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
            # Build the slot -> value mapping for this page (1-based slot suffix).
            mapping = {}
            for i in range(int(n_per_page)):
                if i < len(chunk):
                    r = chunk[i]
                    mapping[f"๋ฐ•์Šค๋ฒˆํ˜ธ{i+1}"] = r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","")
                    mapping[f"์ข…๋ฃŒ์—ฐ๋„{i+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","")
                    mapping[f"๋ณด์กด๊ธฐ๊ฐ„{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ„","")
                    mapping[f"๋‹จ์œ„์—…๋ฌด{i+1}"] = r.get("๋‹จ์œ„์—…๋ฌด","")
                    mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
                    mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก","")
                else:
                    # Trailing slots on the last page are blanked out.
                    for k in keys:
                        mapping[f"{k}{i+1}"] = ""

            out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
            debug_all.append({"page": p+1, "stats": dbg})
            # Output filename lists the box numbers this page covers.
            name = "_".join([r.get("๋ฐ•์Šค๋ฒˆํ˜ธ","") for r in chunk]) if chunk else f"empty_{p+1}"
            zout.writestr(f"label_{name}.hwpx", out_hwpx)

        zout.close(); mem.seek(0)
        st.download_button("โฌ‡๏ธ ZIP ๋‹ค์šด๋กœ๋“œ", data=mem, file_name="labels_by_page.zip", mime="application/zip")
        st.download_button("โฌ‡๏ธ ๋””๋ฒ„๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
                           file_name="debug.json", mime="application/json")