dohyune committed on
Commit
45cb3a7
·
verified ·
1 Parent(s): 94d047b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +307 -210
app.py CHANGED
@@ -1,210 +1,307 @@
1
- import io, os, zipfile, re
2
- import streamlit as st
3
- import pandas as pd
4
- from reportlab.pdfgen import canvas
5
- from reportlab.lib.pagesizes import A4
6
- from reportlab.lib.units import mm
7
-
8
- def replace_placeholders_in_hwpx(hwpx_bytes: bytes, mapping: dict) -> bytes:
9
- mem_in = io.BytesIO(hwpx_bytes)
10
- mem_out = io.BytesIO()
11
- zin = zipfile.ZipFile(mem_in, "r")
12
- zout = zipfile.ZipFile(mem_out, "w", zipfile.ZIP_DEFLATED)
13
- alias = {
14
- "box2": "๋ฐ•์Šค๋ฒˆํ˜ธ2","year2": "์ข…๋ฃŒ์—ฐ๋„2","duration2": "๋ณด์กด๊ธฐ๊ฐ„2","task2": "๋‹จ์œ„์—…๋ฌด2","file2": "๊ธฐ๋ก๋ฌผ์ฒ 2","list2": "๋ชฉ๋ก2",
15
- "box3": "๋ฐ•์Šค๋ฒˆํ˜ธ3","year3": "์ข…๋ฃŒ์—ฐ๋„3","duration3": "๋ณด์กด๊ธฐ๊ฐ„3","task3": "๋‹จ์œ„์—…๋ฌด3","file3": "๊ธฐ๋ก๋ฌผ์ฒ 3","list3": "๋ชฉ๋ก3",
16
- }
17
- expanded_map = dict(mapping)
18
- for k, v in list(mapping.items()):
19
- for a, tgt in alias.items():
20
- if k == tgt and a not in expanded_map:
21
- expanded_map[a] = v
22
- def repl_xml(txt: str, kv: dict) -> str:
23
- any_token = any(("{{"+k+"}}" in txt) for k in kv.keys())
24
- if any_token:
25
- for k, v in kv.items():
26
- txt = txt.replace(f"{{{{{k}}}}}", "" if v is None else str(v))
27
- return txt
28
- for k, v in kv.items():
29
- pattern = re.compile(rf'(<hp:fieldBegin[^>]*name="{re.escape(k)}"[^>]*>.*?</hp:fieldBegin>)(.*?)(<hp:fieldEnd[^>]*/>)', re.DOTALL)
30
- def _repl(m):
31
- val = "" if v is None else str(v)
32
- return f'{m.group(1)}<hp:run><hp:t>{val}</hp:t></hp:run>{m.group(3)}'
33
- txt, _ = pattern.subn(_repl, txt, count=20)
34
- return txt
35
- for item in zin.infolist():
36
- data = zin.read(item.filename)
37
- if item.filename.startswith("Contents/") and item.filename.endswith(".xml"):
38
- try:
39
- txt = data.decode("utf-8", errors="ignore")
40
- txt = repl_xml(txt, expanded_map)
41
- data = txt.encode("utf-8")
42
- except Exception:
43
- pass
44
- zout.writestr(item, data)
45
- zin.close()
46
- zout.close()
47
- mem_out.seek(0)
48
- return mem_out.getvalue()
49
-
50
- LABEL_W, LABEL_H = 89*mm, 38*mm
51
- MARGIN_L, MARGIN_T = 10*mm, 10*mm
52
- COL_GAP, ROW_GAP = 5*mm, 5*mm
53
- COLS, ROWS = 2, 5
54
-
55
- def draw_label(c, x, y, row_dict):
56
- c.setFont("Helvetica", 9)
57
- lines = [
58
- f"๋ฐ•์Šค๋ฒˆํ˜ธ: {row_dict.get('๋ฐ•์Šค๋ฒˆํ˜ธ','')}",
59
- f"์ข…๋ฃŒ์—ฐ๋„(๋ฒ”์œ„): {row_dict.get('์ƒ์‚ฐ์—ฐ๋„','')}",
60
- f"๋ณด์กด๊ธฐ๊ฐ„: {row_dict.get('๋ณด์กด๊ธฐ๊ฐ„','')}",
61
- f"๋‹จ์œ„์—…๋ฌด: {row_dict.get('๋‹จ์œ„์—…๋ฌด','')}",
62
- f"๊ธฐ๋ก๋ฌผ์ฒ : {row_dict.get('๊ธฐ๋ก๋ฌผ์ฒ ','')}",
63
- f"๋ชฉ๋ก(์š”์•ฝ): {str(row_dict.get('๋ชฉ๋ก',''))[:40]}",
64
- ]
65
- ty = y + LABEL_H - 8*mm
66
- for ln in lines:
67
- c.drawString(x + 5*mm, ty, ln)
68
- ty -= 5*mm
69
- c.rect(x, y, LABEL_W, LABEL_H)
70
-
71
- def make_preview_pdf(rows: list) -> bytes:
72
- buf = io.BytesIO()
73
- c = canvas.Canvas(buf, pagesize=A4)
74
- page_i, i = 0, 0
75
- while i < len(rows):
76
- c.setFont("Helvetica-Bold", 10)
77
- c.drawString(15*mm, 287*mm, f"Formtec 3203 Preview โ€” Page {page_i+1}")
78
- for r in range(ROWS):
79
- for col in range(COLS):
80
- if i >= len(rows): break
81
- x = MARGIN_L + col*(LABEL_W + COL_GAP)
82
- y = (A4[1] - MARGIN_T - LABEL_H) - r*(LABEL_H + ROW_GAP)
83
- draw_label(c, x, y, rows[i])
84
- i += 1
85
- c.showPage()
86
- page_i += 1
87
- c.save()
88
- buf.seek(0)
89
- return buf.read()
90
-
91
- def compute_year_range(series: pd.Series):
92
- s = series.astype(str).fillna("")
93
- valid = s[~s.isin(["", "0", "0000"])]
94
- if len(valid) == 0:
95
- return "0000-0000"
96
- valid_int = pd.to_numeric(valid, errors="coerce").dropna().astype(int)
97
- if len(valid_int) == 0:
98
- return "0000-0000"
99
- return f"{valid_int.min():04d}-{valid_int.max():04d}"
100
-
101
- def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
102
- df = df.copy()
103
- if "์ œ๋ชฉ" in df.columns:
104
- df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
105
- if "๋ฐ•์Šค๋ฒˆํ˜ธ" in df.columns:
106
- df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)
107
- if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
108
- prod_df = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(compute_year_range).reset_index()
109
- prod_df.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
110
- else:
111
- prod_df = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique(), "์ƒ์‚ฐ์—ฐ๋„": "0000-0000"})
112
- has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
113
- list_data = []
114
- for box_num, group in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
115
- if has_mgmt:
116
- lines = [f"- {row['๊ด€๋ฆฌ๋ฒˆํ˜ธ']} {row['์ œ๋ชฉ']}" for _, row in group.iterrows()]
117
- else:
118
- lines = [f"- {row['์ œ๋ชฉ']}" for _, row in group.iterrows()]
119
- list_data.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": box_num, "๋ชฉ๋ก": "\r\n".join(lines)})
120
- list_df = pd.DataFrame(list_data)
121
- meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
122
- meta_exist = [c for c in meta_cols if c in df.columns]
123
- meta_df = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist]
124
- merged = meta_df.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ").merge(prod_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ")
125
- return merged
126
-
127
- st.set_page_config(page_title="๋ฐ•์Šค๋ผ๋ฒจ(.HWPX) ์ƒ์„ฑ๊ธฐ โ€” Hugging Face", layout="wide")
128
- st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (.HWPX ํ…œํ”Œ๋ฆฟ)")
129
-
130
- with st.expander("์‚ฌ์šฉ ๋ฐฉ๋ฒ•", expanded=True):
131
- st.markdown("""
132
- `.hwpx` ํ…œํ”Œ๋ฆฟ์„ ์—…๋กœ๋“œํ•˜๊ณ , ์—‘์…€/CSV ๋ฐ์ดํ„ฐ๋ฅผ ์—ฐ๊ฒฐํ•ด ๋ผ๋ฒจ์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค. ํ…œํ”Œ๋ฆฟ์—๋Š” `๋ฐ•์Šค๋ฒˆํ˜ธยท์ข…๋ฃŒ์—ฐ๋„ยท๋ณด์กด๊ธฐ๊ฐ„ยท๋‹จ์œ„์—…๋ฌดยท๊ธฐ๋ก๋ฌผ์ฒ ยท๋ชฉ๋ก` ํ•„๋“œ๊ฐ€ ์„ธํŠธ๋ณ„๋กœ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค(์˜ˆ: 1~3์„ธํŠธ).
133
- """)
134
-
135
- tpl_file = st.file_uploader("HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
136
- data_file = st.file_uploader("๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
137
- batch_size = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜", min_value=1, max_value=12, value=3, step=1)
138
- preview_pdf = st.checkbox("PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋„ ์ƒ์„ฑ", value=False)
139
-
140
- default_fields = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]
141
-
142
- if data_file:
143
- if data_file.name.lower().endswith(".csv"):
144
- df = pd.read_csv(data_file)
145
- else:
146
- df = pd.read_excel(data_file)
147
- st.write("๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
148
- st.dataframe(df.head(10), use_container_width=True)
149
- st.subheader("์ปฌ๋Ÿผ ๋งคํ•‘ (์—‘์…€/CSV โ†’ ๋ผ๋ฒจ ํ•„๋“œ)")
150
- col_map = {}
151
- for f in ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๊ด€๋ฆฌ๋ฒˆํ˜ธ","์ œ๋ชฉ"]:
152
- options = ["(์—†์Œ)"] + list(df.columns)
153
- default_idx = options.index(f) if f in df.columns else 0
154
- col_map[f] = st.selectbox(f"{f} โ†’", options, index=default_idx, key=f"map_{f}")
155
- renames = {v: k for k, v in col_map.items() if v != "(์—†์Œ)"}
156
- df_std = df.rename(columns=renames)
157
- if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df_std.columns:
158
- st.error("ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ๋งคํ•‘๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
159
- else:
160
- merged_df = build_merged_df(df_std)
161
- st.text_input("์ถœ๋ ฅํ•  ๋ฐ•์Šค๋ฒˆํ˜ธ (์‰ผํ‘œ ๊ตฌ๋ถ„, ๋น„์šฐ๋ฉด ์ „์ฒด)", key="sel_boxes")
162
- run = st.button("๋ผ๋ฒจ ์ƒ์„ฑ (HWPX ZIP)")
163
- if run:
164
- if not tpl_file:
165
- st.error("ํ…œํ”Œ๋ฆฟ(.hwpx)์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”.")
166
- else:
167
- tpl_bytes = tpl_file.read()
168
- user_input = st.session_state.get("sel_boxes", "").strip()
169
- work_df = merged_df.copy()
170
- if user_input:
171
- targets = [s.strip().zfill(4) for s in user_input.split(",") if s.strip()]
172
- work_df = work_df[work_df["๋ฐ•์Šค๋ฒˆํ˜ธ"].isin(targets)]
173
- if len(work_df) == 0:
174
- st.warning("์กฐ๊ฑด์— ํ•ด๋‹นํ•˜๋Š” ๋ฐ•์Šค๋ฒˆํ˜ธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
175
- else:
176
- rows = work_df.to_dict(orient="records")
177
- zip_buf = io.BytesIO()
178
- with zipfile.ZipFile(zip_buf, "w", zipfile.ZIP_DEFLATED) as zout:
179
- num_boxes = len(rows)
180
- num_pages = (num_boxes + batch_size - 1) // batch_size
181
- for i in range(num_pages):
182
- mapping_all = {}
183
- page_rows = []
184
- for j in range(batch_size):
185
- idx = i * batch_size + j
186
- if idx >= num_boxes:
187
- for prefix in default_fields:
188
- mapping_all[f"{prefix}{j+1}"] = ""
189
- continue
190
- r = rows[idx]
191
- page_rows.append(r)
192
- for prefix in default_fields:
193
- if prefix == "์ข…๋ฃŒ์—ฐ๋„":
194
- mapping_all[f"{prefix}{j+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","")
195
- else:
196
- mapping_all[f"{prefix}{j+1}"] = r.get(prefix,"")
197
- out_hwpx = replace_placeholders_in_hwpx(tpl_bytes, mapping_all)
198
- page_nums = [str(x.get("๋ฐ•์Šค๋ฒˆํ˜ธ","")) for x in page_rows]
199
- safe_name = "_".join(page_nums) if page_nums else f"empty_{i+1}"
200
- zout.writestr(f"label_{safe_name}.hwpx", out_hwpx)
201
- zip_buf.seek(0)
202
- st.download_button("๐Ÿ“ฅ HWPX ์ผ๊ด„ ๋‹ค์šด๋กœ๋“œ (ZIP)", data=zip_buf, file_name="boxlabels_hwpx.zip", mime="application/zip")
203
- if preview_pdf:
204
- try:
205
- pdf_bytes = make_preview_pdf(rows)
206
- st.download_button("๐Ÿ‘€ PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ ๋‹ค์šด๋กœ๋“œ", data=pdf_bytes, file_name="preview.pdf", mime="application/pdf")
207
- except Exception as e:
208
- st.warning(f"PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ƒ์„ฑ ์‹คํŒจ: {e}")
209
-
210
- st.caption("โ€ป ํ…œํ”Œ๋ฆฟ์€ ๋ฐ˜๋“œ์‹œ .HWPX ํŒŒ์ผ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. ํ•œ๊ธ€์—์„œ '๋‹ค๋ฅธ ์ด๋ฆ„์œผ๋กœ ์ €์žฅ' โ†’ HWPX.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io, os, zipfile, re
2
+ import streamlit as st
3
+ import pandas as pd
4
+
5
+ # (์„ ํƒ) PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ์šฉ
6
+ from reportlab.pdfgen import canvas
7
+ from reportlab.lib.pagesizes import A4
8
+ from reportlab.lib.units import mm
9
+
10
+ # =========================
11
+ # HWPX 템플릿 치환 유틸 (필드컨트롤 & {{토큰}} 지원)
12
+ # =========================
13
def replace_placeholders_in_hwpx(hwpx_bytes: bytes, mapping: dict) -> bytes:
    """Fill a HWPX template (a zip archive) with values and return the new archive.

    Every ``Contents/*.xml`` member is rewritten; all other members are copied
    unchanged. Within one XML member exactly one substitution strategy is used:

    1. If the member contains at least one ``{{key}}`` token for a known key,
       all ``{{key}}`` tokens are replaced and field controls are left alone.
    2. Otherwise the content between ``<hp:fieldBegin name="key" ...>`` and the
       following ``<hp:fieldEnd .../>`` is replaced by a single text run.

    Args:
        hwpx_bytes: Raw bytes of the template archive.
        mapping: Field name -> value. ``None`` is rendered as an empty string,
            anything else through ``str()``.

    Returns:
        Bytes of a new archive with the substitutions applied.

    Raises:
        zipfile.BadZipFile: If ``hwpx_bytes`` is not a readable zip archive.
    """
    # Local import keeps this block self-contained (no file-level import change).
    from xml.sax.saxutils import escape

    # English alias names that may appear in a template -> the Korean field
    # whose value they should receive (sets 2 and 3 only).
    alias = {
        # set 2
        "box2": "๋ฐ•์Šค๋ฒˆํ˜ธ2","year2": "์ข…๋ฃŒ์—ฐ๋„2","duration2": "๋ณด์กด๊ธฐ๊ฐ„2","task2": "๋‹จ์œ„์—…๋ฌด2","file2": "๊ธฐ๋ก๋ฌผ์ฒ 2","list2": "๋ชฉ๋ก2",
        # set 3
        "box3": "๋ฐ•์Šค๋ฒˆํ˜ธ3","year3": "์ข…๋ฃŒ์—ฐ๋„3","duration3": "๋ณด์กด๊ธฐ๊ฐ„3","task3": "๋‹จ์œ„์—…๋ฌด3","file3": "๊ธฐ๋ก๋ฌผ์ฒ 3","list3": "๋ชฉ๋ก3",
    }
    expanded_map = dict(mapping)
    for alias_name, target in alias.items():
        # An alias explicitly present in ``mapping`` wins over the derived value.
        if target in mapping and alias_name not in expanded_map:
            expanded_map[alias_name] = mapping[target]

    # Values land inside XML text nodes, so &, < and > must be escaped or the
    # resulting document is no longer well-formed.
    values = {
        key: "" if value is None else escape(str(value))
        for key, value in expanded_map.items()
    }

    def repl_xml(txt: str) -> str:
        # 1) {{key}} token substitution
        if any("{{" + key + "}}" in txt for key in values):
            for key, val in values.items():
                txt = txt.replace("{{" + key + "}}", val)
            return txt
        # 2) field-control body substitution
        for key, val in values.items():
            pattern = re.compile(
                rf'(<hp:fieldBegin[^>]*name="{re.escape(key)}"[^>]*>.*?</hp:fieldBegin>)(.*?)(<hp:fieldEnd[^>]*/>)',
                re.DOTALL,
            )

            def _repl(m, val=val):
                return f'{m.group(1)}<hp:run><hp:t>{val}</hp:t></hp:run>{m.group(3)}'

            # At most 50 occurrences of one field are rewritten per XML member.
            txt = pattern.sub(_repl, txt, count=50)
        return txt

    mem_out = io.BytesIO()
    # Context managers close both archives even if a member fails to process.
    with zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r") as zin, \
            zipfile.ZipFile(mem_out, "w", zipfile.ZIP_DEFLATED) as zout:
        for item in zin.infolist():
            data = zin.read(item.filename)
            if item.filename.startswith("Contents/") and item.filename.endswith(".xml"):
                # Best effort: undecodable bytes are dropped rather than raising.
                txt = data.decode("utf-8", errors="ignore")
                data = repl_xml(txt).encode("utf-8")
            # Passing the original ZipInfo keeps each member's name and metadata.
            zout.writestr(item, data)
    return mem_out.getvalue()
66
+
67
+ # =========================
68
+ # PDF 미리보기 (폼텍 3203 예시)
69
+ # =========================
70
# Label-sheet geometry used by the PDF preview, expressed with reportlab's
# ``mm`` unit: size of one label, page margins, gaps between labels, and the
# number of label columns/rows per page.
LABEL_W = 89*mm
LABEL_H = 38*mm
MARGIN_L = 10*mm
MARGIN_T = 10*mm
COL_GAP = 5*mm
ROW_GAP = 5*mm
COLS = 2
ROWS = 5
74
+
75
def draw_label(c, x, y, row_dict):
    """Draw one label (six text lines plus a border) on canvas ``c``.

    ``(x, y)`` is the corner passed to ``c.rect`` for the label border. Missing
    keys in ``row_dict`` render as empty text, and the list field is cut to its
    first 40 characters.
    """
    def field(key):
        return row_dict.get(key, '')

    c.setFont("Helvetica", 9)
    text_lines = (
        f"๋ฐ•์Šค๋ฒˆํ˜ธ: {field('๋ฐ•์Šค๋ฒˆํ˜ธ')}",
        f"์ข…๋ฃŒ์—ฐ๋„(๋ฒ”์œ„): {field('์ƒ์‚ฐ์—ฐ๋„')}",
        f"๋ณด์กด๊ธฐ๊ฐ„: {field('๋ณด์กด๊ธฐ๊ฐ„')}",
        f"๋‹จ์œ„์—…๋ฌด: {field('๋‹จ์œ„์—…๋ฌด')}",
        f"๊ธฐ๋ก๋ฌผ์ฒ : {field('๊ธฐ๋ก๋ฌผ์ฒ ')}",
        f"๋ชฉ๋ก(์š”์•ฝ): {str(field('๋ชฉ๋ก'))[:40]}",
    )
    left = x + 5*mm
    baseline = y + LABEL_H - 8*mm
    for text in text_lines:
        c.drawString(left, baseline, text)
        baseline -= 5*mm  # step down one text line
    # The border is drawn after the text, as in the original.
    c.rect(x, y, LABEL_W, LABEL_H)
90
+
91
def make_preview_pdf(rows: list) -> bytes:
    """Render ``rows`` as a label-sheet preview PDF and return its bytes.

    Labels are placed row by row, ``COLS`` per row and ``ROWS`` rows per page;
    each page gets a header line carrying its 1-based page number.
    """
    out = io.BytesIO()
    pdf = canvas.Canvas(out, pagesize=A4)
    per_page = ROWS * COLS
    for page_no, start in enumerate(range(0, len(rows), per_page), start=1):
        pdf.setFont("Helvetica-Bold", 10)
        pdf.drawString(15*mm, 287*mm, f"Formtec 3203 Preview โ€” Page {page_no}")
        for slot, record in enumerate(rows[start:start + per_page]):
            # slot -> grid position, filled left-to-right then top-to-bottom
            grid_row, grid_col = divmod(slot, COLS)
            x = MARGIN_L + grid_col*(LABEL_W + COL_GAP)
            y = (A4[1] - MARGIN_T - LABEL_H) - grid_row*(LABEL_H + ROW_GAP)
            draw_label(pdf, x, y, record)
        pdf.showPage()
    pdf.save()
    return out.getvalue()
110
+
111
+ # =========================
112
+ # 업로드 모드: 엑셀 데이터 병합 로직
113
+ # =========================
114
def compute_year_range(series: pd.Series):
    """Summarise a column of years as a zero-padded ``"MIN-MAX"`` string.

    Values that are not numeric (blank, text, missing) and values equal to
    zero are ignored. Zero is detected numerically, so ``0``, ``"0000"`` and a
    float ``0.0`` (how a zero appears once the column holds missing values) are
    all skipped.

    Args:
        series: Year values of any dtype.

    Returns:
        ``"YYYY-YYYY"`` built from the smallest and largest usable year, or
        ``"0000-0000"`` when no usable year exists.
    """
    # Going through str first lets numbers stored as text be parsed too;
    # anything unparsable becomes NaN and is dropped.
    years = pd.to_numeric(series.astype(str), errors="coerce").dropna()
    years = years[years != 0]
    if years.empty:
        return "0000-0000"
    return f"{int(years.min()):04d}-{int(years.max()):04d}"
123
+
124
def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse item-level rows into one row per box number.

    The result has, per box: the first value of each available metadata
    column, a list column with one ``"- [management no] title"`` line per item
    joined by CRLF, and a year-range column produced by ``compute_year_range``
    (or ``"0000-0000"`` when the end-year column is absent).

    Args:
        df: Item rows using the standard column names. The box-number column
            is required, and the title column is needed to build the list.

    Returns:
        A new DataFrame with one row per box; ``df`` itself is not modified.

    Raises:
        KeyError: If the box-number or title column is missing.
    """
    df = df.copy()
    # Normalise the title column to str. The original tested a corrupted key
    # here, so this conversion never ran.
    if "์ œ๋ชฉ" in df.columns:
        df["์ œ๋ชฉ"] = df["์ œ๋ชฉ"].astype(str)
    if "๋ฐ•์Šค๋ฒˆํ˜ธ" in df.columns:
        # Box numbers are compared as 4-character zero-padded strings.
        df["๋ฐ•์Šค๋ฒˆํ˜ธ"] = df["๋ฐ•์Šค๋ฒˆํ˜ธ"].astype(str).str.zfill(4)

    # Year range per box
    if "์ข…๋ฃŒ์—ฐ๋„" in df.columns:
        prod_df = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ")["์ข…๋ฃŒ์—ฐ๋„"].apply(compute_year_range).reset_index()
        prod_df.columns = ["๋ฐ•์Šค๋ฒˆํ˜ธ", "์ƒ์‚ฐ์—ฐ๋„"]
    else:
        prod_df = pd.DataFrame({"๋ฐ•์Šค๋ฒˆํ˜ธ": df["๋ฐ•์Šค๋ฒˆํ˜ธ"].unique(), "์ƒ์‚ฐ์—ฐ๋„": "0000-0000"})

    # Item list per box (management number + title when available)
    has_mgmt = "๊ด€๋ฆฌ๋ฒˆํ˜ธ" in df.columns
    list_data = []
    for box_num, group in df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ"):
        titles = group["์ œ๋ชฉ"]
        if has_mgmt:
            lines = [f"- {mgmt} {title}" for mgmt, title in zip(group["๊ด€๋ฆฌ๋ฒˆํ˜ธ"], titles)]
        else:
            lines = [f"- {title}" for title in titles]
        list_data.append({"๋ฐ•์Šค๋ฒˆํ˜ธ": box_num, "๋ชฉ๋ก": "\r\n".join(lines)})
    list_df = pd.DataFrame(list_data)

    # First value of every metadata column that is actually present
    meta_cols = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ œ๋ชฉ"]
    meta_exist = [c for c in meta_cols if c in df.columns]
    meta_df = df.groupby("๋ฐ•์Šค๋ฒˆํ˜ธ", as_index=False).first()[meta_exist]

    return meta_df.merge(list_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ").merge(prod_df, on="๋ฐ•์Šค๋ฒˆํ˜ธ")
155
+
156
+ # =========================
157
+ # Streamlit UI
158
+ # =========================
159
# Streamlit page: collect a HWPX template plus box data (uploaded file or
# manual entry), then offer the generated labels as a ZIP download.
st.set_page_config(page_title="๋ฐ•์Šค๋ผ๋ฒจ(.HWPX) ์ƒ์„ฑ๊ธฐ โ€” Hugging Face", layout="wide")
st.title("๐Ÿ“ฆ ๋ฐ•์Šค๋ผ๋ฒจ ์ž๋™ ์ƒ์„ฑ๊ธฐ (.HWPX ํ…œํ”Œ๋ฆฟ)")

with st.expander("์‚ฌ์šฉ ๋ฐฉ๋ฒ•", expanded=True):
    st.markdown("""
- **์–‘์‹(.HWPX ํ…œํ”Œ๋ฆฟ)** ์„ ์—…๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค. (ํ•œ๊ธ€์—์„œ '๋‹ค๋ฅธ ์ด๋ฆ„์œผ๋กœ ์ €์žฅ' โ†’ HWPX)
- **๋ฐ์ดํ„ฐ ์ž…๋ ฅ ๋ฐฉ์‹**์„ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค.
  1) **ํŒŒ์ผ ์—…๋กœ๋“œ(์—‘์…€/CSV)**: ๊ธฐ์กด ๋ฐ์ดํ„ฐ๋กœ ์ผ๊ด„ ์ƒ์„ฑ
  2) **์ง์ ‘ ์ž…๋ ฅ(๋ชฉ๋ก & ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ)**: ํ™”๋ฉด์—์„œ ๋ฐ•์Šค๋ณ„ ์ •๋ณด๋ฅผ ์ˆ˜๋™์œผ๋กœ ์ž…๋ ฅ
- ํ…œํ”Œ๋ฆฟ์— ์„ธํŠธ๋ณ„ ํ•„๋“œ(์˜ˆ: `๋ฐ•์Šค๋ฒˆํ˜ธ1~3`, `์ข…๋ฃŒ์—ฐ๋„1~3`, `๋ณด์กด๊ธฐ๊ฐ„1~3`, `๋‹จ์œ„์—…๋ฌด1~3`, `๊ธฐ๋ก๋ฌผ์ฒ 1~3`, `๋ชฉ๋ก1~3`)๊ฐ€ ์žˆ์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
""")

tpl_file = st.file_uploader("HWPX ํ…œํ”Œ๋ฆฟ ์—…๋กœ๋“œ", type=["hwpx"])
batch_size = st.number_input("ํ…œํ”Œ๋ฆฟ์˜ ๋ผ๋ฒจ ์„ธํŠธ ๊ฐœ์ˆ˜", min_value=1, max_value=12, value=3, step=1)
preview_pdf = st.checkbox("PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋„ ์ƒ์„ฑ", value=False)

mode = st.radio("๋ฐ์ดํ„ฐ ์ž…๋ ฅ ๋ฐฉ์‹", ["ํŒŒ์ผ ์—…๋กœ๋“œ(์—‘์…€/CSV)", "์ง์ ‘ ์ž…๋ ฅ(๋ชฉ๋ก & ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ)"], horizontal=True)
default_fields = ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]

# Row dicts to render. Stays None until a "generate" button yields usable data.
rows = None

# ----- Mode 1: file upload -----
if mode == "ํŒŒ์ผ ์—…๋กœ๋“œ(์—‘์…€/CSV)":
    data_file = st.file_uploader("๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (Excel/CSV)", type=["xlsx","xls","csv"])
    if data_file:
        if data_file.name.lower().endswith(".csv"):
            df = pd.read_csv(data_file)
        else:
            df = pd.read_excel(data_file)
        st.write("๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
        st.dataframe(df.head(10), use_container_width=True)

        # Column mapping: for each label field, pick the source column
        # (pre-selected when a column with the same name exists).
        st.subheader("์ปฌ๋Ÿผ ๋งคํ•‘ (์—‘์…€/CSV โ†’ ๋ผ๋ฒจ ํ•„๋“œ)")
        col_map = {}
        for f in ["๋ฐ•์Šค๋ฒˆํ˜ธ","์ข…๋ฃŒ์—ฐ๋„","๋ณด์กด๊ธฐ๊ฐ„","๋‹จ์œ„์—…๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๊ด€๋ฆฌ๋ฒˆํ˜ธ","์ œ๋ชฉ"]:
            options = ["(์—†์Œ)"] + list(df.columns)
            default_idx = options.index(f) if f in df.columns else 0
            col_map[f] = st.selectbox(f"{f} โ†’", options, index=default_idx, key=f"map_{f}")
        renames = {v: k for k, v in col_map.items() if v != "(์—†์Œ)"}
        df_std = df.rename(columns=renames)

        if "๋ฐ•์Šค๋ฒˆํ˜ธ" not in df_std.columns:
            st.error("ํ•„์ˆ˜ ์ปฌ๋Ÿผ '๋ฐ•์Šค๋ฒˆํ˜ธ'๊ฐ€ ๋งคํ•‘๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        else:
            merged_df = build_merged_df(df_std)
            st.text_input("์ถœ๋ ฅํ•  ๋ฐ•์Šค๋ฒˆํ˜ธ (์‰ผํ‘œ ๊ตฌ๋ถ„, ๋น„์šฐ๋ฉด ์ „์ฒด)", key="sel_boxes")
            if st.button("๋ผ๋ฒจ ์ƒ์„ฑ (HWPX ZIP)", key="btn_upload"):
                if not tpl_file:
                    st.error("ํ…œํ”Œ๋ฆฟ(.hwpx)์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”.")
                else:
                    # The template is NOT read here. The shared routine below
                    # reads it once; a read() at this point used to leave the
                    # stream exhausted, so that routine saw empty bytes.
                    user_input = st.session_state.get("sel_boxes", "").strip()
                    work_df = merged_df.copy()
                    if user_input:
                        # Same zero-padding as build_merged_df applies.
                        targets = [s.strip().zfill(4) for s in user_input.split(",") if s.strip()]
                        work_df = work_df[work_df["๋ฐ•์Šค๋ฒˆํ˜ธ"].isin(targets)]
                    if len(work_df) == 0:
                        st.warning("์กฐ๊ฑด์— ํ•ด๋‹นํ•˜๋Š” ๋ฐ•์Šค๋ฒˆํ˜ธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
                    else:
                        rows = work_df.to_dict(orient="records")

# ----- Mode 2: manual entry -----
else:
    st.info("๋ฐ•์Šค๋ณ„ ์ •๋ณด๋ฅผ ์ง์ ‘ ์ž…๋ ฅํ•˜์„ธ์š”. **๋ชฉ๋ก**์€ ์—ฌ๋Ÿฌ ์ค„๋กœ ์ž…๋ ฅํ•˜๋ฉด ์ค„๋ฐ”๊ฟˆ ์œ ์ง€๋ฉ๋‹ˆ๋‹ค.")
    num_boxes = st.number_input("๋ฐ•์Šค ๊ฐœ์ˆ˜", min_value=1, max_value=100, value=3, step=1)
    manual_rows = []
    for i in range(num_boxes):
        st.subheader(f"๋ฐ•์Šค #{i+1}")
        c1, c2, c3 = st.columns(3)
        with c1:
            box = st.text_input("๋ฐ•์Šค๋ฒˆํ˜ธ (์ˆซ์ž, ์ž๋™ 4์ž๋ฆฌ)", key=f"box_{i}")
        with c2:
            dur = st.text_input("๋ณด์กด๊ธฐ๊ฐ„", key=f"dur_{i}")
        with c3:
            task = st.text_input("๋‹จ์œ„์—…๋ฌด", key=f"task_{i}")
        c4, c5 = st.columns(2)
        with c4:
            folder = st.text_input("๊ธฐ๋ก๋ฌผ์ฒ ", key=f"file_{i}")
        with c5:
            prod = st.text_input("์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„) ์˜ˆ: 2019-2022", key=f"prod_{i}")
        memo = st.text_area("๋ชฉ๋ก (ํ•œ ์ค„๋‹น ํ•œ ํ•ญ๋ชฉ, ์•ž์— '- '๋Š” ์—†์–ด๋„ ๋ฉ๋‹ˆ๋‹ค)", key=f"list_{i}", height=120,
                            placeholder="์˜ˆ)\n2020-001 ์ด๋ฌด๋ถ€ ํšŒ์˜๋ก\n2020-002 ์˜ˆ์‚ฐ๊ฒฐ์‚ฐ์„œ\n...")

        # Normalise the list: drop blank lines, prefix "- " where missing.
        lines = [ln.strip() for ln in memo.splitlines() if ln.strip()]
        norm_lines = [ln if ln.startswith("- ") else f"- {ln}" for ln in lines]
        # Strip before the emptiness check so a whitespace-only entry stays
        # blank instead of being padded to "0000".
        box_clean = (box or "").strip()
        manual_rows.append({
            "๋ฐ•์Šค๋ฒˆํ˜ธ": box_clean.zfill(4) if box_clean else "",
            "๋ณด์กด๊ธฐ๊ฐ„": dur.strip(),
            "๋‹จ์œ„์—…๋ฌด": task.strip(),
            "๊ธฐ๋ก๋ฌผ์ฒ ": folder.strip(),
            "์ƒ์‚ฐ์—ฐ๋„": prod.strip(),
            "๋ชฉ๋ก": "\r\n".join(norm_lines)
        })
    if st.button("๋ผ๋ฒจ ์ƒ์„ฑ (HWPX ZIP)", key="btn_manual"):
        if not tpl_file:
            st.error("ํ…œํ”Œ๋ฆฟ(.hwpx)์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”.")
        else:
            rows = manual_rows

# ===== Shared generation routine =====
if rows:
    # getvalue() returns the full upload regardless of the stream position, so
    # it is safe even if the file object was read earlier.
    tpl_bytes = tpl_file.getvalue() if tpl_file else None
    if not tpl_bytes:
        st.error("ํ…œํ”Œ๋ฆฟ(.hwpx)์„ ์—…๋กœ๋“œํ•ด ์ฃผ์„ธ์š”.")
    else:
        # One HWPX document per page of `batch_size` labels, bundled in a ZIP.
        zip_buf = io.BytesIO()
        with zipfile.ZipFile(zip_buf, "w", zipfile.ZIP_DEFLATED) as zout:
            num_boxes = len(rows)
            num_pages = (num_boxes + batch_size - 1) // batch_size  # ceiling division
            for i in range(num_pages):
                mapping_all = {}
                page_rows = []
                for j in range(batch_size):
                    idx = i * batch_size + j
                    if idx >= num_boxes:
                        # Unused label slots on the last page are blanked.
                        for prefix in default_fields:
                            mapping_all[f"{prefix}{j+1}"] = ""
                        continue
                    r = rows[idx]
                    page_rows.append(r)
                    for prefix in default_fields:
                        if prefix == "์ข…๋ฃŒ์—ฐ๋„":
                            # By design the template's end-year slot receives
                            # the computed year range.
                            mapping_all[f"{prefix}{j+1}"] = r.get("์ƒ์‚ฐ์—ฐ๋„","")
                        else:
                            mapping_all[f"{prefix}{j+1}"] = r.get(prefix,"")
                out_hwpx = replace_placeholders_in_hwpx(tpl_bytes, mapping_all)
                page_nums = [str(x.get("๋ฐ•์Šค๋ฒˆํ˜ธ","")) for x in page_rows]
                safe_name = "_".join(page_nums) if page_nums else f"empty_{i+1}"
                zout.writestr(f"label_{safe_name}.hwpx", out_hwpx)

        zip_buf.seek(0)
        st.download_button("๐Ÿ“ฅ HWPX ์ผ๊ด„ ๋‹ค์šด๋กœ๋“œ (ZIP)", data=zip_buf,
                           file_name="boxlabels_hwpx.zip", mime="application/zip")

        if preview_pdf:
            # The preview is optional, so a failure only produces a warning.
            try:
                pdf_bytes = make_preview_pdf(rows)
                st.download_button("๐Ÿ‘€ PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ ๋‹ค์šด๋กœ๋“œ", data=pdf_bytes,
                                   file_name="preview.pdf", mime="application/pdf")
            except Exception as e:
                st.warning(f"PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ƒ์„ฑ ์‹คํŒจ: {e}")

st.caption("โ€ป ํ…œํ”Œ๋ฆฟ์€ ๋ฐ˜๋“œ์‹œ .HWPX ํŒŒ์ผ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค. (.HWP ์‚ฌ์šฉ ๋ถˆ๊ฐ€) โ€ข '์ข…๋ฃŒ์—ฐ๋„' ์ž๋ฆฌ์—๋Š” '์ƒ์‚ฐ์—ฐ๋„(๋ฒ”์œ„)'๊ฐ€ ์ฑ„์›Œ์ง€๋„๋ก ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.")