Update app.py
Browse files
app.py
CHANGED
|
@@ -1,240 +1,212 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
|
| 4 |
-
from
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ PDF ์ถ๋ ฅ๊ธฐ", layout="wide")
|
| 12 |
-
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ PDF ์ถ๋ ฅ๊ธฐ (๋ผ๋ฒจ ๊ท๊ฒฉ ์ปค์คํ
/ ํ๊ตญ์ด ํฐํธ ์
๋ก๋)")
|
| 13 |
-
|
| 14 |
-
with st.expander("์ฌ์ฉ ๋ฐฉ๋ฒ", expanded=True):
|
| 15 |
-
st.markdown("""
|
| 16 |
-
1. **์์
/CSV ์
๋ก๋** โ ํ์ ์ปฌ๋ผ: `๋ฐ์ค๋ฒํธ` / ๊ถ์ฅ: `์ข
๋ฃ์ฐ๋`, `๋ณด์กด๊ธฐ๊ฐ`, `๋จ์์
๋ฌด`, `๊ธฐ๋ก๋ฌผ์ฒ `, `์ ๋ชฉ`, `๊ด๋ฆฌ๋ฒํธ`
|
| 17 |
-
2. (์ ํ) **TTF ํฐํธ ์
๋ก๋**(์: ๋๋๊ณ ๋, ๋ณธ๊ณ ๋, ๋ง์ ๊ณ ๋ ๋ฑ). ์
๋ก๋ ์ ํ๋ฉด ๊ธฐ๋ณธ ํฐํธ ์ฌ์ฉ(์๋ฌธ ์์ฃผ).
|
| 18 |
-
3. **๋ผ๋ฒจ ๊ท๊ฒฉ**(ํ์ด์ง ์ฌ๋ฐฑ, ๋ผ๋ฒจ ๊ฐ๋ก/์ธ๋ก, ํ/์ด, ๋ผ๋ฒจ ๊ฐ๊ฒฉ)์ ์
๋ ฅ.
|
| 19 |
-
4. **ํ
์คํธ ๋ฐฐ์น**(๋ผ๋ฒจ ์์ชฝ ํจ๋ฉ, ํฐํธ ํฌ๊ธฐ, ์ค ๊ฐ๊ฒฉ ๋ฑ) ์กฐ์ .
|
| 20 |
-
5. **PDF ์์ฑ** โ ๋ผ๋ฒจ ์ฉ์ง(Formtec ๋ฑ)์ ์ธ์.
|
| 21 |
-
""")
|
| 22 |
-
|
| 23 |
-
# -----------------
|
| 24 |
-
# ๋ฐ์ดํฐ ๋ก๋
|
| 25 |
-
# -----------------
|
| 26 |
-
file = st.file_uploader("๐ ๋ฐ์ดํฐ ์
๋ก๋ (Excel/CSV)", type=["xlsx","xls","csv"])
|
| 27 |
-
df = None
|
| 28 |
-
if file:
|
| 29 |
-
if file.name.lower().endswith(".csv"):
|
| 30 |
-
df = pd.read_csv(file)
|
| 31 |
-
else:
|
| 32 |
-
df = pd.read_excel(file)
|
| 33 |
-
|
| 34 |
-
# ํ์ ์ปฌ๋ผ ๊ฒ์ฌ
|
| 35 |
-
if df is not None and "๋ฐ์ค๋ฒํธ" not in df.columns:
|
| 36 |
-
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
|
| 37 |
-
st.stop()
|
| 38 |
-
|
| 39 |
-
# -----------------
|
| 40 |
-
# ํฐํธ ์ค์
|
| 41 |
-
# -----------------
|
| 42 |
-
st.subheader("๐ค ํฐํธ ์ค์ ")
|
| 43 |
-
font_file = st.file_uploader("ํ๊ตญ์ด ํฐํธ(TTF) ์
๋ก๋ (์: NanumGothic.ttf / MalgunGothic.ttf)", type=["ttf"])
|
| 44 |
-
font_name = "BaseFont"
|
| 45 |
-
if font_file:
|
| 46 |
-
try:
|
| 47 |
-
font_bytes = font_file.read()
|
| 48 |
-
# ๋ฉ๋ชจ๋ฆฌ ๋ฑ๋ก: ReportLab์ ํ์ผ ๊ฒฝ๋ก๊ฐ ํ์ โ ์์ ํ์ผ ๋ง๋ค๊ธฐ๋ณด๋ค ๋ฉ๋ชจ๋ฆฌ ๋ ์ง์คํฐ ํธ๋ฆญ
|
| 49 |
-
# ํ์ง๋ง TTFont๋ ํ์ผ ๊ฒฝ๋ก ์๊ตฌ โ ์์ํ์ผ ์ ์ฅ
|
| 50 |
-
import tempfile
|
| 51 |
-
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".ttf")
|
| 52 |
-
tmp.write(font_bytes); tmp.flush()
|
| 53 |
-
pdfmetrics.registerFont(TTFont("UserKorean", tmp.name))
|
| 54 |
-
font_name = "UserKorean"
|
| 55 |
-
st.success("โ
ํฐํธ ๋ฑ๋ก ์๋ฃ: UserKorean")
|
| 56 |
-
except Exception as e:
|
| 57 |
-
st.warning(f"ํฐํธ ๋ฑ๋ก ์คํจ. ๊ธฐ๋ณธ ํฐํธ ์ฌ์ฉํฉ๋๋ค. (์ฌ์ : {e})")
|
| 58 |
-
else:
|
| 59 |
-
# ๋ด์ฅ ๊ธฐ๋ณธ ํฐํธ (์๋ฌธ ์ค์ฌ)
|
| 60 |
-
font_name = "Helvetica"
|
| 61 |
-
|
| 62 |
-
# -----------------
|
| 63 |
-
# ๋ผ๋ฒจ/ํ์ด์ง ๋ ์ด์์
|
| 64 |
-
# -----------------
|
| 65 |
-
st.subheader("๐ ๋ผ๋ฒจ ๊ท๊ฒฉ (mm ๋จ์)")
|
| 66 |
-
colA, colB, colC = st.columns(3)
|
| 67 |
-
with colA:
|
| 68 |
-
page_size = st.selectbox("ํ์ด์ง ํฌ๊ธฐ", ["A4"], index=0)
|
| 69 |
-
with colB:
|
| 70 |
-
margin_left = st.number_input("์ผ์ชฝ ์ฌ๋ฐฑ(mm)", 5.0, 50.0, 10.0, 0.5)
|
| 71 |
-
margin_top = st.number_input("์๋จ ์ฌ๋ฐฑ(mm)", 5.0, 50.0, 10.0, 0.5)
|
| 72 |
-
with colC:
|
| 73 |
-
rows = st.number_input("ํ ์", 1, 20, 10, 1)
|
| 74 |
-
cols = st.number_input("์ด ์", 1, 10, 3, 1)
|
| 75 |
-
|
| 76 |
-
colD, colE, colF = st.columns(3)
|
| 77 |
-
with colD:
|
| 78 |
-
label_w = st.number_input("๋ผ๋ฒจ ๊ฐ๋ก(mm)", 20.0, 210.0, 70.0, 0.5)
|
| 79 |
-
with colE:
|
| 80 |
-
label_h = st.number_input("๋ผ๋ฒจ ์ธ๋ก(mm)", 10.0, 297.0, 25.0, 0.5)
|
| 81 |
-
with colF:
|
| 82 |
-
gap_x = st.number_input("๊ฐ๋ก ๊ฐ๊ฒฉ(mm)", 0.0, 20.0, 3.0, 0.5)
|
| 83 |
-
gap_y = st.number_input("์ธ๋ก ๊ฐ๊ฒฉ(mm)", 0.0, 20.0, 3.0, 0.5)
|
| 84 |
-
|
| 85 |
-
# -----------------
|
| 86 |
-
# ๋ผ๋ฒจ ๋ด๋ถ ํ
์คํธ ๋ฐฐ์น
|
| 87 |
-
# -----------------
|
| 88 |
-
st.subheader("๐งฑ ๋ผ๋ฒจ ๋ด๋ถ ๋ ์ด์์")
|
| 89 |
-
col1, col2, col3 = st.columns(3)
|
| 90 |
-
with col1:
|
| 91 |
-
pad_x = st.number_input("๋ด๋ถ ํจ๋ฉ X(mm)", 0.0, 20.0, 2.0, 0.5)
|
| 92 |
-
pad_y = st.number_input("๋ด๋ถ ํจ๋ฉ Y(mm)", 0.0, 20.0, 2.0, 0.5)
|
| 93 |
-
with col2:
|
| 94 |
-
fs_big = st.number_input("ํฐํธ ํฌ๊ธฐ(ํฐ ์ ๋ชฉ)", 6, 40, 16, 1)
|
| 95 |
-
fs_mid = st.number_input("ํฐํธ ํฌ๊ธฐ(์ค๊ฐ)", 6, 40, 11, 1)
|
| 96 |
-
with col3:
|
| 97 |
-
fs_small = st.number_input("ํฐํธ ํฌ๊ธฐ(์๊ฒ/๋ชฉ๋ก)", 6, 20, 9, 1)
|
| 98 |
-
line_gap = st.number_input("์ค ๊ฐ๊ฒฉ(๋ฐฐ์)", 0.8, 2.0, 1.2, 0.1)
|
| 99 |
-
|
| 100 |
-
st.caption("๐ก Formtec 3203 ๋น์ทํ ์ค์ ์์: ๊ฐ๋ก 70, ์ธ๋ก 25, ์ด 3, ํ 10, ์ฌ๋ฐฑ 10/10, ๊ฐ๊ฒฉ 3/3 (ํ๋ฆฐํฐ๋ง๋ค ์ฝ๊ฐ ์กฐ์ )")
|
| 101 |
-
|
| 102 |
-
# -----------------
|
| 103 |
-
# ํ
์คํธ ์์ฑ ํจ์
|
| 104 |
-
# -----------------
|
| 105 |
-
def year_range(series):
|
| 106 |
s = series.astype(str).fillna("")
|
| 107 |
v = s[~s.isin(["", "0", "0000"])]
|
| 108 |
-
if
|
| 109 |
-
return "0000-0000"
|
| 110 |
nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
|
| 111 |
-
if
|
| 112 |
-
return "0000-0000"
|
| 113 |
return f"{nums.min():04d}-{nums.max():04d}"
|
| 114 |
|
| 115 |
-
def
|
| 116 |
df = df.copy()
|
| 117 |
df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
| 119 |
if "์ข
๋ฃ์ฐ๋" in df.columns:
|
| 120 |
-
yr = df.groupby("๋ฐ์ค๋ฒํธ")["์ข
๋ฃ์ฐ๋"].apply(
|
| 121 |
yr.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
|
| 122 |
else:
|
| 123 |
yr = pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique(), "์์ฐ์ฐ๋": "0000-0000"})
|
| 124 |
-
|
|
|
|
| 125 |
has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
|
| 126 |
-
|
| 127 |
-
for
|
| 128 |
lines = [f"- {r['๊ด๋ฆฌ๋ฒํธ']} {r.get('์ ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ ๋ชฉ','')}"
|
| 129 |
for _, r in g.iterrows()]
|
| 130 |
-
|
| 131 |
-
list_df = pd.DataFrame(
|
|
|
|
| 132 |
# ๋ํ ๋ฉํ
|
| 133 |
-
|
| 134 |
-
meta_exist = [c for c in
|
| 135 |
-
meta = df.groupby("๋ฐ์ค๋ฒํธ", as_index=False).first()[meta_exist] if meta_exist
|
|
|
|
|
|
|
| 136 |
merged = meta.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(yr, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 137 |
-
return merged
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
if
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
+
import io, zipfile, re, html, json
|
| 4 |
+
from typing import Dict, Tuple, Optional
|
| 5 |
+
|
| 6 |
+
st.set_page_config(page_title="๐ฆ ๋ฐ์ค๋ผ๋ฒจ(HWPX) โ ํ๋ ์์ ์นํ", layout="wide")
|
| 7 |
+
st.title("๐ฆ ๋ฐ์ค๋ผ๋ฒจ ์๋ ์์ฑ๊ธฐ โ HWPX ํ๋ ์์ ์นํ(๋ชจ๋ XML / ์ ๋์ด ์์ผ๋์นด๋)")
|
| 8 |
+
|
| 9 |
+
# ================== ๋ฐ์ดํฐ ์ ํธ ==================
|
| 10 |
+
def _year_range(series: pd.Series) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
s = series.astype(str).fillna("")
|
| 12 |
v = s[~s.isin(["", "0", "0000"])]
|
| 13 |
+
if v.empty: return "0000-0000"
|
|
|
|
| 14 |
nums = pd.to_numeric(v, errors="coerce").dropna().astype(int)
|
| 15 |
+
if nums.empty: return "0000-0000"
|
|
|
|
| 16 |
return f"{nums.min():04d}-{nums.max():04d}"
|
| 17 |
|
| 18 |
+
def build_rows(df: pd.DataFrame) -> pd.DataFrame:
|
| 19 |
df = df.copy()
|
| 20 |
df["๋ฐ์ค๋ฒํธ"] = df["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4)
|
| 21 |
+
if "์ ๋ชฉ" in df.columns:
|
| 22 |
+
df["์ ๋ชฉ"] = df["์ ๋ชฉ"].astype(str)
|
| 23 |
+
|
| 24 |
+
# ์์ฐ์ฐ๋(๋ฒ์) = ์ข
๋ฃ์ฐ๋ ๊ทธ๋ฃน ๋ฒ์
|
| 25 |
if "์ข
๋ฃ์ฐ๋" in df.columns:
|
| 26 |
+
yr = df.groupby("๋ฐ์ค๋ฒํธ")["์ข
๋ฃ์ฐ๋"].apply(_year_range).reset_index()
|
| 27 |
yr.columns = ["๋ฐ์ค๋ฒํธ", "์์ฐ์ฐ๋"]
|
| 28 |
else:
|
| 29 |
yr = pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique(), "์์ฐ์ฐ๋": "0000-0000"})
|
| 30 |
+
|
| 31 |
+
# ๋ชฉ๋ก(๊ด๋ฆฌ๋ฒํธ + ์ ๋ชฉ)
|
| 32 |
has_mgmt = "๊ด๋ฆฌ๋ฒํธ" in df.columns
|
| 33 |
+
lists = []
|
| 34 |
+
for b, g in df.groupby("๋ฐ์ค๋ฒํธ"):
|
| 35 |
lines = [f"- {r['๊ด๋ฆฌ๋ฒํธ']} {r.get('์ ๋ชฉ','')}" if has_mgmt else f"- {r.get('์ ๋ชฉ','')}"
|
| 36 |
for _, r in g.iterrows()]
|
| 37 |
+
lists.append({"๋ฐ์ค๋ฒํธ": b, "๋ชฉ๋ก": "\r\n".join(lines)})
|
| 38 |
+
list_df = pd.DataFrame(lists)
|
| 39 |
+
|
| 40 |
# ๋ํ ๋ฉํ
|
| 41 |
+
meta_cols = ["๋ฐ์ค๋ฒํธ","์ข
๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์
๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","์ ๋ชฉ"]
|
| 42 |
+
meta_exist = [c for c in meta_cols if c in df.columns]
|
| 43 |
+
meta = df.groupby("๋ฐ์ค๋ฒํธ", as_index=False).first()[meta_exist] if meta_exist \
|
| 44 |
+
else pd.DataFrame({"๋ฐ์ค๋ฒํธ": df["๋ฐ์ค๋ฒํธ"].unique()})
|
| 45 |
+
|
| 46 |
merged = meta.merge(list_df, on="๋ฐ์ค๋ฒํธ", how="left").merge(yr, on="๋ฐ์ค๋ฒํธ", how="left")
|
| 47 |
+
return merged
|
| 48 |
+
|
| 49 |
+
# ================== ์นํ ์ ํธ ==================
|
| 50 |
+
# ์ ๋์ด ์์ผ๋์นด๋: <hp:..> ๋ฟ ์๋๋ผ <hwp:..>, <h:..> ๋ฑ ๋ชจ๋ ํ์ฉ
|
| 51 |
+
FIELD_PAIR_RE_TMPL = (
|
| 52 |
+
r'<(?P<prefix>[a-zA-Z0-9_]+):fieldBegin\b[^>]*\bname="{name}"[^>]*/>'
|
| 53 |
+
r'(.*?)'
|
| 54 |
+
r'<(?P=prefix):fieldEnd\b[^>]*/>'
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# ํ ํฐ(๋ฐฑ์
๊ฒฝ๋ก)
|
| 58 |
+
TOKEN_FMT = "{{{{{key}}}}}"
|
| 59 |
+
|
| 60 |
+
def _run_for_plain(text: str) -> str:
|
| 61 |
+
return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
|
| 62 |
+
|
| 63 |
+
def _run_for_list(text: str) -> str:
|
| 64 |
+
if text is None: return ""
|
| 65 |
+
lines = str(text).replace("\r\n","\n").split("\n")
|
| 66 |
+
parts = []
|
| 67 |
+
for i, ln in enumerate(lines):
|
| 68 |
+
if i>0: parts.append("<hp:lineBreak/>")
|
| 69 |
+
parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
|
| 70 |
+
return "".join(parts)
|
| 71 |
+
|
| 72 |
+
def _apply_to_xml(xml: str, mapping: Dict[str, str], dbg: dict) -> str:
|
| 73 |
+
changed_any = False
|
| 74 |
+
|
| 75 |
+
# 1) ํ๋์ ์์ ์นํ (๋ชจ๋ ์ ๋์ด, ๋ชจ๋ XML ๋์)
|
| 76 |
+
for k, v in mapping.items():
|
| 77 |
+
is_list = bool(re.match(r"^(๋ชฉ๋ก|list)\d+$", k, re.IGNORECASE))
|
| 78 |
+
replacement = _run_for_list(v) if is_list else _run_for_plain(v)
|
| 79 |
+
|
| 80 |
+
pat = re.compile(FIELD_PAIR_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
|
| 81 |
+
xml_new, n = pat.subn(replacement, xml)
|
| 82 |
+
if n:
|
| 83 |
+
dbg["field_hits"][k] = dbg["field_hits"].get(k, 0) + n
|
| 84 |
+
xml = xml_new
|
| 85 |
+
changed_any = True
|
| 86 |
+
|
| 87 |
+
# 2) ๋ฐฑ์
๊ฒฝ๋ก: ํ ํฐ ์นํ ({{ํค}}๊ฐ ์ด๋๊ฐ ๋จ์์๋ค๋ฉด)
|
| 88 |
+
for k, v in mapping.items():
|
| 89 |
+
tok = TOKEN_FMT.format(key=k)
|
| 90 |
+
if tok in xml:
|
| 91 |
+
rep = _run_for_list(v) if re.match(r"^(๋ชฉ๋ก|list)\d+$", k, re.IGNORECASE) else html.escape("" if v is None else str(v))
|
| 92 |
+
xml = xml.replace(tok, rep)
|
| 93 |
+
dbg["token_hits"][k] = dbg["token_hits"].get(k, 0) + 1
|
| 94 |
+
changed_any = True
|
| 95 |
+
|
| 96 |
+
if changed_any:
|
| 97 |
+
dbg["files_touched"] = True
|
| 98 |
+
return xml
|
| 99 |
+
|
| 100 |
+
def replace_in_hwpx(hwpx_bytes: bytes, mapping: Dict[str,str]) -> Tuple[bytes, dict]:
    """Rewrite an HWPX (zip) package with *mapping* applied to every XML entry.

    Each entry whose name ends in ``.xml`` is decoded as UTF-8, passed through
    ``_apply_to_xml`` and re-encoded if it changed. All other entries are
    copied through unchanged.

    Args:
        hwpx_bytes: Raw bytes of the template package.
        mapping: Field/token name -> value to insert.

    Returns:
        ``(new_package_bytes, dbg)`` where ``dbg`` holds ``field_hits`` and
        ``token_hits`` (per-key counts across all entries) and
        ``touched_files`` (names of the entries that were modified). When an
        entry could not be processed, ``dbg["skipped_files"]`` additionally
        lists ``{"file", "error"}`` records; the key is absent otherwise.

    Raises:
        zipfile.BadZipFile: If *hwpx_bytes* is not a valid zip archive.
    """
    dbg = {"field_hits": {}, "token_hits": {}, "touched_files": []}
    out_buf = io.BytesIO()

    # Context managers guarantee both archives are closed (and the output's
    # central directory is written) even if an entry fails part-way through.
    with zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r") as zin, \
            zipfile.ZipFile(out_buf, "w") as zout:
        # The "mimetype" entry is written first and stored uncompressed.
        if "mimetype" in zin.namelist():
            mime_info = zipfile.ZipInfo("mimetype")
            mime_info.compress_type = zipfile.ZIP_STORED
            zout.writestr(mime_info, zin.read("mimetype"))

        for entry in zin.infolist():
            if entry.filename == "mimetype":
                continue
            data = zin.read(entry)

            if entry.filename.lower().endswith(".xml"):
                # Per-file stats share the hit counters with dbg so counts
                # accumulate across entries.
                per_file = {
                    "field_hits": dbg["field_hits"],
                    "token_hits": dbg["token_hits"],
                    "files_touched": False,
                }
                try:
                    # Strict decoding: silently dropping undecodable bytes
                    # and re-encoding would corrupt the entry.
                    text = data.decode("utf-8")
                    updated = _apply_to_xml(text, mapping, per_file)
                except Exception as exc:
                    # Best effort: keep the original bytes for this entry,
                    # but record why instead of hiding the failure.
                    dbg.setdefault("skipped_files", []).append(
                        {"file": entry.filename, "error": f"{type(exc).__name__}: {exc}"}
                    )
                else:
                    if updated != text:
                        dbg["touched_files"].append(entry.filename)
                        data = updated.encode("utf-8")

            # Preserve the entry's original timestamp rather than resetting it.
            out_info = zipfile.ZipInfo(entry.filename, date_time=entry.date_time)
            out_info.compress_type = zipfile.ZIP_DEFLATED
            zout.writestr(out_info, data)

    return out_buf.getvalue(), dbg
|
| 131 |
+
|
| 132 |
+
# ================== UI ==================
|
| 133 |
+
with st.expander("์ฌ์ฉ๋ฒ", expanded=True):
|
| 134 |
+
st.markdown("""
|
| 135 |
+
- ํ
ํ๋ฆฟ์ **HWPX(ํ๊ธ)**์ด๋ฉฐ, ํ๋์ปจํธ๋กค `name="๋ฐ์ค๋ฒํธ1"` ๋ฑ์ ์ด๋ฆ์ ๊ทธ๋๋ก ์ฌ์ฉํฉ๋๋ค.
|
| 136 |
+
- ์ด ์ฑ์ ZIP ๋ด๋ถ์ **๋ชจ๋ XML**์ ํ์ผ๋ฉฐ, ์ ๋์ด๊ฐ ๋ฌด์์ด๋ (`<hp:...>`, `<hwp:...>` ๋ฑ)
|
| 137 |
+
**`fieldBegin(name=ํค)` ~ `fieldEnd` ์ฌ์ด ์ ์ฒด ๋ธ๋ก์ ๊ฐ run๋ค๋ก ๊ต์ฒด**ํฉ๋๋ค.
|
| 138 |
+
- ํ ํฐ(`{{๋ฐ์ค๋ฒํธ1}}`)์ด ๋จ์ ์์ผ๋ฉด **๋ฐฑ์
๊ฒฝ๋ก**๋ก ๊ทธ ์๋ฆฌ๋ ์นํํฉ๋๋ค.
|
| 139 |
+
""")
|
| 140 |
+
|
| 141 |
+
tpl = st.file_uploader("๐ HWPX ํ
ํ๋ฆฟ ์
๋ก๋", type=["hwpx"])
|
| 142 |
+
n_per_page = st.number_input("ํ
ํ๋ฆฟ์ ๋ผ๋ฒจ ์ธํธ ๊ฐ์(ํ ํ์ด์ง N๊ฐ)", 1, 12, 3, 1)
|
| 143 |
+
data = st.file_uploader("๐ ๋ฐ์ดํฐ ์
๋ก๋ (Excel/CSV)", type=["xlsx","xls","csv"])
|
| 144 |
|
| 145 |
+
if tpl and data:
|
| 146 |
+
tpl_bytes = tpl.read()
|
| 147 |
+
df = pd.read_csv(data) if data.name.lower().endswith(".csv") else pd.read_excel(data)
|
| 148 |
+
|
| 149 |
+
if "๋ฐ์ค๋ฒํธ" not in df.columns:
|
| 150 |
+
st.error("โ ํ์ ์ปฌ๋ผ '๋ฐ์ค๋ฒํธ'๊ฐ ์์ต๋๋ค.")
|
| 151 |
+
st.stop()
|
| 152 |
+
|
| 153 |
+
st.success("โ
์์น ๋งคํ ์๋ฃ (์์
์ธก)")
|
| 154 |
+
st.dataframe(df.head(10), use_container_width=True)
|
| 155 |
|
| 156 |
+
merged = build_rows(df)
|
| 157 |
+
boxes = merged["๋ฐ์ค๋ฒํธ"].astype(str).str.zfill(4).unique().tolist()
|
| 158 |
+
|
| 159 |
+
st.subheader("๐ ์
๋ก๋๋ ๋ฐ์ค๋ฒํธ ๋ชฉ๋ก")
|
| 160 |
+
st.write(f"์ด **{len(boxes)}**๊ฐ")
|
| 161 |
+
st.dataframe(pd.DataFrame({"๋ฐ์ค๋ฒํธ": boxes}), use_container_width=True, height=240)
|
| 162 |
+
|
| 163 |
+
sel = st.multiselect("์์ฑํ ๋ฐ์ค๋ฒํธ ์ ํ (๋น์ฐ๋ฉด ์ ์ฒด)", options=boxes)
|
| 164 |
+
work = merged[merged["๋ฐ์ค๋ฒํธ"].isin(sel)] if sel else merged
|
| 165 |
+
records = work.sort_values("๋ฐ์ค๋ฒํธ").to_dict(orient="records")
|
| 166 |
+
|
| 167 |
+
# 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ
|
| 168 |
+
st.subheader("๐งช 1ํ์ด์ง ๋งคํ ํ๋ฆฌ๋ทฐ")
|
| 169 |
+
keys = ["๋ฐ์ค๋ฒํธ","์ข
๋ฃ์ฐ๋","๋ณด์กด๊ธฐ๊ฐ","๋จ์์
๋ฌด","๊ธฐ๋ก๋ฌผ์ฒ ","๋ชฉ๋ก"]
|
| 170 |
+
mapping_preview = {}
|
| 171 |
+
for i in range(int(n_per_page)):
|
| 172 |
+
if i < len(records):
|
| 173 |
+
r = records[i]
|
| 174 |
+
for k in keys:
|
| 175 |
+
mapping_preview[f"{k}{i+1}"] = r.get("์์ฐ์ฐ๋","") if k=="์ข
๋ฃ์ฐ๋" else r.get(k,"")
|
| 176 |
+
else:
|
| 177 |
+
for k in keys:
|
| 178 |
+
mapping_preview[f"{k}{i+1}"] = ""
|
| 179 |
+
st.dataframe(pd.DataFrame([{"ํค":k, "๊ฐ ์๋ถ๋ถ":str(v)[:120]} for k,v in sorted(mapping_preview.items())]),
|
| 180 |
+
use_container_width=True, height=320)
|
| 181 |
+
|
| 182 |
+
if st.button("๐ ๋ผ๋ฒจ ์์ฑ (ํ์ด์ง๋ณ HWPX ZIP)"):
|
| 183 |
+
mem = io.BytesIO(); zout = zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED)
|
| 184 |
+
pages = (len(records) + int(n_per_page) - 1) // int(n_per_page)
|
| 185 |
+
debug_all = []
|
| 186 |
+
|
| 187 |
+
for p in range(pages):
|
| 188 |
+
chunk = records[p*int(n_per_page):(p+1)*int(n_per_page)]
|
| 189 |
+
# ๋งคํ ๊ตฌ์ถ
|
| 190 |
+
mapping = {}
|
| 191 |
+
for i in range(int(n_per_page)):
|
| 192 |
+
if i < len(chunk):
|
| 193 |
+
r = chunk[i]
|
| 194 |
+
mapping[f"๋ฐ์ค๋ฒํธ{i+1}"] = r.get("๋ฐ์ค๋ฒํธ","")
|
| 195 |
+
mapping[f"์ข
๋ฃ์ฐ๋{i+1}"] = r.get("์์ฐ์ฐ๋","")
|
| 196 |
+
mapping[f"๋ณด์กด๊ธฐ๊ฐ{i+1}"] = r.get("๋ณด์กด๊ธฐ๊ฐ","")
|
| 197 |
+
mapping[f"๋จ์์
๋ฌด{i+1}"] = r.get("๋จ์์
๋ฌด","")
|
| 198 |
+
mapping[f"๊ธฐ๋ก๋ฌผ์ฒ {i+1}"] = r.get("๊ธฐ๋ก๋ฌผ์ฒ ","")
|
| 199 |
+
mapping[f"๋ชฉ๋ก{i+1}"] = r.get("๋ชฉ๋ก","")
|
| 200 |
+
else:
|
| 201 |
+
for k in keys:
|
| 202 |
+
mapping[f"{k}{i+1}"] = ""
|
| 203 |
+
|
| 204 |
+
out_hwpx, dbg = replace_in_hwpx(tpl_bytes, mapping)
|
| 205 |
+
debug_all.append({"page": p+1, "stats": dbg})
|
| 206 |
+
name = "_".join([r.get("๋ฐ์ค๋ฒํธ","") for r in chunk]) if chunk else f"empty_{p+1}"
|
| 207 |
+
zout.writestr(f"label_{name}.hwpx", out_hwpx)
|
| 208 |
+
|
| 209 |
+
zout.close(); mem.seek(0)
|
| 210 |
+
st.download_button("โฌ๏ธ ZIP ๋ค์ด๋ก๋", data=mem, file_name="labels_by_page.zip", mime="application/zip")
|
| 211 |
+
st.download_button("โฌ๏ธ ๋๋ฒ๊ทธ(JSON)", data=json.dumps(debug_all, ensure_ascii=False, indent=2),
|
| 212 |
+
file_name="debug.json", mime="application/json")
|