Antonio0616 committed on
Commit
a1f9c4e
·
verified ·
1 Parent(s): 6d79b8b

Update predict_blend.py

Browse files
Files changed (1) hide show
  1. predict_blend.py +88 -48
predict_blend.py CHANGED
@@ -1,21 +1,18 @@
1
  # predict_blend.py
2
  import os, json, numpy as np, pandas as pd, torch, lightgbm as lgb
3
  import torch.nn as nn
4
- from huggingface_hub import snapshot_download
5
- from pathlib import Path
6
- from itertools import product
7
-
8
- MODEL_REPO = "Antonio0616/FormingStar"
9
-
10
- # ✅ 반드시 dataset 으로 명시해야 함
11
 
12
- MODEL_DIR = snapshot_download(repo_id=MODEL_REPO, repo_type="dataset")
13
- ART_DIR = str(Path(MODEL_DIR).resolve())
 
 
 
 
14
  CAT_COL = "material"
15
  NUM_COLS = ["thickness","diameter","degree","upper_radius","lower_radius","LB","RB"]
16
 
17
  # =========================
18
- # FT-Transformer 정의
19
  # =========================
20
  class FTTransformer(nn.Module):
21
  def __init__(self, n_materials:int, n_num:int, d_model:int=192, nhead:int=8,
@@ -49,11 +46,35 @@ class FTTransformer(nn.Module):
49
  h = self.encoder(tokens)
50
  return self.head(h[:, 0, :])
51
 
 
52
  def _scale_like_fold(X_num: np.ndarray, mean: np.ndarray, scale: np.ndarray) -> np.ndarray:
53
  return ((X_num - mean) / scale).astype(np.float32)
54
 
55
  # =========================
56
- # 모델 불러오기 헬퍼
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # =========================
58
  def _first_existing(*paths):
59
  for p in paths:
@@ -108,6 +129,15 @@ def _load_json_like(art_dir: str, basename: str) -> dict:
108
  with open(p, "r", encoding="utf-8") as f:
109
  return json.load(f)
110
 
 
 
 
 
 
 
 
 
 
111
  # =========================
112
  # Predictor
113
  # =========================
@@ -116,22 +146,40 @@ class BlendPredictor:
116
  self.art_dir = art_dir
117
  self.folds_ft = _load_ft_folds(art_dir)
118
  self.boosters = _load_lgbm_folds(art_dir)
119
- self.materials = _load_json_like(art_dir, "materials")["materials"]
120
- self.best_alpha = float(_load_json_like(art_dir, "blend_alpha")["best_alpha"])
121
 
122
- self.materials = [str(m).strip() for m in self.materials]
123
- self.mat2id = {m: i for i, m in enumerate(self.materials)}
 
124
  self.unknown_policy = unknown_policy
125
 
def _prep_df(self, df_new: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df_new`` ready for the fold models.

    Steps, in order:
      1. the material column is cast to ``str`` and stripped;
      2. when ``self.unknown_policy == "error"``, any label not present in
         ``self.materials`` raises ``ValueError``;
      3. ``_mat_id`` is added from ``self.mat2id`` (labels without an entry
         fall back to id 0);
      4. the numeric columns are coerced with ``pd.to_numeric`` (values that
         cannot be parsed become NaN).

    The input frame is not modified.
    """
    frame = df_new.copy()

    labels = frame[CAT_COL].astype(str).str.strip()
    frame[CAT_COL] = labels

    if self.unknown_policy == "error":
        is_known = labels.isin(self.materials)
        unknown = labels[~is_known].unique().tolist()
        if unknown:
            raise ValueError(f"Unknown materials in input {unknown}")

    # Labels missing from mat2id map to NaN first, hence the fill before the cast.
    frame["_mat_id"] = labels.map(self.mat2id).fillna(0).astype(int)

    coerced = frame[NUM_COLS].apply(pd.to_numeric, errors="coerce")
    frame[NUM_COLS] = coerced
    return frame
136
 
137
  def predict_ft(self, df_new: pd.DataFrame) -> np.ndarray:
@@ -150,7 +198,7 @@ class BlendPredictor:
def predict_lgbm(self, df_new: pd.DataFrame) -> np.ndarray:
    """Average the predictions of every booster in ``self.boosters``.

    The frame is cleaned with ``self._prep_df``; the material column is then
    turned into a ``pd.Categorical`` whose categories are ``self.materials``
    before being handed to each booster. Each booster predicts with its
    ``best_iteration`` attribute when it has one (``None`` otherwise).
    """
    prepared = self._prep_df(df_new)
    features = prepared[[CAT_COL, *NUM_COLS]].copy()
    features[CAT_COL] = pd.Categorical(prepared[CAT_COL], categories=self.materials)

    fold_preds = []
    for booster in self.boosters:
        n_iter = getattr(booster, "best_iteration", None)
        fold_preds.append(booster.predict(features, num_iteration=n_iter))
    return np.mean(fold_preds, axis=0)
@@ -162,32 +210,24 @@ class BlendPredictor:
162
  p_lgb = self.predict_lgbm(df_new)
163
  return alpha * p_dl + (1 - alpha) * p_lgb
164
 
# Range input -> generate every parameter combination
def expand_ranges(self, cfg: dict) -> pd.DataFrame:
    """Expand a range-style config into one row per parameter combination.

    Expected keys in ``cfg``:
      * ``"materials"`` (or, as a fallback, ``"material"``): a list of
        material labels, or a single label string;
      * for each of thickness, diameter, degree, upper_radius, lower_radius:
        ``"min_<col>"``, ``"max_<col>"`` and ``"<col>_step"``;
      * optional ``"beads"``: list drawn from ``"none"``, ``"left"``,
        ``"right"``, ``"double"`` (default ``["none"]``).

    Example:
        {"materials": ["SPCC"], "min_thickness": 0.7, "max_thickness": 1.2,
         "thickness_step": 0.1, ..., "beads": ["left", "double"]}

    Returns:
        DataFrame with columns material, thickness, diameter, degree,
        upper_radius, lower_radius, LB, RB. The columns are present even when
        the grid is empty.

    Raises:
        KeyError: a required key is missing or a bead name is unknown.
        ValueError: a step is not strictly positive.
    """
    from itertools import product  # local import keeps the method self-contained

    bead_map = {
        "none": (0, 0), "left": (1, 0), "right": (0, 1), "double": (1, 1),
    }

    # Categorical axis. A bare string would otherwise be iterated per character.
    materials = cfg["materials"] if "materials" in cfg else cfg["material"]
    if isinstance(materials, str):
        materials = [materials]

    columns = ["material"]
    axes = [list(materials)]

    # Continuous axes: inclusive grid from min to max.
    for col in ["thickness", "diameter", "degree", "upper_radius", "lower_radius"]:
        lo = cfg[f"min_{col}"]
        hi = cfg[f"max_{col}"]
        step = cfg[f"{col}_step"]
        if step <= 0:
            raise ValueError(f"{col}_step must be positive, got {step}")
        # The small epsilon keeps ``hi`` itself in the grid despite float error.
        axes.append(np.arange(lo, hi + 1e-9, step).round(3))
        columns.append(col)

    # Beads: each name is ONE (LB, RB) pair and must stay paired. Feeding LB and
    # RB to product() as separate axes would generate pairs nobody asked for.
    bead_pairs = list(dict.fromkeys(bead_map[b] for b in cfg.get("beads", ["none"])))

    rows = []
    for combo in product(*axes, bead_pairs):
        row = dict(zip(columns, combo[:-1]))
        row["LB"], row["RB"] = combo[-1]
        rows.append(row)
    return pd.DataFrame(rows, columns=columns + ["LB", "RB"])
 
1
  # predict_blend.py
2
  import os, json, numpy as np, pandas as pd, torch, lightgbm as lgb
3
  import torch.nn as nn
 
 
 
 
 
 
 
4
 
5
+ # =========================
6
+ # Config
7
+ # =========================
8
+ from pathlib import Path
9
+ BASE_DIR = Path(__file__).resolve().parent
10
+ ART_DIR = str((BASE_DIR / "artifacts_blend").resolve())
11
  CAT_COL = "material"
12
  NUM_COLS = ["thickness","diameter","degree","upper_radius","lower_radius","LB","RB"]
13
 
14
  # =========================
15
+ # FT-Transformer
16
  # =========================
17
  class FTTransformer(nn.Module):
18
  def __init__(self, n_materials:int, n_num:int, d_model:int=192, nhead:int=8,
 
46
  h = self.encoder(tokens)
47
  return self.head(h[:, 0, :])
48
 
49
+
50
  def _scale_like_fold(X_num: np.ndarray, mean: np.ndarray, scale: np.ndarray) -> np.ndarray:
51
  return ((X_num - mean) / scale).astype(np.float32)
52
 
53
  # =========================
54
+ # Material label helpers
55
+ # =========================
56
+ def _canonize_list(materials):
57
+ return [str(m).strip() for m in materials]
58
+
59
+ def _build_alias2canon(canon_list):
60
+ alias2canon = {}
61
+ for c in canon_list:
62
+ alias2canon[c] = c
63
+ s = c.strip()
64
+ alias2canon[s] = c
65
+ if "." in s:
66
+ alias2canon[s.rstrip("0").rstrip(".")] = c
67
+ try:
68
+ v = float(s)
69
+ alias2canon[str(v)] = c
70
+ if v.is_integer():
71
+ alias2canon[str(int(v))] = c
72
+ except:
73
+ pass
74
+ return alias2canon
75
+
76
+ # =========================
77
+ # Loader helpers
78
  # =========================
79
  def _first_existing(*paths):
80
  for p in paths:
 
129
  with open(p, "r", encoding="utf-8") as f:
130
  return json.load(f)
131
 
def _load_materials(art_dir: str, folds_ft):
    """Return the list of material labels used by the models.

    The ``"materials"`` entry of the ``materials`` JSON artifact in
    ``art_dir`` is preferred. When ``_load_json_like`` raises
    ``FileNotFoundError``, the ``"materials"`` entry of the first element of
    ``folds_ft`` is returned instead.
    """
    try:
        payload = _load_json_like(art_dir, "materials")
    except FileNotFoundError:
        # No standalone artifact: use what the first FT fold carries.
        first_fold = folds_ft[0]
        return first_fold["materials"]
    return payload["materials"]
137
+
def _load_best_alpha(art_dir: str) -> float:
    """Read ``best_alpha`` from the ``blend_alpha`` JSON artifact as a float."""
    payload = _load_json_like(art_dir, "blend_alpha")
    raw_alpha = payload["best_alpha"]
    return float(raw_alpha)
140
+
141
  # =========================
142
  # Predictor
143
  # =========================
 
146
  self.art_dir = art_dir
147
  self.folds_ft = _load_ft_folds(art_dir)
148
  self.boosters = _load_lgbm_folds(art_dir)
149
+ self.materials = _load_materials(art_dir, self.folds_ft)
150
+ self.best_alpha = _load_best_alpha(art_dir)
151
 
152
+ self.materials_canon = _canonize_list(self.materials)
153
+ self.alias2canon = _build_alias2canon(self.materials_canon)
154
+ self.mat2id = {m: i for i, m in enumerate(self.materials_canon)}
155
  self.unknown_policy = unknown_policy
156
 
def _prep_df(self, df_new: pd.DataFrame) -> pd.DataFrame:
    """Validate ``df_new`` and return an enriched copy for the fold models.

    The returned frame has two extra columns:
      * ``_mat_canon`` -- canonical material label resolved via
        ``self.alias2canon``;
      * ``_mat_id``    -- integer id of that label from ``self.mat2id``.

    Unknown materials raise ``ValueError`` when ``self.unknown_policy`` is
    ``"error"``; under any other policy they are replaced by the first
    canonical material.

    Raises:
        ValueError: a required column is missing, a material is unknown
            (policy ``"error"``), or a numeric column holds a value that is
            null after ``pd.to_numeric(..., errors="coerce")``.
    """
    frame = df_new.copy()

    absent = [col for col in [CAT_COL] + NUM_COLS if col not in frame.columns]
    if absent:
        raise ValueError(f"Missing columns in input: {absent}")

    frame[CAT_COL] = frame[CAT_COL].astype(str).str.strip()
    frame["_mat_canon"] = frame[CAT_COL].map(self.alias2canon)

    if self.unknown_policy == "error":
        unresolved = frame["_mat_canon"].isna()
        unknown = frame.loc[unresolved, CAT_COL].unique().tolist()
        if unknown:
            known = self.materials_canon
            # Only the first ten known labels are shown to keep the message short.
            suffix = " ..." if len(known) > 10 else ""
            raise ValueError(
                f"Unknown materials in input {unknown}. "
                f"Known materials: {known[:10]}{suffix}"
            )
    else:
        fallback = self.materials_canon[0]
        frame["_mat_canon"] = frame["_mat_canon"].fillna(fallback)

    # At this point every row has a canonical label, so the int cast is safe.
    frame["_mat_id"] = frame["_mat_canon"].map(self.mat2id).astype(int)

    frame[NUM_COLS] = frame[NUM_COLS].apply(pd.to_numeric, errors="coerce")
    null_by_col = frame[NUM_COLS].isnull().any()
    if null_by_col.any():
        bad = null_by_col[null_by_col].index.tolist()
        raise ValueError(f"Non-numeric values detected in columns: {bad}")
    return frame
184
 
185
  def predict_ft(self, df_new: pd.DataFrame) -> np.ndarray:
 
def predict_lgbm(self, df_new: pd.DataFrame) -> np.ndarray:
    """Return the mean prediction over all boosters in ``self.boosters``.

    Input is cleaned by ``self._prep_df``. The material column handed to the
    boosters is a ``pd.Categorical`` built from the canonical labels
    (``_mat_canon``) with categories ``self.materials_canon``. Each booster
    predicts with its ``best_iteration`` attribute when present, else ``None``.
    """
    prepared = self._prep_df(df_new)

    features = prepared[[CAT_COL, *NUM_COLS]].copy()
    features[CAT_COL] = pd.Categorical(
        prepared["_mat_canon"], categories=self.materials_canon
    )

    per_fold = []
    for booster in self.boosters:
        best_iter = getattr(booster, "best_iteration", None)
        per_fold.append(booster.predict(features, num_iteration=best_iter))
    return np.mean(per_fold, axis=0)
 
210
  p_lgb = self.predict_lgbm(df_new)
211
  return alpha * p_dl + (1 - alpha) * p_lgb
212
 
213
+ # =========================
214
+ # Example run
215
+ # =========================
if __name__ == "__main__":
    # Two sample rows that share the same geometry and differ only in material.
    base = dict(
        thickness=1, diameter=20, degree=73,
        upper_radius=3, lower_radius=2,
        LB=0, RB=1,
    )
    df_new = pd.DataFrame([{**base, "material": grade} for grade in ("590", "440")])

    predictor = BlendPredictor(ART_DIR, unknown_policy="error")
    print("materials (trained):", predictor.materials_canon[:10])
    print("best_alpha:", predictor.best_alpha)

    # alpha=1.0 -> deep model only, alpha=0.0 -> LightGBM only,
    # no alpha -> whatever predict_blend uses by default.
    for label, kwargs in (
        ("\nDL only :", {"alpha": 1.0}),
        ("LGBM only:", {"alpha": 0.0}),
        ("Blend :", {}),
    ):
        print(label, predictor.predict_blend(df_new, **kwargs))