Antonio0616 committed on
Commit
52ce084
·
verified ·
1 Parent(s): 3a63f19

Update predict_blend.py

Browse files
Files changed (1) hide show
  1. predict_blend.py +47 -63
predict_blend.py CHANGED
@@ -1,21 +1,23 @@
1
  # predict_blend.py
2
  import os, json, numpy as np, pandas as pd, torch, lightgbm as lgb
3
  import torch.nn as nn
 
4
  from pathlib import Path
5
- from huggingface_hub import snapshot_download # ✅ Hugging Face dataset 불러오기
6
 
7
  # =========================
8
  # Config
9
  # =========================
10
- # โœ… Hugging Face Dataset์—์„œ ๋ชจ๋ธ ํŒŒ์ผ ์ž๋™ ๋‹ค์šด๋กœ๋“œ
11
- dataset_path = snapshot_download(repo_id="Antonio0616/foemingstar-model")
12
- ART_DIR = os.path.join(dataset_path, "") # artifacts_blend 폴더 대신 Dataset 사용
 
13
 
14
  CAT_COL = "material"
15
  NUM_COLS = ["thickness","diameter","degree","upper_radius","lower_radius","LB","RB"]
16
 
17
  # =========================
18
- # FT-Transformer
19
  # =========================
20
  class FTTransformer(nn.Module):
21
  def __init__(self, n_materials:int, n_num:int, d_model:int=192, nhead:int=8,
@@ -49,10 +51,11 @@ class FTTransformer(nn.Module):
49
  h = self.encoder(tokens)
50
  return self.head(h[:, 0, :])
51
 
52
- # ✅ 이하 부분은 그대로 유지 (Loader, BlendPredictor 등)
 
53
 
54
  # =========================
55
- # Loader helpers
56
  # =========================
57
  def _first_existing(*paths):
58
  for p in paths:
@@ -107,15 +110,6 @@ def _load_json_like(art_dir: str, basename: str) -> dict:
107
  with open(p, "r", encoding="utf-8") as f:
108
  return json.load(f)
109
 
110
- def _load_materials(art_dir: str, folds_ft):
111
- try:
112
- return _load_json_like(art_dir, "materials")["materials"]
113
- except FileNotFoundError:
114
- return folds_ft[0]["materials"]
115
-
116
- def _load_best_alpha(art_dir: str) -> float:
117
- return float(_load_json_like(art_dir, "blend_alpha")["best_alpha"])
118
-
119
  # =========================
120
  # Predictor
121
  # =========================
@@ -124,40 +118,22 @@ class BlendPredictor:
124
  self.art_dir = art_dir
125
  self.folds_ft = _load_ft_folds(art_dir)
126
  self.boosters = _load_lgbm_folds(art_dir)
127
- self.materials = _load_materials(art_dir, self.folds_ft)
128
- self.best_alpha = _load_best_alpha(art_dir)
129
 
130
- self.materials_canon = _canonize_list(self.materials)
131
- self.alias2canon = _build_alias2canon(self.materials_canon)
132
- self.mat2id = {m: i for i, m in enumerate(self.materials_canon)}
133
  self.unknown_policy = unknown_policy
134
 
135
  def _prep_df(self, df_new: pd.DataFrame) -> pd.DataFrame:
136
  df = df_new.copy()
137
- need = [CAT_COL] + NUM_COLS
138
- missing = [c for c in need if c not in df.columns]
139
- if missing:
140
- raise ValueError(f"Missing columns in input: {missing}")
141
-
142
  df[CAT_COL] = df[CAT_COL].astype(str).str.strip()
143
- df["_mat_canon"] = df[CAT_COL].map(self.alias2canon)
144
-
145
  if self.unknown_policy == "error":
146
- unknown = df.loc[df["_mat_canon"].isna(), CAT_COL].unique().tolist()
147
  if unknown:
148
- raise ValueError(
149
- f"Unknown materials in input {unknown}. "
150
- f"Known materials: {self.materials_canon[:10]}{' ...' if len(self.materials_canon)>10 else ''}"
151
- )
152
- df["_mat_id"] = df["_mat_canon"].map(self.mat2id).astype(int)
153
- else:
154
- df["_mat_canon"] = df["_mat_canon"].fillna(self.materials_canon[0])
155
- df["_mat_id"] = df["_mat_canon"].map(self.mat2id).astype(int)
156
-
157
  df[NUM_COLS] = df[NUM_COLS].apply(pd.to_numeric, errors="coerce")
158
- if df[NUM_COLS].isnull().any().any():
159
- bad = df[NUM_COLS].columns[df[NUM_COLS].isnull().any()].tolist()
160
- raise ValueError(f"Non-numeric values detected in columns: {bad}")
161
  return df
162
 
163
  def predict_ft(self, df_new: pd.DataFrame) -> np.ndarray:
@@ -176,7 +152,7 @@ class BlendPredictor:
176
  def predict_lgbm(self, df_new: pd.DataFrame) -> np.ndarray:
177
  df = self._prep_df(df_new)
178
  X = df[[CAT_COL] + NUM_COLS].copy()
179
- X[CAT_COL] = pd.Categorical(df["_mat_canon"], categories=self.materials_canon)
180
  preds = [bst.predict(X, num_iteration=getattr(bst, "best_iteration", None))
181
  for bst in self.boosters]
182
  return np.mean(preds, axis=0)
@@ -188,24 +164,32 @@ class BlendPredictor:
188
  p_lgb = self.predict_lgbm(df_new)
189
  return alpha * p_dl + (1 - alpha) * p_lgb
190
 
191
- # =========================
192
- # Example run
193
- # =========================
194
- if __name__ == "__main__":
195
- base = {
196
- "thickness": 1, "diameter": 20, "degree": 73,
197
- "upper_radius": 3, "lower_radius": 2,
198
- "LB": 0, "RB": 1,
199
- }
200
- df_new = pd.DataFrame([
201
- {**base, "material": "590"},
202
- {**base, "material": "440"},
203
- ])
204
-
205
- predictor = BlendPredictor(ART_DIR, unknown_policy="error")
206
- print("materials (trained):", predictor.materials_canon[:10])
207
- print("best_alpha:", predictor.best_alpha)
208
-
209
- print("\nDL only :", predictor.predict_blend(df_new, alpha=1.0))
210
- print("LGBM only:", predictor.predict_blend(df_new, alpha=0.0))
211
- print("Blend :", predictor.predict_blend(df_new))
 
 
 
 
 
 
 
 
 
1
  # predict_blend.py
2
  import os, json, numpy as np, pandas as pd, torch, lightgbm as lgb
3
  import torch.nn as nn
4
+ from huggingface_hub import snapshot_download
5
  from pathlib import Path
6
+ from itertools import product
7
 
8
  # =========================
9
  # Config
10
  # =========================
11
+ MODEL_REPO = "Antonio0616/foemingstar-model" # 제자님 repo ID
12
+ BASE_DIR = Path(__file__).resolve().parent
13
+ MODEL_DIR = snapshot_download(repo_id=MODEL_REPO) # Hugging Face์—์„œ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
14
+ ART_DIR = str(Path(MODEL_DIR).resolve())
15
 
16
  CAT_COL = "material"
17
  NUM_COLS = ["thickness","diameter","degree","upper_radius","lower_radius","LB","RB"]
18
 
19
  # =========================
20
+ # FT-Transformer 정의
21
  # =========================
22
  class FTTransformer(nn.Module):
23
  def __init__(self, n_materials:int, n_num:int, d_model:int=192, nhead:int=8,
 
51
  h = self.encoder(tokens)
52
  return self.head(h[:, 0, :])
53
 
54
+ def _scale_like_fold(X_num: np.ndarray, mean: np.ndarray, scale: np.ndarray) -> np.ndarray:
55
+ return ((X_num - mean) / scale).astype(np.float32)
56
 
57
  # =========================
58
+ # 모델 불러오기 헬퍼
59
  # =========================
60
  def _first_existing(*paths):
61
  for p in paths:
 
110
  with open(p, "r", encoding="utf-8") as f:
111
  return json.load(f)
112
 
 
 
 
 
 
 
 
 
 
113
  # =========================
114
  # Predictor
115
  # =========================
 
118
  self.art_dir = art_dir
119
  self.folds_ft = _load_ft_folds(art_dir)
120
  self.boosters = _load_lgbm_folds(art_dir)
121
+ self.materials = _load_json_like(art_dir, "materials")["materials"]
122
+ self.best_alpha = float(_load_json_like(art_dir, "blend_alpha")["best_alpha"])
123
 
124
+ self.materials = [str(m).strip() for m in self.materials]
125
+ self.mat2id = {m: i for i, m in enumerate(self.materials)}
 
126
  self.unknown_policy = unknown_policy
127
 
128
  def _prep_df(self, df_new: pd.DataFrame) -> pd.DataFrame:
129
  df = df_new.copy()
 
 
 
 
 
130
  df[CAT_COL] = df[CAT_COL].astype(str).str.strip()
 
 
131
  if self.unknown_policy == "error":
132
+ unknown = df.loc[~df[CAT_COL].isin(self.materials), CAT_COL].unique().tolist()
133
  if unknown:
134
+ raise ValueError(f"Unknown materials in input {unknown}")
135
+ df["_mat_id"] = df[CAT_COL].map(self.mat2id).fillna(0).astype(int)
 
 
 
 
 
 
 
136
  df[NUM_COLS] = df[NUM_COLS].apply(pd.to_numeric, errors="coerce")
 
 
 
137
  return df
138
 
139
  def predict_ft(self, df_new: pd.DataFrame) -> np.ndarray:
 
152
  def predict_lgbm(self, df_new: pd.DataFrame) -> np.ndarray:
153
  df = self._prep_df(df_new)
154
  X = df[[CAT_COL] + NUM_COLS].copy()
155
+ X[CAT_COL] = pd.Categorical(df[CAT_COL], categories=self.materials)
156
  preds = [bst.predict(X, num_iteration=getattr(bst, "best_iteration", None))
157
  for bst in self.boosters]
158
  return np.mean(preds, axis=0)
 
164
  p_lgb = self.predict_lgbm(df_new)
165
  return alpha * p_dl + (1 - alpha) * p_lgb
166
 
167
+ # ✅ 범위 입력 → 경우의 수 생성 지원
168
+ def expand_ranges(self, cfg: dict) -> pd.DataFrame:
169
+ # cfg: {"materials": ["SPCC"], "min_thickness": 0.7, "max_thickness": 1.2, "thickness_step": 0.1, ...}
170
+ keys = []
171
+ values = []
172
+
173
+ # 범주형
174
+ keys.append("material")
175
+ values.append(cfg["materials"])
176
+
177
+ # 연속형
178
+ for col in ["thickness","diameter","degree","upper_radius","lower_radius"]:
179
+ lo = cfg[f"min_{col}"]
180
+ hi = cfg[f"max_{col}"]
181
+ step = cfg[f"{col}_step"]
182
+ values.append(np.arange(lo, hi+1e-9, step).round(3))
183
+ keys.append(col)
184
+
185
+ # 비드 (LB, RB 변환)
186
+ bead_map = {
187
+ "none": (0,0), "left": (1,0), "right": (0,1), "double": (1,1)
188
+ }
189
+ beads = [bead_map[b] for b in cfg.get("beads", ["none"])]
190
+ LB, RB = zip(*beads)
191
+ keys.extend(["LB","RB"])
192
+ values.extend([LB, RB])
193
+
194
+ combos = [dict(zip(keys, v)) for v in product(*values)]
195
+ return pd.DataFrame(combos)