Spaces:
Running
fix: remaining v3 bugs - TFT n_feat detection, simulate URL, features from artifacts
Browse files- model_registry: _detect_n_feat now checks var_selection.softmax_proj.bias FIRST
(before lstm.weight_ih_l0) so TFT detection is correct: TFT's LSTM input is
d_model (64) not n_features, so the old order returned 64 instead of 12/18,
causing Missing key errors for feature_embeddings.12..63 on every boot
- model_registry: add _NON_FEATURE_COLS constant for CSV-to-feature filtering
- model_registry: add self.feature_cols (populated by load_all -> _load_feature_cols)
- model_registry: _load_feature_cols() reads battery_features.csv from
artifacts/{version}/features/, strips non-feature columns, validates against
scaler.n_features_in_; falls back to FEATURE_COLS_SCALAR for v1/v2/missing
- model_registry: _build_x now uses self.feature_cols (18 for v3, 12 for v1/v2)
so v3 classical models (all retrained with 18 features) get correct input
- api.ts: fix /api/api/v3/simulate -> /v3/simulate (baseApi already has /api prefix,
so prepending /api caused double prefix and 405 on every simulate call)
- itransformer.py: add build() stub to FeatureWiseMHA, TokenWiseMHA,
Conv1DFeedForward, DynamicGraphConv to silence Keras unbuilt-state warnings
- api/model_registry.py +53 -7
- frontend/src/api.ts +1 -1
- src/models/deep/itransformer.py +12 -0
|
@@ -70,6 +70,13 @@ FEATURE_COLS_SCALAR: list[str] = [
|
|
| 70 |
"cycle_duration", "Re", "Rct", "delta_capacity",
|
| 71 |
]
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# ── Model catalog (single source of truth for versions & metadata) ────────────
|
| 74 |
MODEL_CATALOG: dict[str, dict[str, Any]] = {
|
| 75 |
"random_forest": {"version": "3.0.0", "display_name": "Random Forest", "family": "classical", "algorithm": "RandomForestRegressor", "target": "soh", "r2": 0.9814},
|
|
@@ -160,6 +167,7 @@ class ModelRegistry:
|
|
| 160 |
self.scaler = None # kept for backward compat
|
| 161 |
self.linear_scaler = None # StandardScaler for Ridge/Lasso/SVR/KNN
|
| 162 |
self.sequence_scaler = None # StandardScaler for sequence deep models
|
|
|
|
| 163 |
self.device = "cpu"
|
| 164 |
self.version = version
|
| 165 |
# Set version-aware paths
|
|
@@ -180,6 +188,7 @@ class ModelRegistry:
|
|
| 180 |
return
|
| 181 |
self._detect_device()
|
| 182 |
self._load_scaler()
|
|
|
|
| 183 |
self._load_classical()
|
| 184 |
self._load_deep_pytorch()
|
| 185 |
self._load_deep_keras()
|
|
@@ -232,6 +241,11 @@ class ModelRegistry:
|
|
| 232 |
try:
|
| 233 |
import torch
|
| 234 |
state = torch.load(p, map_location="cpu", weights_only=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
# LSTM / BiLSTM / GRU: weight_ih_l0 shape is (gates*hidden, n_feat)
|
| 236 |
for key in ("lstm.weight_ih_l0", "encoder_lstm.weight_ih_l0", "gru.weight_ih_l0"):
|
| 237 |
if key in state:
|
|
@@ -239,9 +253,6 @@ class ModelRegistry:
|
|
| 239 |
# BatteryGPT: input_proj.weight shape is (d_model, n_feat)
|
| 240 |
if "input_proj.weight" in state:
|
| 241 |
return int(state["input_proj.weight"].shape[-1])
|
| 242 |
-
# TFT: softmax_proj.bias shape is (n_features,)
|
| 243 |
-
if "var_selection.softmax_proj.bias" in state:
|
| 244 |
-
return int(state["var_selection.softmax_proj.bias"].shape[0])
|
| 245 |
except Exception:
|
| 246 |
pass
|
| 247 |
return _N_FEAT
|
|
@@ -437,6 +448,40 @@ class ModelRegistry:
|
|
| 437 |
else:
|
| 438 |
log.warning("sequence_scaler.joblib not found → deep models will use raw features")
|
| 439 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
def _choose_default(self) -> None:
|
| 441 |
"""Select the highest-quality loaded model as the registry default."""
|
| 442 |
priority = [
|
|
@@ -517,11 +562,12 @@ class ModelRegistry:
|
|
| 517 |
def _build_x(self, features: dict[str, float]) -> np.ndarray:
|
| 518 |
"""Build raw (1, F) feature numpy array β NO scaling applied here.
|
| 519 |
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
|
|
|
| 523 |
"""
|
| 524 |
-
return np.array([[features.get(c, 0.0) for c in
|
| 525 |
|
| 526 |
@staticmethod
|
| 527 |
def _x_for_model(model: Any, x: np.ndarray) -> Any:
|
|
|
|
| 70 |
"cycle_duration", "Re", "Rct", "delta_capacity",
|
| 71 |
]
|
| 72 |
|
| 73 |
+
# Columns present in the features CSV that are NOT model inputs
|
| 74 |
+
# (targets, identifiers, or derived columns excluded from training)
|
| 75 |
+
_NON_FEATURE_COLS: frozenset[str] = frozenset({
|
| 76 |
+
"battery_id", "Capacity", "datetime", "SoH", "RUL",
|
| 77 |
+
"degradation_state", "soh_rolling_mean",
|
| 78 |
+
})
|
| 79 |
+
|
| 80 |
# ── Model catalog (single source of truth for versions & metadata) ────────────
|
| 81 |
MODEL_CATALOG: dict[str, dict[str, Any]] = {
|
| 82 |
"random_forest": {"version": "3.0.0", "display_name": "Random Forest", "family": "classical", "algorithm": "RandomForestRegressor", "target": "soh", "r2": 0.9814},
|
|
|
|
| 167 |
self.scaler = None # kept for backward compat
|
| 168 |
self.linear_scaler = None # StandardScaler for Ridge/Lasso/SVR/KNN
|
| 169 |
self.sequence_scaler = None # StandardScaler for sequence deep models
|
| 170 |
+
self.feature_cols: list[str] = list(FEATURE_COLS_SCALAR) # updated by load_all
|
| 171 |
self.device = "cpu"
|
| 172 |
self.version = version
|
| 173 |
# Set version-aware paths
|
|
|
|
| 188 |
return
|
| 189 |
self._detect_device()
|
| 190 |
self._load_scaler()
|
| 191 |
+
self.feature_cols = self._load_feature_cols()
|
| 192 |
self._load_classical()
|
| 193 |
self._load_deep_pytorch()
|
| 194 |
self._load_deep_keras()
|
|
|
|
| 241 |
try:
|
| 242 |
import torch
|
| 243 |
state = torch.load(p, map_location="cpu", weights_only=True)
|
| 244 |
+
# TFT-specific check MUST come first: TFT also has lstm.weight_ih_l0
|
| 245 |
+
# but its LSTM takes d_model (not n_feat) as input, causing wrong detection.
|
| 246 |
+
# softmax_proj.bias shape is (n_features,) — the true feature count.
|
| 247 |
+
if "var_selection.softmax_proj.bias" in state:
|
| 248 |
+
return int(state["var_selection.softmax_proj.bias"].shape[0])
|
| 249 |
# LSTM / BiLSTM / GRU: weight_ih_l0 shape is (gates*hidden, n_feat)
|
| 250 |
for key in ("lstm.weight_ih_l0", "encoder_lstm.weight_ih_l0", "gru.weight_ih_l0"):
|
| 251 |
if key in state:
|
|
|
|
| 253 |
# BatteryGPT: input_proj.weight shape is (d_model, n_feat)
|
| 254 |
if "input_proj.weight" in state:
|
| 255 |
return int(state["input_proj.weight"].shape[-1])
|
|
|
|
|
|
|
|
|
|
| 256 |
except Exception:
|
| 257 |
pass
|
| 258 |
return _N_FEAT
|
|
|
|
| 448 |
else:
|
| 449 |
log.warning("sequence_scaler.joblib not found → deep models will use raw features")
|
| 450 |
|
| 451 |
+
def _load_feature_cols(self) -> list[str]:
|
| 452 |
+
"""Discover feature column names from artifacts features CSV.
|
| 453 |
+
|
| 454 |
+
Reads the features CSV for this version (if present), drops known
|
| 455 |
+
non-feature columns (targets, identifiers, derived labels), and
|
| 456 |
+
validates the count against the loaded scaler's ``n_features_in_``.
|
| 457 |
+
Falls back to the module-level ``FEATURE_COLS_SCALAR`` list.
|
| 458 |
+
"""
|
| 459 |
+
features_dir = self._artifacts / "features"
|
| 460 |
+
for fname in ("battery_features.csv", "train_split.csv"):
|
| 461 |
+
fpath = features_dir / fname
|
| 462 |
+
if not fpath.exists():
|
| 463 |
+
continue
|
| 464 |
+
try:
|
| 465 |
+
all_cols = pd.read_csv(fpath, nrows=0).columns.tolist()
|
| 466 |
+
feat_cols = [c for c in all_cols if c not in _NON_FEATURE_COLS]
|
| 467 |
+
n_expected = getattr(self.linear_scaler, "n_features_in_", None)
|
| 468 |
+
if n_expected and len(feat_cols) != n_expected:
|
| 469 |
+
log.warning(
|
| 470 |
+
"Feature col count mismatch: CSV=%d, scaler=%d → using scaler count",
|
| 471 |
+
len(feat_cols), n_expected,
|
| 472 |
+
)
|
| 473 |
+
feat_cols = feat_cols[:n_expected]
|
| 474 |
+
if feat_cols:
|
| 475 |
+
log.info(
|
| 476 |
+
"Feature columns loaded from artifacts (%d cols, source: %s)",
|
| 477 |
+
len(feat_cols), fname,
|
| 478 |
+
)
|
| 479 |
+
return feat_cols
|
| 480 |
+
except Exception as exc:
|
| 481 |
+
log.warning("Could not load feature cols from %s: %s", fname, exc)
|
| 482 |
+
log.info("Using default FEATURE_COLS_SCALAR (%d features)", len(FEATURE_COLS_SCALAR))
|
| 483 |
+
return list(FEATURE_COLS_SCALAR)
|
| 484 |
+
|
| 485 |
def _choose_default(self) -> None:
|
| 486 |
"""Select the highest-quality loaded model as the registry default."""
|
| 487 |
priority = [
|
|
|
|
| 562 |
def _build_x(self, features: dict[str, float]) -> np.ndarray:
|
| 563 |
"""Build raw (1, F) feature numpy array β NO scaling applied here.
|
| 564 |
|
| 565 |
+
Uses ``self.feature_cols`` which is populated from the artifacts features
|
| 566 |
+
CSV at startup, falling back to ``FEATURE_COLS_SCALAR``. Unknown feature
|
| 567 |
+
keys (e.g. v3-only engineered features not sent by the frontend) default
|
| 568 |
+
to 0.0 and are zero-padded by the deep-sequence builders as needed.
|
| 569 |
"""
|
| 570 |
+
return np.array([[features.get(c, 0.0) for c in self.feature_cols]])
|
| 571 |
|
| 572 |
@staticmethod
|
| 573 |
def _x_for_model(model: Any, x: np.ndarray) -> Any:
|
|
@@ -210,4 +210,4 @@ export interface SimulateResponse {
|
|
| 210 |
}
|
| 211 |
|
| 212 |
export const simulateBatteries = (req: SimulateRequest) =>
|
| 213 |
-
baseApi.post<SimulateResponse>("/
|
|
|
|
| 210 |
}
|
| 211 |
|
| 212 |
export const simulateBatteries = (req: SimulateRequest) =>
|
| 213 |
+
baseApi.post<SimulateResponse>("/v3/simulate", req).then((r) => r.data);
|
|
@@ -41,6 +41,9 @@ class FeatureWiseMHA(layers.Layer):
|
|
| 41 |
self.norm = layers.LayerNormalization()
|
| 42 |
self.dropout = layers.Dropout(dropout)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
def call(self, x, training=False):
|
| 45 |
# x: (B, T, F) → transpose to (B, F, T) for feature-wise attention
|
| 46 |
x_t = tf.transpose(x, perm=[0, 2, 1]) # (B, F, T)
|
|
@@ -61,6 +64,9 @@ class TokenWiseMHA(layers.Layer):
|
|
| 61 |
self.norm = layers.LayerNormalization()
|
| 62 |
self.dropout = layers.Dropout(dropout)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
| 64 |
def call(self, x, training=False):
|
| 65 |
attn = self.mha(x, x, training=training)
|
| 66 |
attn = self.dropout(attn, training=training)
|
|
@@ -78,6 +84,9 @@ class Conv1DFeedForward(layers.Layer):
|
|
| 78 |
self.norm = layers.LayerNormalization()
|
| 79 |
self.dropout = layers.Dropout(dropout)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
| 81 |
def call(self, x, training=False):
|
| 82 |
ff = self.conv1(x)
|
| 83 |
ff = self.dropout(ff, training=training)
|
|
@@ -206,6 +215,9 @@ class DynamicGraphConv(layers.Layer):
|
|
| 206 |
self.proj = layers.Dense(d_model)
|
| 207 |
self.norm = layers.LayerNormalization()
|
| 208 |
|
|
|
|
|
|
|
|
|
|
| 209 |
def call(self, x, training=False):
|
| 210 |
"""
|
| 211 |
x: (B, T, F) → compute feature correlation matrix (F, F) as adjacency
|
|
|
|
| 41 |
self.norm = layers.LayerNormalization()
|
| 42 |
self.dropout = layers.Dropout(dropout)
|
| 43 |
|
| 44 |
+
def build(self, input_shape):
|
| 45 |
+
super().build(input_shape)
|
| 46 |
+
|
| 47 |
def call(self, x, training=False):
|
| 48 |
# x: (B, T, F) → transpose to (B, F, T) for feature-wise attention
|
| 49 |
x_t = tf.transpose(x, perm=[0, 2, 1]) # (B, F, T)
|
|
|
|
| 64 |
self.norm = layers.LayerNormalization()
|
| 65 |
self.dropout = layers.Dropout(dropout)
|
| 66 |
|
| 67 |
+
def build(self, input_shape):
|
| 68 |
+
super().build(input_shape)
|
| 69 |
+
|
| 70 |
def call(self, x, training=False):
|
| 71 |
attn = self.mha(x, x, training=training)
|
| 72 |
attn = self.dropout(attn, training=training)
|
|
|
|
| 84 |
self.norm = layers.LayerNormalization()
|
| 85 |
self.dropout = layers.Dropout(dropout)
|
| 86 |
|
| 87 |
+
def build(self, input_shape):
|
| 88 |
+
super().build(input_shape)
|
| 89 |
+
|
| 90 |
def call(self, x, training=False):
|
| 91 |
ff = self.conv1(x)
|
| 92 |
ff = self.dropout(ff, training=training)
|
|
|
|
| 215 |
self.proj = layers.Dense(d_model)
|
| 216 |
self.norm = layers.LayerNormalization()
|
| 217 |
|
| 218 |
+
def build(self, input_shape):
|
| 219 |
+
super().build(input_shape)
|
| 220 |
+
|
| 221 |
def call(self, x, training=False):
|
| 222 |
"""
|
| 223 |
x: (B, T, F) → compute feature correlation matrix (F, F) as adjacency
|