| """Input normalization for structural mechanics features. | |
| Physical quantities span wildly different scales (Poisson's ratio ~0.3 vs | |
| elastic modulus ~200e9). Strategy: log-transform extensive quantities that | |
| span orders of magnitude, then standardize all features to zero mean, | |
| unit variance. This is critical for stable neural network training. | |
| """ | |
import json
from pathlib import Path
from typing import Optional

import numpy as np
import torch
# Features that span orders of magnitude and should be log-transformed
LOG_TRANSFORM_FEATURES = {
    "length", "width", "height", "inner_radius", "outer_radius", "thickness",
    "elastic_modulus", "yield_strength", "density",
    "point_load", "distributed_load", "internal_pressure", "pressure",
    "moment_of_inertia", "section_modulus", "cross_section_area",
}

# Features that are already on a reasonable scale (keep linear)
LINEAR_FEATURES = {"poisson_ratio"}

# One-hot encoded categorical feature
CATEGORICAL_FEATURE = "config_id"

# All config IDs in deterministic order
CONFIG_IDS = [
    "beam_ss_point", "beam_ss_udl",
    "beam_cantilever_point", "beam_cantilever_udl",
    "beam_fixed_point", "beam_fixed_udl",
    "plate_ss_uniform", "plate_fixed_uniform",
    "vessel_cylinder", "vessel_sphere",
]
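
# Example: "beam_cantilever_point" sits at index 2 above, so its one-hot
# encoding is [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] (length == len(CONFIG_IDS)).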

class LogTransformStandardizer:
    """Two-stage normalization: log-transform then standardize.

    Stores normalization parameters as model artifacts for reproducible
    inference. All parameters are JSON-serializable for deployment.
    """

    def __init__(self) -> None:
        self.feature_names: list[str] = []
        self.means: Optional[np.ndarray] = None
        self.stds: Optional[np.ndarray] = None
        self.log_mask: Optional[np.ndarray] = None
        self._fitted = False

    def input_dim(self) -> int:
        """Total input dimension after one-hot encoding."""
        if not self._fitted:
            raise RuntimeError("Call fit() before accessing input_dim")
        return len(self.feature_names) + len(CONFIG_IDS)
    def fit(
        self,
        features: dict[str, np.ndarray],
        config_ids: np.ndarray,
    ) -> "LogTransformStandardizer":
        """Compute normalization parameters from training data.

        Args:
            features: Dict mapping feature name to 1D array of values.
                NaN values are replaced with 0 before transformation.
            config_ids: Array of config_id strings. Accepted for API symmetry
                with transform(); the one-hot vocabulary is the fixed
                CONFIG_IDS list, so nothing is learned from it.
        """
        self.feature_names = sorted(features.keys())
        n_features = len(self.feature_names)

        # Build log-transform mask
        self.log_mask = np.array([
            name in LOG_TRANSFORM_FEATURES for name in self.feature_names
        ])

        # Stack features into a (n_samples, n_features) matrix. Replace NaN
        # with 0 (for optional features like inner_radius on beams) and force
        # float so log-transformed columns are not truncated to integers.
        matrix = np.column_stack([features[name] for name in self.feature_names])
        matrix = np.nan_to_num(matrix, nan=0.0).astype(np.float64)

        # Apply log10 to selected features, clamping non-positive values to a
        # tiny floor to avoid log(0)
        log_matrix = matrix.copy()
        for i in range(n_features):
            if self.log_mask[i]:
                col = matrix[:, i]
                col = np.where(col > 0, col, 1e-30)
                log_matrix[:, i] = np.log10(col)

        # Compute mean and std on transformed features
        self.means = log_matrix.mean(axis=0)
        self.stds = log_matrix.std(axis=0)
        self.stds = np.where(self.stds > 0, self.stds, 1.0)  # avoid division by zero

        self._fitted = True
        return self
    def transform(
        self,
        features: dict[str, np.ndarray],
        config_ids: np.ndarray,
    ) -> torch.Tensor:
        """Transform raw features to normalized tensor.

        Returns:
            Tensor of shape (n_samples, input_dim) ready for model input.
        """
        if not self._fitted:
            raise RuntimeError("Call fit() before transform()")
        n_samples = len(next(iter(features.values())))

        # Stack numeric features in the same sorted order used by fit()
        matrix = np.column_stack([features[name] for name in self.feature_names])
        matrix = np.nan_to_num(matrix, nan=0.0).astype(np.float64)

        # Log-transform, with the same non-positive clamp as fit()
        for i in range(len(self.feature_names)):
            if self.log_mask[i]:
                col = matrix[:, i]
                col = np.where(col > 0, col, 1e-30)
                matrix[:, i] = np.log10(col)

        # Standardize
        matrix = (matrix - self.means) / self.stds

        # One-hot encode config_id; unknown IDs get an all-zero row
        config_onehot = np.zeros((n_samples, len(CONFIG_IDS)), dtype=np.float32)
        config_to_idx = {c: i for i, c in enumerate(CONFIG_IDS)}
        for row_idx, cid in enumerate(config_ids):
            if cid in config_to_idx:
                config_onehot[row_idx, config_to_idx[cid]] = 1.0

        # Concatenate: [numeric_features | config_onehot]
        combined = np.concatenate([matrix.astype(np.float32), config_onehot], axis=1)
        return torch.from_numpy(combined)
    def save(self, path: Path) -> None:
        """Save normalization parameters to JSON."""
        if not self._fitted:
            raise RuntimeError("Call fit() before save()")
        data = {
            "feature_names": self.feature_names,
            "means": self.means.tolist(),
            "stds": self.stds.tolist(),
            "log_mask": self.log_mask.tolist(),
            "config_ids": CONFIG_IDS,
        }
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w") as f:
            json.dump(data, f, indent=2)
    @classmethod
    def load(cls, path: Path) -> "LogTransformStandardizer":
        """Load normalization parameters from JSON."""
        with open(path) as f:
            data = json.load(f)
        obj = cls()
        obj.feature_names = data["feature_names"]
        obj.means = np.array(data["means"])
        obj.stds = np.array(data["stds"])
        obj.log_mask = np.array(data["log_mask"])
        obj._fitted = True
        return obj
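
# Minimal smoke test sketching the intended fit -> transform -> save -> load
# round trip. The feature values below are made-up beam parameters, not real
# training data, and the temporary artifact path is an assumption chosen for
# illustration.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n = 4
    features = {
        "length": rng.uniform(1.0, 10.0, n),   # metres, log-transformed
        "elastic_modulus": np.full(n, 200e9),  # pascals, log-transformed
        "poisson_ratio": np.full(n, 0.3),      # dimensionless, kept linear
    }
    config_ids = np.array(["beam_ss_point"] * n)

    norm = LogTransformStandardizer().fit(features, config_ids)
    x = norm.transform(features, config_ids)
    # 3 numeric features + 10 one-hot config slots = 13 input dimensions
    assert x.shape == (n, norm.input_dim())

    artifact = Path("/tmp/standardizer.json")  # hypothetical path
    norm.save(artifact)
    restored = LogTransformStandardizer.load(artifact)
    assert torch.allclose(x, restored.transform(features, config_ids))
    print("round trip OK, input_dim =", restored.input_dim())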