Christoph Hemmer committed on
Commit
74fc85f
·
1 Parent(s): 694a9cb

change power transformation

Browse files
dynamix/dynamix.py CHANGED
@@ -208,7 +208,6 @@ class DynaMix(nn.Module):
208
  z: Latent state of shape (M, batch_size)
209
  context: Context data of shape (seq_length, batch_size, N)
210
  precomputed_cnn: Optional precomputed CNN features to avoid redundant computation for inference
211
- Shape should be (seq_length-1, batch_size, N)
212
 
213
  Returns:
214
  Updated latent state
@@ -223,7 +222,7 @@ class DynaMix(nn.Module):
223
  context: Context data of shape (seq_length, batch_size, N)
224
 
225
  Returns:
226
- Precomputed CNN features of shape (seq_length-1, batch_size, N)
227
  """
228
  # Process context with convolution
229
  context_for_conv = context.permute(1, 2, 0)
 
208
  z: Latent state of shape (M, batch_size)
209
  context: Context data of shape (seq_length, batch_size, N)
210
  precomputed_cnn: Optional precomputed CNN features to avoid redundant computation for inference
 
211
 
212
  Returns:
213
  Updated latent state
 
222
  context: Context data of shape (seq_length, batch_size, N)
223
 
224
  Returns:
225
+ Precomputed CNN features
226
  """
227
  # Process context with convolution
228
  context_for_conv = context.permute(1, 2, 0)
dynamix/forecaster.py CHANGED
@@ -139,7 +139,7 @@ class DynaMixForecaster:
139
  Efficient batched forecasting with the DynaMix model.
140
 
141
  This method implements a complete forecasting pipeline including:
142
- - Data preprocessing (Box-Cox, detrending, standardization)
143
  - Embedding techniques for dimensionality matching
144
  - DynaMix model prediction
145
  - Data postprocessing (inverse transformations)
@@ -168,7 +168,7 @@ class DynaMixForecaster:
168
  # Create data preprocessor
169
  preprocessor = DataPreprocessor(
170
  standardize=standardize,
171
- box_cox=fit_nonstationary,
172
  detrending=fit_nonstationary,
173
  preprocessing_method=preprocessing_method
174
  )
 
139
  Efficient batched forecasting with the DynaMix model.
140
 
141
  This method implements a complete forecasting pipeline including:
142
+ - Data preprocessing (power transformation, detrending, standardization)
143
  - Embedding techniques for dimensionality matching
144
  - DynaMix model prediction
145
  - Data postprocessing (inverse transformations)
 
168
  # Create data preprocessor
169
  preprocessor = DataPreprocessor(
170
  standardize=standardize,
171
+ power_transform=fit_nonstationary,
172
  detrending=fit_nonstationary,
173
  preprocessing_method=preprocessing_method
174
  )
dynamix/preprocessing.py CHANGED
@@ -1,32 +1,32 @@
1
  import torch
2
  import numpy as np
3
  from .preprocessing_utilities import (TimeSeriesProcessor, Embedding,
4
- BoxCoxTransformer, Detrending, estimate_initial_condition)
5
 
6
 
7
  class DataPreprocessor:
8
  """
9
  Main class for data preprocessing that orchestrates all transformations.
10
  """
11
- def __init__(self, standardize=True, box_cox=False, detrending=False, preprocessing_method="pos_embedding"):
12
  """
13
  Initialize the data preprocessor.
14
 
15
  Args:
16
  standardize: Whether to standardize the data
17
- box_cox: Whether to apply Box-Cox transformation
18
  detrending: Whether to apply exponential detrending
19
  preprocessing_method: Method for embedding ('pos_embedding', 'zero_embedding',
20
  'delay_embedding', 'delay_embedding_random')
21
  """
22
  self.standardize = standardize
23
- self.box_cox = box_cox
24
  self.detrending = detrending
25
  self.preprocessing_method = preprocessing_method
26
 
27
  # Parameters for inverse transformations
28
- self.box_cox_params_list = None
29
  self.detrending_params_list = None
 
30
  self.transformation_mean = None
31
  self.transformation_std = None
32
 
@@ -40,7 +40,7 @@ class DataPreprocessor:
40
 
41
  def _apply_transformations(self, context):
42
  """
43
- Apply Box-Cox transformation and/or detrending to each batch in the context data.
44
 
45
  Args:
46
  context: Context data tensor of shape (seq_length, batch_size, N_data)
@@ -52,21 +52,19 @@ class DataPreprocessor:
52
  self.original_context = context.clone()
53
 
54
  # Before transformations standardize data
55
- if self.box_cox or self.detrending:
56
  self.transformation_mean = torch.mean(context, dim=0)
57
  self.transformation_std = torch.std(context, dim=0)
58
  context = (context - self.transformation_mean.unsqueeze(0)) / self.transformation_std.unsqueeze(0)
59
 
60
- # Apply Box-Cox transformation for each batch
61
- if self.box_cox:
62
  transformed_context = torch.zeros_like(context)
63
- self.box_cox_params_list = []
64
 
65
  for b in range(self.batch_size):
66
  batch_context = context[:, b, :]
67
- transformed, params = BoxCoxTransformer.transform(batch_context)
68
  transformed_context[:, b, :] = transformed
69
- self.box_cox_params_list.append(params)
70
 
71
  context = transformed_context
72
 
@@ -87,7 +85,7 @@ class DataPreprocessor:
87
 
88
  def _apply_transformations_inverse(self, output):
89
  """
90
- Apply inverse Box-Cox and detrending transformations.
91
 
92
  Args:
93
  output: Model output of shape (T, batch_size, N)
@@ -103,11 +101,11 @@ class DataPreprocessor:
103
  batch_output = Detrending.apply_detrending_inverse(batch_context, batch_output, self.detrending_params_list[b])
104
  output[:, b, :] = batch_output
105
 
106
- # Apply inverse Box-Cox transformation for each batch
107
- if self.box_cox and self.box_cox_params_list is not None:
108
  for b in range(self.batch_size):
109
  batch_output = output[:, b, :]
110
- batch_output = BoxCoxTransformer.inverse_transform(batch_output, self.box_cox_params_list[b])
111
  output[:, b, :] = batch_output
112
 
113
  # Apply inverse standardization if transformation was applied
@@ -193,17 +191,17 @@ class DataPreprocessor:
193
  Initial condition for forecasting
194
 
195
  Raises:
196
- ValueError: If initial condition is provided with Box-Cox or detrending enabled
197
  """
198
  if initial_x is None:
199
  # Use last context value for each batch
200
  return context_embedded[-1]
201
 
202
- # Raise error if initial condition is provided with Box-Cox or detrending enabled
203
- if (self.box_cox or self.detrending):
204
  raise ValueError(
205
- "Using initial conditions with Box-Cox or detrending is not supported. "
206
- "Either disable Box-Cox and detrending or do not provide an initial condition."
207
  )
208
 
209
  # Process initial conditions for each batch
@@ -243,7 +241,7 @@ class DataPreprocessor:
243
  self.batch_size = context.shape[1]
244
  self.feature_dim = context.shape[2]
245
 
246
- # Apply transformations (Box-Cox, detrending)
247
  context = self._apply_transformations(context)
248
 
249
  # Standardize data
@@ -270,7 +268,7 @@ class DataPreprocessor:
270
  # Undo standardization
271
  output = self._unstandardize_data(output)
272
 
273
- # Apply inverse transformations (Box-Cox, detrending)
274
  output = self._apply_transformations_inverse(output)
275
 
276
  return output
 
1
  import torch
2
  import numpy as np
3
  from .preprocessing_utilities import (TimeSeriesProcessor, Embedding,
4
+ PowerTransformer, Detrending, estimate_initial_condition)
5
 
6
 
7
  class DataPreprocessor:
8
  """
9
  Main class for data preprocessing that orchestrates all transformations.
10
  """
11
+ def __init__(self, standardize=True, power_transform=False, detrending=False, preprocessing_method="pos_embedding"):
12
  """
13
  Initialize the data preprocessor.
14
 
15
  Args:
16
  standardize: Whether to standardize the data
17
+ power_transform: Whether to apply power transformation
18
  detrending: Whether to apply exponential detrending
19
  preprocessing_method: Method for embedding ('pos_embedding', 'zero_embedding',
20
  'delay_embedding', 'delay_embedding_random')
21
  """
22
  self.standardize = standardize
23
+ self.power_transform = power_transform
24
  self.detrending = detrending
25
  self.preprocessing_method = preprocessing_method
26
 
27
  # Parameters for inverse transformations
 
28
  self.detrending_params_list = None
29
+ self.power_transformer = PowerTransformer()
30
  self.transformation_mean = None
31
  self.transformation_std = None
32
 
 
40
 
41
  def _apply_transformations(self, context):
42
  """
43
+ Apply power transformation and/or detrending to each batch in the context data.
44
 
45
  Args:
46
  context: Context data tensor of shape (seq_length, batch_size, N_data)
 
52
  self.original_context = context.clone()
53
 
54
  # Before transformations standardize data
55
+ if self.power_transform or self.detrending:
56
  self.transformation_mean = torch.mean(context, dim=0)
57
  self.transformation_std = torch.std(context, dim=0)
58
  context = (context - self.transformation_mean.unsqueeze(0)) / self.transformation_std.unsqueeze(0)
59
 
60
+ # Apply power transformation for each batch
61
+ if self.power_transform:
62
  transformed_context = torch.zeros_like(context)
 
63
 
64
  for b in range(self.batch_size):
65
  batch_context = context[:, b, :]
66
+ transformed = self.power_transformer.transform(batch_context)
67
  transformed_context[:, b, :] = transformed
 
68
 
69
  context = transformed_context
70
 
 
85
 
86
  def _apply_transformations_inverse(self, output):
87
  """
88
+ Apply inverse power transformation and detrending transformations.
89
 
90
  Args:
91
  output: Model output of shape (T, batch_size, N)
 
101
  batch_output = Detrending.apply_detrending_inverse(batch_context, batch_output, self.detrending_params_list[b])
102
  output[:, b, :] = batch_output
103
 
104
+ # Apply inverse power transformation for each batch
105
+ if self.power_transform:
106
  for b in range(self.batch_size):
107
  batch_output = output[:, b, :]
108
+ batch_output = self.power_transformer.inverse_transform(batch_output)
109
  output[:, b, :] = batch_output
110
 
111
  # Apply inverse standardization if transformation was applied
 
191
  Initial condition for forecasting
192
 
193
  Raises:
194
+ ValueError: If initial condition is provided with power transformation or detrending enabled
195
  """
196
  if initial_x is None:
197
  # Use last context value for each batch
198
  return context_embedded[-1]
199
 
200
+ # Raise error if initial condition is provided with power transformation or detrending enabled
201
+ if (self.power_transform or self.detrending):
202
  raise ValueError(
203
+ "Using initial conditions with power transformation or detrending is not supported. "
204
+ "Either disable power transformation and detrending or do not provide an initial condition."
205
  )
206
 
207
  # Process initial conditions for each batch
 
241
  self.batch_size = context.shape[1]
242
  self.feature_dim = context.shape[2]
243
 
244
+ # Apply transformations (power transformation, detrending)
245
  context = self._apply_transformations(context)
246
 
247
  # Standardize data
 
268
  # Undo standardization
269
  output = self._unstandardize_data(output)
270
 
271
+ # Apply inverse transformations (power transformation, detrending)
272
  output = self._apply_transformations_inverse(output)
273
 
274
  return output
dynamix/preprocessing_utilities.py CHANGED
@@ -6,6 +6,8 @@ import random
6
  from statsmodels.tsa.stattools import acf
7
  from scipy.ndimage import gaussian_filter1d
8
  from scipy import optimize
 
 
9
 
10
 
11
  class TimeSeriesProcessor:
@@ -274,28 +276,25 @@ class Embedding:
274
  raise ValueError(f"Unsupported embedding method: {method}")
275
 
276
 
277
- class BoxCoxTransformer:
278
  """
279
- Applies Box-Cox transformation to data for variance stabilization.
280
  """
281
- def __init__(self, lambda_range=(-2, 2)):
282
  """
283
- Initialize BoxCoxTransformer.
284
 
285
  Args:
286
  lambda_range: Range for lambda parameter search
287
  """
288
- self.lambda_range = lambda_range
289
- self.params = None
290
 
291
- @staticmethod
292
- def transform(data, lambda_range=(-2, 2)):
293
  """
294
- Apply Box-Cox transformation to data for stabilization
295
 
296
  Args:
297
  data: Input data tensor of shape (seq_length, N)
298
- lambda_range: Range for lambda parameter search
299
 
300
  Returns:
301
  Transformed data and parameters for inverse transformation
@@ -303,53 +302,17 @@ class BoxCoxTransformer:
303
  # Convert to numpy
304
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
305
 
306
- seq_length, n_dims = data_np.shape
307
- transformed_data = np.zeros_like(data_np)
308
- box_cox_params = []
309
-
310
- for dim in range(n_dims):
311
- # Add constant to ensure positivity
312
- if np.min(data_np[:, dim]) <= 0:
313
- offset = abs(np.min(data_np[:, dim])) + 1.2
314
- data_shifted = data_np[:, dim] + offset
315
- else:
316
- offset = 1.2
317
- data_shifted = data_np[:, dim] + offset
318
-
319
- try:
320
- # Find optimal lambda for Box-Cox transformation
321
- transformed, lambda_param = stats.boxcox(data_shifted)
322
-
323
- # Limit lambda to a reasonable range to prevent numerical issues
324
- lambda_param = max(min(lambda_param, 2.0), -2.0)
325
-
326
- # Recalculate transformation with bounded lambda for consistency
327
- if abs(lambda_param) < 1e-8:
328
- # For lambda near zero, use logarithmic transformation
329
- transformed = np.log(data_shifted)
330
- else:
331
- transformed = (data_shifted ** lambda_param - 1) / lambda_param
332
-
333
- # Store transformed data and parameters
334
- transformed_data[:, dim] = transformed
335
- except:
336
- # If transformation fails, just use the original data
337
- transformed_data[:, dim] = data_np[:, dim]
338
- lambda_param = 1.0 # Identity transform
339
-
340
- box_cox_params.append((lambda_param, offset))
341
 
342
  # Convert back to torch if needed
343
- return TimeSeriesProcessor.to_torch(transformed_data, is_torch, device, dtype), box_cox_params
344
 
345
- @staticmethod
346
- def inverse_transform(data, box_cox_params):
347
  """
348
- Apply inverse Box-Cox transformation
349
 
350
  Args:
351
  data: Transformed data tensor
352
- box_cox_params: Parameters from Box-Cox transformation
353
 
354
  Returns:
355
  Original scale data
@@ -357,30 +320,7 @@ class BoxCoxTransformer:
357
  # Convert to numpy for computation
358
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
359
 
360
- seq_length, n_dims = data_np.shape
361
- inverse_data = np.zeros_like(data_np)
362
-
363
- for dim in range(min(n_dims, len(box_cox_params))):
364
- lambda_param, offset = box_cox_params[dim]
365
-
366
- # Apply inverse transformation
367
- if abs(lambda_param) < 1e-8:
368
- # For lambda near zero, the transformation is logarithmic
369
- inverse_data[:, dim] = np.exp(data_np[:, dim]) - offset
370
- elif abs(lambda_param - 1.0) < 1e-8:
371
- # For lambda=1 (identity transform), just subtract offset
372
- inverse_data[:, dim] = data_np[:, dim] - offset
373
- else:
374
- # For other lambda values
375
- base = lambda_param * data_np[:, dim] + 1
376
-
377
- # Simple clipping approach to ensure base is positive
378
- # This avoids complex numbers while preserving most data characteristics
379
- base = np.maximum(base, 1e-10)
380
-
381
- # Apply power transformation
382
- result = base ** (1/lambda_param)
383
- inverse_data[:, dim] = result - offset
384
 
385
  # Convert back to torch if needed
386
  return TimeSeriesProcessor.to_torch(inverse_data, is_torch, device, dtype)
@@ -447,7 +387,7 @@ class Detrending:
447
  initial_params = [0.0, 1.0, data_np[0,dim]]
448
 
449
  # Bounds for parameters
450
- bounds = [(None, None), (0.0, 3.0), (None, None)]
451
 
452
  # Optimize
453
  result = optimize.minimize(
@@ -462,7 +402,7 @@ class Detrending:
462
  'maxcor': 10
463
  }
464
  )
465
- optimal_params = np.round(result.x, 3)
466
 
467
  # Calculate trend and detrend the data
468
  t = np.arange(1, seq_length + 1)
 
6
  from statsmodels.tsa.stattools import acf
7
  from scipy.ndimage import gaussian_filter1d
8
  from scipy import optimize
9
+ from scipy.optimize import curve_fit
10
+ import sklearn
11
 
12
 
13
  class TimeSeriesProcessor:
 
276
  raise ValueError(f"Unsupported embedding method: {method}")
277
 
278
 
279
+ class PowerTransformer:
280
  """
281
+ Applies power transformation to data.
282
  """
283
+ def __init__(self):
284
  """
285
+ Initialize PowerTransformer.
286
 
287
  Args:
288
  lambda_range: Range for lambda parameter search
289
  """
290
+ self.power_transformer = sklearn.preprocessing.PowerTransformer(method='yeo-johnson', standardize=False)
 
291
 
292
+ def transform(self, data):
 
293
  """
294
+ Apply power transformation to data for stabilization
295
 
296
  Args:
297
  data: Input data tensor of shape (seq_length, N)
 
298
 
299
  Returns:
300
  Transformed data and parameters for inverse transformation
 
302
  # Convert to numpy
303
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
304
 
305
+ transformed_data = self.power_transformer.fit_transform(data_np)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  # Convert back to torch if needed
308
+ return TimeSeriesProcessor.to_torch(transformed_data, is_torch, device, dtype)
309
 
310
+ def inverse_transform(self, data):
 
311
  """
312
+ Apply inverse power transformation
313
 
314
  Args:
315
  data: Transformed data tensor
 
316
 
317
  Returns:
318
  Original scale data
 
320
  # Convert to numpy for computation
321
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
322
 
323
+ inverse_data = self.power_transformer.inverse_transform(data_np)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
  # Convert back to torch if needed
326
  return TimeSeriesProcessor.to_torch(inverse_data, is_torch, device, dtype)
 
387
  initial_params = [0.0, 1.0, data_np[0,dim]]
388
 
389
  # Bounds for parameters
390
+ bounds = [(None, None), (None, None), (None, None)]
391
 
392
  # Optimize
393
  result = optimize.minimize(
 
402
  'maxcor': 10
403
  }
404
  )
405
+ optimal_params = np.round(result.x, 10)
406
 
407
  # Calculate trend and detrend the data
408
  t = np.arange(1, seq_length + 1)