Christoph Hemmer committed on
Commit
74fc85f
·
1 Parent(s): 694a9cb

change power transformation

Browse files
dynamix/dynamix.py CHANGED
@@ -208,7 +208,6 @@ class DynaMix(nn.Module):
208
  z: Latent state of shape (M, batch_size)
209
  context: Context data of shape (seq_length, batch_size, N)
210
  precomputed_cnn: Optional precomputed CNN features to avoid redundant computation for inference
211
- Shape should be (seq_length-1, batch_size, N)
212
 
213
  Returns:
214
  Updated latent state
@@ -223,7 +222,7 @@ class DynaMix(nn.Module):
223
  context: Context data of shape (seq_length, batch_size, N)
224
 
225
  Returns:
226
- Precomputed CNN features of shape (seq_length-1, batch_size, N)
227
  """
228
  # Process context with convolution
229
  context_for_conv = context.permute(1, 2, 0)
 
208
  z: Latent state of shape (M, batch_size)
209
  context: Context data of shape (seq_length, batch_size, N)
210
  precomputed_cnn: Optional precomputed CNN features to avoid redundant computation for inference
 
211
 
212
  Returns:
213
  Updated latent state
 
222
  context: Context data of shape (seq_length, batch_size, N)
223
 
224
  Returns:
225
+ Precomputed CNN features
226
  """
227
  # Process context with convolution
228
  context_for_conv = context.permute(1, 2, 0)
dynamix/forecaster.py CHANGED
@@ -139,7 +139,7 @@ class DynaMixForecaster:
139
  Efficient batched forecasting with the DynaMix model.
140
 
141
  This method implements a complete forecasting pipeline including:
142
- - Data preprocessing (Box-Cox, detrending, standardization)
143
  - Embedding techniques for dimensionality matching
144
  - DynaMix model prediction
145
  - Data postprocessing (inverse transformations)
@@ -168,7 +168,7 @@ class DynaMixForecaster:
168
  # Create data preprocessor
169
  preprocessor = DataPreprocessor(
170
  standardize=standardize,
171
- box_cox=fit_nonstationary,
172
  detrending=fit_nonstationary,
173
  preprocessing_method=preprocessing_method
174
  )
 
139
  Efficient batched forecasting with the DynaMix model.
140
 
141
  This method implements a complete forecasting pipeline including:
142
+ - Data preprocessing (power transformation, detrending, standardization)
143
  - Embedding techniques for dimensionality matching
144
  - DynaMix model prediction
145
  - Data postprocessing (inverse transformations)
 
168
  # Create data preprocessor
169
  preprocessor = DataPreprocessor(
170
  standardize=standardize,
171
+ power_transform=fit_nonstationary,
172
  detrending=fit_nonstationary,
173
  preprocessing_method=preprocessing_method
174
  )
dynamix/preprocessing.py CHANGED
@@ -1,32 +1,32 @@
1
  import torch
2
  import numpy as np
3
  from .preprocessing_utilities import (TimeSeriesProcessor, Embedding,
4
- BoxCoxTransformer, Detrending, estimate_initial_condition)
5
 
6
 
7
  class DataPreprocessor:
8
  """
9
  Main class for data preprocessing that orchestrates all transformations.
10
  """
11
- def __init__(self, standardize=True, box_cox=False, detrending=False, preprocessing_method="pos_embedding"):
12
  """
13
  Initialize the data preprocessor.
14
 
15
  Args:
16
  standardize: Whether to standardize the data
17
- box_cox: Whether to apply Box-Cox transformation
18
  detrending: Whether to apply exponential detrending
19
  preprocessing_method: Method for embedding ('pos_embedding', 'zero_embedding',
20
  'delay_embedding', 'delay_embedding_random')
21
  """
22
  self.standardize = standardize
23
- self.box_cox = box_cox
24
  self.detrending = detrending
25
  self.preprocessing_method = preprocessing_method
26
 
27
  # Parameters for inverse transformations
28
- self.box_cox_params_list = None
29
  self.detrending_params_list = None
 
30
  self.transformation_mean = None
31
  self.transformation_std = None
32
 
@@ -40,7 +40,7 @@ class DataPreprocessor:
40
 
41
  def _apply_transformations(self, context):
42
  """
43
- Apply Box-Cox transformation and/or detrending to each batch in the context data.
44
 
45
  Args:
46
  context: Context data tensor of shape (seq_length, batch_size, N_data)
@@ -52,21 +52,19 @@ class DataPreprocessor:
52
  self.original_context = context.clone()
53
 
54
  # Before transformations standardize data
55
- if self.box_cox or self.detrending:
56
  self.transformation_mean = torch.mean(context, dim=0)
57
  self.transformation_std = torch.std(context, dim=0)
58
  context = (context - self.transformation_mean.unsqueeze(0)) / self.transformation_std.unsqueeze(0)
59
 
60
- # Apply Box-Cox transformation for each batch
61
- if self.box_cox:
62
  transformed_context = torch.zeros_like(context)
63
- self.box_cox_params_list = []
64
 
65
  for b in range(self.batch_size):
66
  batch_context = context[:, b, :]
67
- transformed, params = BoxCoxTransformer.transform(batch_context)
68
  transformed_context[:, b, :] = transformed
69
- self.box_cox_params_list.append(params)
70
 
71
  context = transformed_context
72
 
@@ -87,7 +85,7 @@ class DataPreprocessor:
87
 
88
  def _apply_transformations_inverse(self, output):
89
  """
90
- Apply inverse Box-Cox and detrending transformations.
91
 
92
  Args:
93
  output: Model output of shape (T, batch_size, N)
@@ -103,11 +101,11 @@ class DataPreprocessor:
103
  batch_output = Detrending.apply_detrending_inverse(batch_context, batch_output, self.detrending_params_list[b])
104
  output[:, b, :] = batch_output
105
 
106
- # Apply inverse Box-Cox transformation for each batch
107
- if self.box_cox and self.box_cox_params_list is not None:
108
  for b in range(self.batch_size):
109
  batch_output = output[:, b, :]
110
- batch_output = BoxCoxTransformer.inverse_transform(batch_output, self.box_cox_params_list[b])
111
  output[:, b, :] = batch_output
112
 
113
  # Apply inverse standardization if transformation was applied
@@ -193,17 +191,17 @@ class DataPreprocessor:
193
  Initial condition for forecasting
194
 
195
  Raises:
196
- ValueError: If initial condition is provided with Box-Cox or detrending enabled
197
  """
198
  if initial_x is None:
199
  # Use last context value for each batch
200
  return context_embedded[-1]
201
 
202
- # Raise error if initial condition is provided with Box-Cox or detrending enabled
203
- if (self.box_cox or self.detrending):
204
  raise ValueError(
205
- "Using initial conditions with Box-Cox or detrending is not supported. "
206
- "Either disable Box-Cox and detrending or do not provide an initial condition."
207
  )
208
 
209
  # Process initial conditions for each batch
@@ -243,7 +241,7 @@ class DataPreprocessor:
243
  self.batch_size = context.shape[1]
244
  self.feature_dim = context.shape[2]
245
 
246
- # Apply transformations (Box-Cox, detrending)
247
  context = self._apply_transformations(context)
248
 
249
  # Standardize data
@@ -270,7 +268,7 @@ class DataPreprocessor:
270
  # Undo standardization
271
  output = self._unstandardize_data(output)
272
 
273
- # Apply inverse transformations (Box-Cox, detrending)
274
  output = self._apply_transformations_inverse(output)
275
 
276
  return output
 
1
  import torch
2
  import numpy as np
3
  from .preprocessing_utilities import (TimeSeriesProcessor, Embedding,
4
+ PowerTransformer, Detrending, estimate_initial_condition)
5
 
6
 
7
  class DataPreprocessor:
8
  """
9
  Main class for data preprocessing that orchestrates all transformations.
10
  """
11
+ def __init__(self, standardize=True, power_transform=False, detrending=False, preprocessing_method="pos_embedding"):
12
  """
13
  Initialize the data preprocessor.
14
 
15
  Args:
16
  standardize: Whether to standardize the data
17
+ power_transform: Whether to apply power transformation
18
  detrending: Whether to apply exponential detrending
19
  preprocessing_method: Method for embedding ('pos_embedding', 'zero_embedding',
20
  'delay_embedding', 'delay_embedding_random')
21
  """
22
  self.standardize = standardize
23
+ self.power_transform = power_transform
24
  self.detrending = detrending
25
  self.preprocessing_method = preprocessing_method
26
 
27
  # Parameters for inverse transformations
 
28
  self.detrending_params_list = None
29
+ self.power_transformer = PowerTransformer()
30
  self.transformation_mean = None
31
  self.transformation_std = None
32
 
 
40
 
41
  def _apply_transformations(self, context):
42
  """
43
+ Apply power transformation and/or detrending to each batch in the context data.
44
 
45
  Args:
46
  context: Context data tensor of shape (seq_length, batch_size, N_data)
 
52
  self.original_context = context.clone()
53
 
54
  # Before transformations standardize data
55
+ if self.power_transform or self.detrending:
56
  self.transformation_mean = torch.mean(context, dim=0)
57
  self.transformation_std = torch.std(context, dim=0)
58
  context = (context - self.transformation_mean.unsqueeze(0)) / self.transformation_std.unsqueeze(0)
59
 
60
+ # Apply power transformation for each batch
61
+ if self.power_transform:
62
  transformed_context = torch.zeros_like(context)
 
63
 
64
  for b in range(self.batch_size):
65
  batch_context = context[:, b, :]
66
+ transformed = self.power_transformer.transform(batch_context)
67
  transformed_context[:, b, :] = transformed
 
68
 
69
  context = transformed_context
70
 
 
85
 
86
  def _apply_transformations_inverse(self, output):
87
  """
88
+ Apply inverse power transformation and detrending transformations.
89
 
90
  Args:
91
  output: Model output of shape (T, batch_size, N)
 
101
  batch_output = Detrending.apply_detrending_inverse(batch_context, batch_output, self.detrending_params_list[b])
102
  output[:, b, :] = batch_output
103
 
104
+ # Apply inverse power transformation for each batch
105
+ if self.power_transform:
106
  for b in range(self.batch_size):
107
  batch_output = output[:, b, :]
108
+ batch_output = self.power_transformer.inverse_transform(batch_output)
109
  output[:, b, :] = batch_output
110
 
111
  # Apply inverse standardization if transformation was applied
 
191
  Initial condition for forecasting
192
 
193
  Raises:
194
+ ValueError: If initial condition is provided with power transformation or detrending enabled
195
  """
196
  if initial_x is None:
197
  # Use last context value for each batch
198
  return context_embedded[-1]
199
 
200
+ # Raise error if initial condition is provided with power transformation or detrending enabled
201
+ if (self.power_transform or self.detrending):
202
  raise ValueError(
203
+ "Using initial conditions with power transformation or detrending is not supported. "
204
+ "Either disable power transformation and detrending or do not provide an initial condition."
205
  )
206
 
207
  # Process initial conditions for each batch
 
241
  self.batch_size = context.shape[1]
242
  self.feature_dim = context.shape[2]
243
 
244
+ # Apply transformations (power transformation, detrending)
245
  context = self._apply_transformations(context)
246
 
247
  # Standardize data
 
268
  # Undo standardization
269
  output = self._unstandardize_data(output)
270
 
271
+ # Apply inverse transformations (power transformation, detrending)
272
  output = self._apply_transformations_inverse(output)
273
 
274
  return output
dynamix/preprocessing_utilities.py CHANGED
@@ -6,6 +6,8 @@ import random
6
  from statsmodels.tsa.stattools import acf
7
  from scipy.ndimage import gaussian_filter1d
8
  from scipy import optimize
 
 
9
 
10
 
11
  class TimeSeriesProcessor:
@@ -274,28 +276,25 @@ class Embedding:
274
  raise ValueError(f"Unsupported embedding method: {method}")
275
 
276
 
277
- class BoxCoxTransformer:
278
  """
279
- Applies Box-Cox transformation to data for variance stabilization.
280
  """
281
- def __init__(self, lambda_range=(-2, 2)):
282
  """
283
- Initialize BoxCoxTransformer.
284
 
285
  Args:
286
  lambda_range: Range for lambda parameter search
287
  """
288
- self.lambda_range = lambda_range
289
- self.params = None
290
 
291
- @staticmethod
292
- def transform(data, lambda_range=(-2, 2)):
293
  """
294
- Apply Box-Cox transformation to data for stabilization
295
 
296
  Args:
297
  data: Input data tensor of shape (seq_length, N)
298
- lambda_range: Range for lambda parameter search
299
 
300
  Returns:
301
  Transformed data and parameters for inverse transformation
@@ -303,53 +302,17 @@ class BoxCoxTransformer:
303
  # Convert to numpy
304
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
305
 
306
- seq_length, n_dims = data_np.shape
307
- transformed_data = np.zeros_like(data_np)
308
- box_cox_params = []
309
-
310
- for dim in range(n_dims):
311
- # Add constant to ensure positivity
312
- if np.min(data_np[:, dim]) <= 0:
313
- offset = abs(np.min(data_np[:, dim])) + 1.2
314
- data_shifted = data_np[:, dim] + offset
315
- else:
316
- offset = 1.2
317
- data_shifted = data_np[:, dim] + offset
318
-
319
- try:
320
- # Find optimal lambda for Box-Cox transformation
321
- transformed, lambda_param = stats.boxcox(data_shifted)
322
-
323
- # Limit lambda to a reasonable range to prevent numerical issues
324
- lambda_param = max(min(lambda_param, 2.0), -2.0)
325
-
326
- # Recalculate transformation with bounded lambda for consistency
327
- if abs(lambda_param) < 1e-8:
328
- # For lambda near zero, use logarithmic transformation
329
- transformed = np.log(data_shifted)
330
- else:
331
- transformed = (data_shifted ** lambda_param - 1) / lambda_param
332
-
333
- # Store transformed data and parameters
334
- transformed_data[:, dim] = transformed
335
- except:
336
- # If transformation fails, just use the original data
337
- transformed_data[:, dim] = data_np[:, dim]
338
- lambda_param = 1.0 # Identity transform
339
-
340
- box_cox_params.append((lambda_param, offset))
341
 
342
  # Convert back to torch if needed
343
- return TimeSeriesProcessor.to_torch(transformed_data, is_torch, device, dtype), box_cox_params
344
 
345
- @staticmethod
346
- def inverse_transform(data, box_cox_params):
347
  """
348
- Apply inverse Box-Cox transformation
349
 
350
  Args:
351
  data: Transformed data tensor
352
- box_cox_params: Parameters from Box-Cox transformation
353
 
354
  Returns:
355
  Original scale data
@@ -357,30 +320,7 @@ class BoxCoxTransformer:
357
  # Convert to numpy for computation
358
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
359
 
360
- seq_length, n_dims = data_np.shape
361
- inverse_data = np.zeros_like(data_np)
362
-
363
- for dim in range(min(n_dims, len(box_cox_params))):
364
- lambda_param, offset = box_cox_params[dim]
365
-
366
- # Apply inverse transformation
367
- if abs(lambda_param) < 1e-8:
368
- # For lambda near zero, the transformation is logarithmic
369
- inverse_data[:, dim] = np.exp(data_np[:, dim]) - offset
370
- elif abs(lambda_param - 1.0) < 1e-8:
371
- # For lambda=1 (identity transform), just subtract offset
372
- inverse_data[:, dim] = data_np[:, dim] - offset
373
- else:
374
- # For other lambda values
375
- base = lambda_param * data_np[:, dim] + 1
376
-
377
- # Simple clipping approach to ensure base is positive
378
- # This avoids complex numbers while preserving most data characteristics
379
- base = np.maximum(base, 1e-10)
380
-
381
- # Apply power transformation
382
- result = base ** (1/lambda_param)
383
- inverse_data[:, dim] = result - offset
384
 
385
  # Convert back to torch if needed
386
  return TimeSeriesProcessor.to_torch(inverse_data, is_torch, device, dtype)
@@ -447,7 +387,7 @@ class Detrending:
447
  initial_params = [0.0, 1.0, data_np[0,dim]]
448
 
449
  # Bounds for parameters
450
- bounds = [(None, None), (0.0, 3.0), (None, None)]
451
 
452
  # Optimize
453
  result = optimize.minimize(
@@ -462,7 +402,7 @@ class Detrending:
462
  'maxcor': 10
463
  }
464
  )
465
- optimal_params = np.round(result.x, 3)
466
 
467
  # Calculate trend and detrend the data
468
  t = np.arange(1, seq_length + 1)
 
6
  from statsmodels.tsa.stattools import acf
7
  from scipy.ndimage import gaussian_filter1d
8
  from scipy import optimize
9
+ from scipy.optimize import curve_fit
10
+ import sklearn
11
 
12
 
13
  class TimeSeriesProcessor:
 
276
  raise ValueError(f"Unsupported embedding method: {method}")
277
 
278
 
279
+ class PowerTransformer:
280
  """
281
+ Applies power transformation to data.
282
  """
283
+ def __init__(self):
284
  """
285
+ Initialize PowerTransformer.
286
 
287
  Args:
288
  lambda_range: Range for lambda parameter search
289
  """
290
+ self.power_transformer = sklearn.preprocessing.PowerTransformer(method='yeo-johnson', standardize=False)
 
291
 
292
+ def transform(self, data):
 
293
  """
294
+ Apply power transformation to data for stabilization
295
 
296
  Args:
297
  data: Input data tensor of shape (seq_length, N)
 
298
 
299
  Returns:
300
  Transformed data and parameters for inverse transformation
 
302
  # Convert to numpy
303
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
304
 
305
+ transformed_data = self.power_transformer.fit_transform(data_np)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  # Convert back to torch if needed
308
+ return TimeSeriesProcessor.to_torch(transformed_data, is_torch, device, dtype)
309
 
310
+ def inverse_transform(self, data):
 
311
  """
312
+ Apply inverse power transformation
313
 
314
  Args:
315
  data: Transformed data tensor
 
316
 
317
  Returns:
318
  Original scale data
 
320
  # Convert to numpy for computation
321
  data_np, is_torch, device, dtype = TimeSeriesProcessor.to_numpy(data)
322
 
323
+ inverse_data = self.power_transformer.inverse_transform(data_np)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
  # Convert back to torch if needed
326
  return TimeSeriesProcessor.to_torch(inverse_data, is_torch, device, dtype)
 
387
  initial_params = [0.0, 1.0, data_np[0,dim]]
388
 
389
  # Bounds for parameters
390
+ bounds = [(None, None), (None, None), (None, None)]
391
 
392
  # Optimize
393
  result = optimize.minimize(
 
402
  'maxcor': 10
403
  }
404
  )
405
+ optimal_params = np.round(result.x, 10)
406
 
407
  # Calculate trend and detrend the data
408
  t = np.arange(1, seq_length + 1)