|
|
"""Fixed length transformer, pad or truncate panel to fixed length.""" |
|
|
|
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
|
|
|
from sktime.transformations.base import BaseTransformer |
|
|
from sktime.utils.pandas import df_map |
|
|
|
|
|
__all__ = ["FixedLengthTransformer"] |
|
|
__author__ = ["user"] |
|
|
|
|
|
|
|
|
class FixedLengthTransformer(BaseTransformer):
    """Transform panel of variable length time series to fixed length.

    Every series in the panel is brought to exactly ``fixed_length`` points by:

    - Padding shorter series at the end with a fill value (default: 0)
    - Truncating longer series to their first ``fixed_length`` points

    Unlike PaddingTransformer, this transformer requires a fixed_length parameter
    and will both pad and truncate as needed.

    Parameters
    ----------
    fixed_length : int
        The exact length that all series will be transformed to.
        Must be a positive integer.
    fill_value : any, optional (default=0)
        The value used to pad shorter series. If the fill value and the series
        values can be represented as floats, padded series are float arrays;
        otherwise padded series are object arrays.

    Raises
    ------
    ValueError
        If ``fixed_length`` is not a positive integer.

    Examples
    --------
    >>> import pandas as pd
    >>> from sktime.transformations.panel.fixed_length import FixedLengthTransformer
    >>> X = pd.DataFrame(
    ...     {
    ...         "feature1": [
    ...             pd.Series([1, 2, 3]), pd.Series([4, 5]), pd.Series([6, 7, 8, 9])
    ...         ],
    ...         "feature2": [
    ...             pd.Series([10, 11]), pd.Series([12, 13, 14]), pd.Series([15])
    ...         ],
    ...     }
    ... )
    >>> transformer = FixedLengthTransformer(fixed_length=3)
    >>> Xt = transformer.fit_transform(X)
    """

    _tags = {
        "authors": ["user"],
        "maintainers": ["user"],
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:instancewise": False,
        "X_inner_mtype": "nested_univ",
        "y_inner_mtype": "None",
        "fit_is_empty": True,
        "capability:unequal_length:removes": True,
    }

    def __init__(self, fixed_length, fill_value=0):
        # Reject None, floats, strings etc. up front with one consistent error;
        # the slicing and np.full calls below require a genuine integer.
        is_integer = isinstance(fixed_length, (int, np.integer))
        if not is_integer or fixed_length <= 0:
            raise ValueError("fixed_length must be a positive integer")

        self.fixed_length = fixed_length
        self.fill_value = fill_value
        super().__init__()

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        This is a no-op since we only need the fixed_length parameter.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        self : reference to self
        """
        return self

    def _transform_series(self, series):
        """Transform a single series to fixed length by padding or truncating.

        Parameters
        ----------
        series : pandas.Series
            The input series to transform

        Returns
        -------
        array of length ``fixed_length``
            The first ``fixed_length`` values of ``series`` if it is long enough
            (original dtype kept), otherwise the values of ``series`` followed by
            ``fill_value`` repeated up to ``fixed_length``.
        """
        values = series.values
        n_obs = len(values)

        if n_obs >= self.fixed_length:
            # Exact length or too long: keep the leading fixed_length points.
            return values[: self.fixed_length]

        def _padded(dtype):
            out = np.full(self.fixed_length, self.fill_value, dtype=dtype)
            out[:n_obs] = values
            return out

        try:
            # Numeric data and fill value: float array, as before.
            return _padded(float)
        except (TypeError, ValueError):
            # Non-numeric fill value or data cannot be stored as float;
            # fall back to an object array so any fill value is supported.
            return _padded(object)

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        Xt : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of Xt contains pandas.Series with fixed length;
            Xt has the same columns and row index as X
        """
        n_instances, _ = X.shape

        if n_instances == 0:
            # Nothing to pad or truncate; building a frame from zero rows would
            # lose the columns and make the column assignment below fail.
            return X.copy()

        transformed_rows = [
            pd.Series(
                [
                    pd.Series(self._transform_series(cell))
                    for cell in X.iloc[i, :].values
                ]
            )
            for i in range(n_instances)
        ]

        Xt = df_map(pd.DataFrame(transformed_rows))(pd.Series)
        Xt.columns = X.columns
        # Rows are produced in the order of X, so the instance index carries over.
        Xt.index = X.index

        return Xt