File size: 4,915 Bytes
c9b7b21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
"""Fixed length transformer, pad or truncate panel to fixed length."""
import numpy as np
import pandas as pd
from sktime.transformations.base import BaseTransformer
from sktime.utils.pandas import df_map
# Public names exported by ``from <module> import *``.
__all__ = ["FixedLengthTransformer"]
# Module-level author metadata.
__author__ = ["user"]
class FixedLengthTransformer(BaseTransformer):
    """Transform panel of variable length time series to fixed length.

    Transforms input dataset to a fixed length by either:

    - Padding shorter series at the end with a fill value (default: 0)
    - Truncating longer series to their first ``fixed_length`` values

    Unlike PaddingTransformer, this transformer requires a fixed_length parameter
    and will both pad and truncate as needed.

    Parameters
    ----------
    fixed_length : int
        The exact length that all series will be transformed to.
        Must be a positive integer.
    fill_value : int or float, optional (default=0)
        The value used to pad shorter series. Padded series are built as
        float arrays, so the value must be castable to float.

    Raises
    ------
    ValueError
        If ``fixed_length`` is not a positive integer.

    Examples
    --------
    >>> import pandas as pd
    >>> from sktime.transformations.panel.fixed_length import FixedLengthTransformer
    >>> # Create a sample nested DataFrame with unequal length time series
    >>> data = {
    ...     'feature1': [
    ...         pd.Series([1, 2, 3]), pd.Series([4, 5]), pd.Series([6, 7, 8, 9])
    ...     ],
    ...     'feature2': [
    ...         pd.Series([10, 11]), pd.Series([12, 13, 14]), pd.Series([15])
    ...     ]
    ... }
    >>> X = pd.DataFrame(data)
    >>> # Pad or truncate every series to exactly three values
    >>> transformer = FixedLengthTransformer(fixed_length=3)
    >>> Xt = transformer.fit_transform(X)
    """

    _tags = {
        "authors": ["user"],
        "maintainers": ["user"],
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:instancewise": False,
        "X_inner_mtype": "nested_univ",
        "y_inner_mtype": "None",
        "fit_is_empty": True,  # No need to compute anything during fit
        "capability:unequal_length:removes": True,
    }

    def __init__(self, fixed_length, fill_value=0):
        # bool is a subclass of int, so exclude it explicitly; floats such as
        # 2.5 would otherwise pass the sign check and fail later when padding.
        is_integer = isinstance(fixed_length, (int, np.integer)) and not isinstance(
            fixed_length, bool
        )
        if not is_integer or fixed_length <= 0:
            raise ValueError("fixed_length must be a positive integer")
        self.fixed_length = fixed_length
        self.fill_value = fill_value
        super().__init__()

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        This is a no-op since we only need the fixed_length parameter.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        self : reference to self
        """
        return self

    def _transform_series(self, series):
        """Transform a single series to fixed length by padding or truncating.

        Parameters
        ----------
        series : pandas.Series
            The input series to transform

        Returns
        -------
        numpy.ndarray
            Array of length ``fixed_length``. Series that already have the
            target length, or are longer, keep their values' dtype; padded
            series are returned as float arrays.
        """
        series_length = len(series)
        if series_length >= self.fixed_length:
            # Already long enough: keep the leading fixed_length values
            # (a no-op slice when the length matches exactly).
            return series.iloc[: self.fixed_length].values

        # Shorter than the target: start from an array of fill values and
        # overwrite its head with the observed values.
        result = np.full(self.fixed_length, self.fill_value, dtype=float)
        result[:series_length] = series.values
        return result

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        Xt : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of Xt contains pandas.Series with fixed length;
            row index and columns are the same as those of X
        """
        n_instances = X.shape[0]
        if n_instances == 0:
            # Nothing to pad or truncate; rebuilding the frame from an empty
            # list of rows would drop the columns and fail on reassignment.
            return X.copy()

        # Process each row of instances, transforming every series in the row
        transformed_rows = [
            pd.Series(
                [
                    pd.Series(self._transform_series(series))
                    for series in X.iloc[i, :].values
                ]
            )
            for i in range(n_instances)
        ]

        # Convert back to DataFrame
        Xt = df_map(pd.DataFrame(transformed_rows))(pd.Series)
        Xt.columns = X.columns
        # Rebuilding the frame resets the row labels; restore the instance
        # index so rows of Xt still correspond to rows of X.
        Xt.index = X.index
        return Xt

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        ``fixed_length`` has no default value, so explicit settings are
        provided for constructing test instances.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return. Only one set is
            defined here, so the argument is not used.

        Returns
        -------
        params : list of dict
            Each dict is a valid set of constructor arguments, i.e.,
            ``cls(**params[i])`` creates a valid instance.
        """
        return [
            {"fixed_length": 5},
            {"fixed_length": 3, "fill_value": -1},
        ]