# chart-pattern-locator / utils / FixedLengthTransformer.py
# yasirapunsith's picture
# add files
# c9b7b21
"""Fixed length transformer, pad or truncate panel to fixed length."""
import numpy as np
import pandas as pd
from sktime.transformations.base import BaseTransformer
from sktime.utils.pandas import df_map
# Names exported by ``from <this module> import *``.
__all__ = ["FixedLengthTransformer"]
# Author metadata for this module.
__author__ = ["user"]
class FixedLengthTransformer(BaseTransformer):
    """Transform panel of variable length time series to fixed length.

    Transforms input dataset to a fixed length by either:

    - Padding shorter series at the end with a fill value (default: 0)
    - Truncating longer series to their first ``fixed_length`` values

    Unlike PaddingTransformer, this transformer requires a fixed_length
    parameter and will both pad and truncate as needed.

    Parameters
    ----------
    fixed_length : int
        The exact length that all series will be transformed to.
        Must be a positive integer.
    fill_value : float, optional (default=0)
        The value used to pad shorter series. Padded series are built as
        float arrays, so the value must be convertible to float.

    Notes
    -----
    Series that need padding are returned with float dtype, while series that
    already have the target length or are truncated keep their original
    dtype. Each output series carries a default integer index; the instance
    index and the column labels of the input frame are preserved.

    Example
    -------
    >>> import pandas as pd
    >>> # NOTE: adjust the import path to where this module lives in your
    >>> # project (assumed here to be ``utils/FixedLengthTransformer.py``).
    >>> from utils.FixedLengthTransformer import FixedLengthTransformer
    >>>
    >>> # Create a sample nested DataFrame with unequal length time series
    >>> data = {
    ...     'feature1': [
    ...         pd.Series([1, 2, 3]), pd.Series([4, 5]), pd.Series([6, 7, 8, 9])
    ...     ],
    ...     'feature2': [
    ...         pd.Series([10, 11]), pd.Series([12, 13, 14]), pd.Series([15])
    ...     ]
    ... }
    >>> X = pd.DataFrame(data)
    >>>
    >>> # Initialize the FixedLengthTransformer with fixed_length=3
    >>> transformer = FixedLengthTransformer(fixed_length=3)
    >>>
    >>> # Fit the transformer to the data
    >>> transformer.fit(X)
    >>>
    >>> # Transform the data
    >>> Xt = transformer.transform(X)
    >>>
    >>> # Display the transformed data
    >>> print(Xt)
    """

    _tags = {
        "authors": ["user"],
        "maintainers": ["user"],
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:instancewise": False,
        "X_inner_mtype": "nested_univ",
        "y_inner_mtype": "None",
        "fit_is_empty": True,  # No need to compute anything during fit
        "capability:unequal_length:removes": True,
    }

    def __init__(self, fixed_length, fill_value=0):
        # Reject non-integers up front (bool is an int subclass but is never a
        # meaningful length); otherwise a float or string length would only
        # fail later, deep inside numpy/pandas, with an unrelated message.
        is_integer = isinstance(fixed_length, (int, np.integer)) and not isinstance(
            fixed_length, bool
        )
        if not is_integer or fixed_length <= 0:
            raise ValueError("fixed_length must be a positive integer")
        self.fixed_length = fixed_length
        self.fill_value = fill_value
        super().__init__()

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        This is a no-op since we only need the fixed_length parameter.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        self : reference to self
        """
        return self

    def _transform_series(self, series):
        """Transform a single series to fixed length by padding or truncating.

        Parameters
        ----------
        series : pandas.Series
            The input series to transform

        Returns
        -------
        numpy.ndarray
            Array of length ``fixed_length``. It is always a fresh copy, so
            writing to the result never modifies ``series``. Padded results
            have float dtype; other results keep the dtype of ``series``.
        """
        target_length = self.fixed_length
        series_length = len(series)

        if series_length >= target_length:
            # Already long enough: keep the leading values. The explicit copy
            # avoids handing out a view onto the caller's data.
            return series.iloc[:target_length].values.copy()

        # Too short: start from an all-fill buffer and overwrite the head.
        padded = np.full(target_length, self.fill_value, dtype=float)
        padded[:series_length] = np.asarray(series)
        return padded

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        Xt : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of Xt contains pandas.Series with fixed length; Xt has
            the same index and columns as X
        """
        n_instances, _ = X.shape
        if n_instances == 0:
            # Nothing to pad or truncate. Building a frame from an empty list
            # of rows would have no columns, making the relabelling below fail.
            return X.copy()

        # One pandas.Series per instance, holding that instance's transformed
        # series (one per feature column).
        transformed_rows = [
            pd.Series(
                [
                    pd.Series(self._transform_series(series))
                    for series in X.iloc[i, :].values
                ]
            )
            for i in range(n_instances)
        ]

        # Convert back to a nested DataFrame; df_map applies pd.Series to
        # every cell of the assembled frame.
        Xt = df_map(pd.DataFrame(transformed_rows))(pd.Series)
        Xt.columns = X.columns
        # The rebuilt frame has a default integer index; restore the instance
        # labels so rows of Xt can still be matched to rows of X.
        Xt.index = X.index
        return Xt

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Needed because ``fixed_length`` has no default value, so an instance
        cannot be constructed without explicit parameters.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return. Only one set is
            defined, so the value is ignored.

        Returns
        -------
        params : list of dict
            Each dict is a valid set of constructor arguments, i.e.
            ``cls(**params[i])`` creates a valid instance.
        """
        return [
            {"fixed_length": 5},
            {"fixed_length": 3, "fill_value": -1},
        ]