File size: 4,915 Bytes
c9b7b21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""Fixed length transformer, pad or truncate panel to fixed length."""

import numpy as np
import pandas as pd

from sktime.transformations.base import BaseTransformer
from sktime.utils.pandas import df_map

# Names exported by ``from <this module> import *``; the transformer class is
# the only public object defined here.
__all__ = ["FixedLengthTransformer"]
# Author handles credited for this module (the same list is repeated in the
# ``authors`` entry of the class ``_tags``).
__author__ = ["user"]


class FixedLengthTransformer(BaseTransformer):
    """Transform panel of variable length time series to fixed length.

    Transforms input dataset to a fixed length by either:

    - Padding shorter series at the end with a fill value (default: 0)
    - Truncating longer series to their first ``fixed_length`` observations

    Unlike PaddingTransformer, this transformer requires a fixed_length parameter
    and will both pad and truncate as needed.

    Parameters
    ----------
    fixed_length : int
        The exact length that all series will be transformed to.
        Must be a positive integer.
    fill_value : any, optional (default=0)
        The value used to pad shorter series. Padded series are returned as
        float arrays whenever the data and ``fill_value`` can be represented as
        floats; otherwise an object array is used.

    Raises
    ------
    ValueError
        If ``fixed_length`` is not a positive integer.

    Examples
    --------
    >>> import pandas as pd
    >>> from sktime.transformations.panel.fixed_length import FixedLengthTransformer
    >>>
    >>> # Create a sample nested DataFrame with unequal length time series
    >>> data = {
    ...     'feature1': [
    ...         pd.Series([1, 2, 3]), pd.Series([4, 5]), pd.Series([6, 7, 8, 9])
    ...     ],
    ...     'feature2': [
    ...         pd.Series([10, 11]), pd.Series([12, 13, 14]), pd.Series([15])
    ...     ]
    ... }
    >>> X = pd.DataFrame(data)
    >>>
    >>> # Every series is padded with 0 or cut so that it has exactly 3 values
    >>> transformer = FixedLengthTransformer(fixed_length=3)
    >>> Xt = transformer.fit_transform(X)
    """

    _tags = {
        "authors": ["user"],
        "maintainers": ["user"],
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Series",
        "scitype:instancewise": False,
        "X_inner_mtype": "nested_univ",
        "y_inner_mtype": "None",
        "fit_is_empty": True,  # No need to compute anything during fit
        "capability:unequal_length:removes": True,
    }

    def __init__(self, fixed_length, fill_value=0):
        # bool is a subclass of int, but True/False is never a meaningful length
        is_integer = isinstance(fixed_length, (int, np.integer)) and not isinstance(
            fixed_length, bool
        )
        # Check the type first so that None, strings or floats raise the
        # documented ValueError instead of a TypeError from the comparison
        if not is_integer or fixed_length <= 0:
            raise ValueError("fixed_length must be a positive integer")

        self.fixed_length = fixed_length
        self.fill_value = fill_value
        super().__init__()

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        This is a no-op since we only need the fixed_length parameter.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        self : reference to self
        """
        return self

    def _transform_series(self, series):
        """Transform a single series to fixed length by padding or truncating.

        Parameters
        ----------
        series : pandas.Series
            The input series to transform

        Returns
        -------
        numpy.ndarray
            Array of length ``fixed_length``. Series that are already long
            enough keep their dtype; padded series are float, or object when
            the values or ``fill_value`` cannot be stored as float.
        """
        n_obs = len(series)
        target = self.fixed_length

        if n_obs >= target:
            # Already long enough: keep only the leading ``target`` observations
            # (for an exact-length series this is simply all of its values)
            return series.iloc[:target].values

        # Too short: start from a buffer of fill values, then copy the
        # observations into its head so that the padding ends up at the tail
        values = series.to_numpy()
        try:
            padded = np.full(target, self.fill_value, dtype=float)
            padded[:n_obs] = values
        except (TypeError, ValueError):
            # Non-numeric data or fill value: fall back to an object array
            # rather than failing on the float conversion
            padded = np.full(target, self.fill_value, dtype=object)
            padded[:n_obs] = values
        return padded

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of X must contain pandas.Series
        y : ignored argument for interface compatibility

        Returns
        -------
        Xt : nested pandas DataFrame of shape [n_instances, n_features]
            each cell of Xt contains pandas.Series with fixed length;
            row index and column labels are the same as those of X
        """
        n_instances, _ = X.shape

        if n_instances == 0:
            # Nothing to pad or truncate; building a frame from an empty list
            # of rows would lose the columns and fail when relabelling them
            return X.copy()

        # Process each row of instances, transforming every series in the row
        transformed_rows = [
            pd.Series(
                [
                    pd.Series(self._transform_series(series))
                    for series in X.iloc[i, :].values
                ]
            )
            for i in range(n_instances)
        ]

        # Convert back to DataFrame
        Xt = df_map(pd.DataFrame(transformed_rows))(pd.Series)
        Xt.columns = X.columns
        # Rows are rebuilt positionally, so restore the original instance index
        Xt.index = X.index

        return Xt

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Needed because ``fixed_length`` has no default value, so an instance
        cannot be constructed from an empty parameter dictionary.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return. Only one set is
            defined, so the value is not used.

        Returns
        -------
        params : list of dict
            Each dict is a valid set of constructor arguments, i.e.,
            ``cls(**params[i])`` creates a valid test instance.
        """
        return [
            {"fixed_length": 5},
            {"fixed_length": 10, "fill_value": 1},
        ]