from typing import List import numpy as np import pandas as pd from pandas.tseries import offsets from pandas.tseries.frequencies import to_offset class TimeFeature: def __init__(self): pass def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: pass def __repr__(self): return self.__class__.__name__ + "()" class SecondOfMinute(TimeFeature): """Minute of hour encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.second / 59.0 - 0.5 class MinuteOfHour(TimeFeature): """Minute of hour encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.minute / 59.0 - 0.5 class HourOfDay(TimeFeature): """Hour of day encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.hour / 23.0 - 0.5 class DayOfWeek(TimeFeature): """Hour of day encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return index.dayofweek / 6.0 - 0.5 class DayOfMonth(TimeFeature): """Day of month encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.day - 1) / 30.0 - 0.5 class DayOfYear(TimeFeature): """Day of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.dayofyear - 1) / 365.0 - 0.5 class MonthOfYear(TimeFeature): """Month of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.month - 1) / 11.0 - 0.5 class WeekOfYear(TimeFeature): """Week of year encoded as value between [-0.5, 0.5]""" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: return (index.week - 1) / 52.0 - 0.5 def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: """ Returns a list of time features that will be appropriate for the given frequency string. Parameters ---------- freq_str Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. """ features_by_offsets = { offsets.YearEnd: [], offsets.QuarterEnd: [MonthOfYear], offsets.MonthEnd: [MonthOfYear], offsets.Week: [DayOfMonth, WeekOfYear], offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], offsets.Minute: [ MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear, ], offsets.Second: [ SecondOfMinute, MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear, ], } offset = to_offset(freq_str) for offset_type, feature_classes in features_by_offsets.items(): if isinstance(offset, offset_type): return [cls() for cls in feature_classes] supported_freq_msg = f""" Unsupported frequency {freq_str} The following frequencies are supported: Y - yearly alias: A M - monthly W - weekly D - daily B - business days H - hourly T - minutely alias: min S - secondly """ raise RuntimeError(supported_freq_msg) def time_features(dates, timeenc=1, freq="h"): """ > `time_features` takes in a `dates` dataframe with a 'dates' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0: > * m - [month] > * w - [month] > * d - [month, day, weekday] > * b - [month, day, weekday] > * h - [month, day, weekday, hour] > * t - [month, day, weekday, hour, *minute] > > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]): > * Q - [month] > * M - [month] > * W - [Day of month, week of year] > * D - [Day of week, day of month, day of year] > * B - [Day of week, day of month, day of year] > * H - [Hour of day, day of week, day of month, day of year] > * T - [Minute of hour*, hour of day, day of week, day of month, day of year] > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year] *minute returns a number from 0-3 corresponding to the 15 minute period it falls into. """ if timeenc == 0: dates["month"] = dates.date.apply(lambda row: row.month, 1) dates["day"] = dates.date.apply(lambda row: row.day, 1) dates["weekday"] = dates.date.apply(lambda row: row.weekday, 1) dates["hour"] = dates.date.apply(lambda row: row.hour, 1) dates["minute"] = dates.date.apply(lambda row: row.minute, 1) dates["minute"] = dates.minute.map(lambda x: x // 15) freq_map = { "y": [], "m": ["month"], "w": ["month"], "d": ["month", "day", "weekday"], "b": ["month", "day", "weekday"], "h": ["month", "day", "weekday", "hour"], "t": ["month", "day", "weekday", "hour", "minute"], } return dates[freq_map[freq.lower()]].values elif timeenc == 1: dates = pd.to_datetime(dates.date.values) return np.vstack( [feat(dates) for feat in time_features_from_frequency_str(freq)] ).transpose(1, 0) elif timeenc == 2: # Time2Vec dates = pd.to_datetime(dates.date.values) return np.vstack( [feat(dates) for feat in time_features_from_frequency_str(freq)] ).transpose(1, 0)