Spaces:
Sleeping
Sleeping
Upload date_features.py
Browse files- date_features.py +39 -0
date_features.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
# Define the getDateFeatures() function
|
| 4 |
+
def getDateFeatures(df):
    """Add calendar-derived feature columns to ``df`` and return it.

    The frame is modified in place; the returned object is the same
    ``df`` that was passed in.

    Args:
        df: DataFrame with a ``'date'`` column of datetime dtype (the
            ``.dt`` accessor is used on it).

    Returns:
        The same DataFrame with these columns added or overwritten:
        ``holiday_type``, ``is_holiday``, ``year``, ``month``,
        ``dayofmonth``, ``dayofweek``, ``weekofyear``, ``quarter``,
        ``is_month_start``, ``is_month_end``, ``is_quarter_start``,
        ``is_quarter_end``, ``is_year_start``, ``is_year_end``,
        ``year_weekofyear``, ``dayofyear``, ``sin(dayofyear)``,
        ``cos(dayofyear)``, ``is_weekend`` and ``season``.
    """
    dates = df['date'].dt

    # Holiday columns are filled with fixed defaults for every row.
    df['holiday_type'] = 'Workday'
    df['is_holiday'] = False

    df['year'] = dates.year
    df['month'] = dates.month
    df['dayofmonth'] = dates.day
    df['dayofweek'] = dates.dayofweek
    # ``Series.dt.weekofyear`` was removed in pandas 2.0; ``isocalendar().week``
    # yields the same ISO-8601 week number. It comes back as nullable UInt32,
    # so cast to a plain integer column like the removed accessor produced.
    df['weekofyear'] = dates.isocalendar().week.astype(int)

    df['quarter'] = dates.quarter
    df['is_month_start'] = dates.is_month_start.astype(int)
    df['is_month_end'] = dates.is_month_end.astype(int)
    df['is_quarter_start'] = dates.is_quarter_start.astype(int)
    df['is_quarter_end'] = dates.is_quarter_end.astype(int)
    df['is_year_start'] = dates.is_year_start.astype(int)
    df['is_year_end'] = dates.is_year_end.astype(int)

    # Combine year and week into a single sortable integer (e.g. 202126).
    # NOTE(review): this pairs the calendar year with the ISO week, so
    # 2021-01-01 (ISO week 53 of 2020) becomes 202153 -- kept as-is so
    # existing consumers see the same values; confirm this is intended.
    df['year_weekofyear'] = df['year'] * 100 + df['weekofyear']

    # Create new columns to represent the cyclic nature of a year.
    # NOTE(review): the day number is passed to sin/cos directly as radians
    # (not scaled by 2*pi/365), so the period is not one year -- left
    # unchanged because downstream consumers may rely on the current values.
    df['dayofyear'] = dates.dayofyear
    df["sin(dayofyear)"] = np.sin(df["dayofyear"])
    df["cos(dayofyear)"] = np.cos(df["dayofyear"])

    # dayofweek is 0 (Monday) .. 6 (Sunday); 5 and 6 are the weekend.
    df["is_weekend"] = np.where(df['dayofweek'] > 4, 1, 0)

    # Define the months belonging to each season.
    seasons = {
        'Winter': [12, 1, 2],
        'Spring': [3, 4, 5],
        'Summer': [6, 7, 8],
        'Autumn': [9, 10, 11],
    }
    # Invert to a month -> season lookup and map it over the month column.
    month_to_season = {
        month: season
        for season, months in seasons.items()
        for month in months
    }
    df['season'] = df['month'].map(month_to_season)

    return df
|