predict-power / calendar_features.py
jeffliulab's picture
Initial deploy: live ISO-NE demand fetch + baseline inference
308474b verified
"""
Build the 44-d calendar feature vector (three concatenated one-hots plus a holiday flag) used by the demand-forecasting model.
Layout (matches training/data_preparation/dataset.py):
hour-of-day one-hot (24)
+ day-of-week one-hot (7)
+ month one-hot (12)
+ US holiday flag (1)
"""
from __future__ import annotations
from datetime import datetime, timedelta
from typing import Iterable
import numpy as np
# Width of the calendar feature vector:
# 24 hour-of-day slots + 7 day-of-week slots + 12 month slots + 1 holiday flag.
CAL_DIM = 24 + 7 + 12 + 1
# US federal holiday lookup, in two parts:
#   * fixed-date holidays, matched on (month, day) in every year;
#   * moving holidays, pre-computed for 2022-2026 and derived from their
#     weekday rule for any other year.
# Observed-day shifts (a holiday that falls on a weekend) are not modelled.
_FIXED_HOLIDAYS_MD = {
    (1, 1),    # New Year's Day
    (7, 4),    # Independence Day
    (11, 11),  # Veterans Day
    (12, 25),  # Christmas
    (6, 19),   # Juneteenth
}

# Moving holidays as (year, month, day), pre-computed for 2022-2026.
# Per year, in order: MLK Day, Presidents' Day, Memorial Day, Labor Day,
# Columbus Day, Thanksgiving.
_MOVING_HOLIDAYS = {
    (2022, 1, 17), (2022, 2, 21), (2022, 5, 30), (2022, 9, 5),
    (2022, 10, 10), (2022, 11, 24),
    (2023, 1, 16), (2023, 2, 20), (2023, 5, 29), (2023, 9, 4),
    (2023, 10, 9), (2023, 11, 23),
    (2024, 1, 15), (2024, 2, 19), (2024, 5, 27), (2024, 9, 2),
    (2024, 10, 14), (2024, 11, 28),
    (2025, 1, 20), (2025, 2, 17), (2025, 5, 26), (2025, 9, 1),
    (2025, 10, 13), (2025, 11, 27),
    (2026, 1, 19), (2026, 2, 16), (2026, 5, 25), (2026, 9, 7),
    (2026, 10, 12), (2026, 11, 26),
}

# Weekday rules that generate the table above, as (month, weekday, nth):
# the nth occurrence of `weekday` (Mon=0) within `month`; nth == -1 means
# the last occurrence.
_MOVING_HOLIDAY_RULES = (
    (1, 0, 3),    # MLK Day: 3rd Monday of January
    (2, 0, 3),    # Presidents' Day: 3rd Monday of February
    (5, 0, -1),   # Memorial Day: last Monday of May
    (9, 0, 1),    # Labor Day: 1st Monday of September
    (10, 0, 2),   # Columbus Day: 2nd Monday of October
    (11, 3, 4),   # Thanksgiving: 4th Thursday of November
)


def _matches_moving_rule(dt: datetime) -> bool:
    """Return True if dt's date satisfies one of _MOVING_HOLIDAY_RULES."""
    for month, weekday, nth in _MOVING_HOLIDAY_RULES:
        if dt.month != month or dt.weekday() != weekday:
            continue
        if nth == -1:
            # Last occurrence: the same weekday one week later is already
            # in the following month.
            if (dt + timedelta(days=7)).month != month:
                return True
        elif (dt.day - 1) // 7 + 1 == nth:
            # Days 1-7 hold the 1st occurrence, 8-14 the 2nd, and so on.
            return True
    return False


def _is_holiday(dt: datetime) -> bool:
    """Return True if dt falls on a US federal holiday.

    Only the calendar date of ``dt`` is examined; the time of day is ignored.
    Fixed-date holidays match in any year.  Moving holidays are looked up in
    the 2022-2026 table first and otherwise derived from their weekday rule,
    so the flag keeps working for timestamps outside the pre-computed years.
    """
    if (dt.month, dt.day) in _FIXED_HOLIDAYS_MD:
        return True
    if (dt.year, dt.month, dt.day) in _MOVING_HOLIDAYS:
        return True
    return _matches_moving_rule(dt)
def encode_one(dt: datetime) -> np.ndarray:
    """Encode one timestamp as a float32 calendar vector of length CAL_DIM.

    Slot layout:
        0..23   hour of day
        24..30  day of week (Monday first)
        31..42  month (January first)
        43      holiday flag
    """
    hour_slot = dt.hour
    dow_slot = 24 + dt.weekday()
    month_slot = 30 + dt.month  # month is 1-based, so January lands on 31
    active = [hour_slot, dow_slot, month_slot]
    if _is_holiday(dt):
        active.append(43)

    vec = np.zeros(CAL_DIM, dtype=np.float32)
    vec[active] = 1.0
    return vec
def encode_range(start_dt: datetime, n_hours: int) -> np.ndarray:
    """Return an (n_hours, CAL_DIM) float32 matrix of hourly calendar vectors.

    Row ``i`` encodes ``start_dt + i hours``, so row 0 is ``start_dt`` itself.
    ``n_hours == 0`` yields an empty ``(0, CAL_DIM)`` array rather than an
    error.

    Raises:
        ValueError: if ``n_hours`` is negative.
    """
    if n_hours < 0:
        raise ValueError(f"n_hours must be non-negative, got {n_hours}")
    # Preallocating (instead of np.stack on a list) keeps the zero-length
    # case well-defined: np.stack rejects an empty sequence.
    out = np.zeros((n_hours, CAL_DIM), dtype=np.float32)
    for i in range(n_hours):
        out[i] = encode_one(start_dt + timedelta(hours=i))
    return out
if __name__ == "__main__":
    # Smoke test: encode noon on 2022-12-25 and print the decoded fields.
    sample = encode_one(datetime(2022, 12, 25, 12))
    hour_idx = sample[:24].argmax()
    dow_idx = sample[24:31].argmax()
    month_num = sample[31:43].argmax() + 1
    print(f"Christmas noon 2022: hour={hour_idx}, "
          f"dow={dow_idx}, month={month_num}, "
          f"holiday={sample[43]:.0f}")