decode-iblend-code / scripts /decode_reimplementation.py

Upload scripts/decode_reimplementation.py with huggingface_hub

c06d939 verified 4 days ago

38.3 kB

	#!/usr/bin/env python3
	"""Re-implement the DECODE paper training pipeline for local I-BLEND data.

	The paper pipeline is:
	1. Fuse energy, occupancy, calendar, and weather-like environmental features.
	2. Align everything to a 10-minute sampling rate.
	3. Normalize features with Min-Max scaling.
	4. Split chronologically into train/validation/test with a 70:15:15 ratio.
	5. Compare LSTM with Linear Regression, Decision Tree, and Random Forest.

	This local implementation supports two targets:
	- paper_buildings: 7 building-level series matching the paper.
	- meters: 9 meter-level series from all_buildings_power.csv.

	Weather is optional because the local weather file in this workspace starts in
	2018, while the energy data ends in 2017. The script detects this and continues.
	"""

	from __future__ import annotations

	import argparse
	import os
	import json
	import math
	import sys
	import warnings
	from dataclasses import dataclass
	from pathlib import Path


	# Parent of the directory that contains this script file.
	ROOT = Path(__file__).resolve().parents[1]
	# Use ROOT as the data root when it already holds the occupancy dataset
	# folder; otherwise fall back to ROOT's parent directory.
	DEFAULT_DATA_MINING_ROOT = ROOT if (ROOT / "IIITD_occupancy_dataset").exists() else ROOT.parent
	# The IBLEND_DATA_ROOT environment variable overrides the detected data root.
	DATA_MINING_ROOT = Path(os.environ.get("IBLEND_DATA_ROOT", DEFAULT_DATA_MINING_ROOT))
	# The IBLEND_ENERGY_FILE environment variable overrides the energy CSV path.
	ENERGY_FILE = Path(os.environ.get("IBLEND_ENERGY_FILE", DATA_MINING_ROOT / "energy_dataset" / "all_buildings_power.csv"))
	# Remaining input locations, all derived from the data root (the folder
	# names are intentionally repeated once in each path).
	OCCUPANCY_DIR = DATA_MINING_ROOT / "IIITD_occupancy_dataset" / "IIITD_occupancy_dataset"
	CALENDAR_DIR = DATA_MINING_ROOT / "iiitd_calender_schedule" / "iiitd_calender_schedule"
	WEATHER_FILE = DATA_MINING_ROOT / "weather_comparison" / "weather_comparison" / "IIITD_and_airport_data.csv"
	# Default value of the --output-dir option.
	OUT_DIR = ROOT / "decode_reimplementation_outputs"
	# Time zone that epoch timestamps are converted to and naive weather
	# timestamps are localized to.
	TZ = "Asia/Kolkata"


	# Targets used when --mode is "paper_buildings". Each entry lists the energy
	# meter columns that are summed into the target series ("meters") and the
	# code of the occupancy CSV ("<code>.csv") that is joined to it ("occupancy").
	PAPER_BUILDINGS = {
	"Academic": {"meters": ["Academic"], "occupancy": "ACB"},
	"Boys_hostel": {"meters": ["Boys_main", "Boys_backup"], "occupancy": "BH"},
	"Girls_hostel": {"meters": ["Girls_main", "Girls_backup"], "occupancy": "GH"},
	"Library": {"meters": ["Library"], "occupancy": "LB"},
	"Lecture": {"meters": ["Lecture"], "occupancy": "LCB"},
	"Dining": {"meters": ["Mess"], "occupancy": "DB"},
	"Facilities": {"meters": ["Facilities"], "occupancy": "SRB"},
	}

	# Targets used when --mode is "meters": one entry per single meter column,
	# with the same "meters" / "occupancy" layout as PAPER_BUILDINGS. Main and
	# backup hostel meters share the occupancy code of their hostel.
	METER_TARGETS = {
	"Academic": {"meters": ["Academic"], "occupancy": "ACB"},
	"Boys_main": {"meters": ["Boys_main"], "occupancy": "BH"},
	"Boys_backup": {"meters": ["Boys_backup"], "occupancy": "BH"},
	"Facilities": {"meters": ["Facilities"], "occupancy": "SRB"},
	"Girls_main": {"meters": ["Girls_main"], "occupancy": "GH"},
	"Girls_backup": {"meters": ["Girls_backup"], "occupancy": "GH"},
	"Lecture": {"meters": ["Lecture"], "occupancy": "LCB"},
	"Library": {"meters": ["Library"], "occupancy": "LB"},
	"Mess": {"meters": ["Mess"], "occupancy": "DB"},
	}


	@dataclass
	class SplitData:
	    """Scaled chronological train/validation/test arrays for one target.

	    Instances are produced by ``make_ml_split`` and consumed by the model
	    training functions.
	    """

	    # Scaled feature matrices for each chronological segment.
	    x_train: object
	    x_val: object
	    x_test: object
	    # Scaled one-dimensional target vectors matching the matrices above.
	    y_train: object
	    y_val: object
	    y_test: object
	    # Column names of the feature matrices, in column order.
	    feature_names: list[str]
	    # Requested cap on the number of test rows, or None for the full test split.
	    test_span_steps: int | None = None


	def import_stack():
	    """Import the modelling libraries lazily and describe what is available.

	    numpy, pandas and scikit-learn are mandatory: if any of them cannot be
	    imported the function raises ``SystemExit`` with installation hints.
	    PyTorch, LightGBM, statsmodels and TensorFlow are optional: a missing
	    package is stored as ``None`` and the reason is kept in the matching
	    ``*_import_error`` entry. TensorFlow is not even attempted when PyTorch
	    imported successfully, unless the environment variable
	    ``DECODE_DISABLE_TENSORFLOW`` is set to something other than "1".

	    Returns:
	        A dict mapping short names to the imported objects and error strings.
	    """

	    def describe(error):
	        return f"{type(error).__name__}: {error}"

	    # --- mandatory base stack -------------------------------------------
	    try:
	        import numpy as np
	        import pandas as pd
	        from sklearn.ensemble import RandomForestRegressor
	        from sklearn.linear_model import Ridge
	        from sklearn.metrics import mean_absolute_error, r2_score
	        from sklearn.preprocessing import MinMaxScaler
	        from sklearn.tree import DecisionTreeRegressor
	    except ImportError as exc:
	        missing = str(exc).split("No module named ")[-1].strip("'")
	        hint = (
	            f"Missing dependency: {missing}\n"
	            "Install the base training stack with:\n"
	            f" {sys.executable} -m pip install pandas numpy scikit-learn\n"
	            "Install LSTM support with either PyTorch or TensorFlow:\n"
	            f" {sys.executable} -m pip install torch\n"
	            f" {sys.executable} -m pip install tensorflow\n"
	        )
	        raise SystemExit(hint) from exc

	    # --- optional: PyTorch ----------------------------------------------
	    torch_import_error = None
	    try:
	        import torch
	        from torch import nn
	        from torch.utils.data import DataLoader, TensorDataset
	    except Exception as exc:
	        torch = nn = DataLoader = TensorDataset = None
	        torch_import_error = describe(exc)

	    # --- optional: LightGBM ---------------------------------------------
	    lgbm_import_error = None
	    try:
	        import lightgbm as lgb
	    except Exception as exc:
	        lgb = None
	        lgbm_import_error = describe(exc)

	    # --- optional: statsmodels ARIMA ------------------------------------
	    statsmodels_import_error = None
	    try:
	        from statsmodels.tsa.arima.model import ARIMA
	    except Exception as exc:
	        ARIMA = None
	        statsmodels_import_error = describe(exc)

	    # --- optional: TensorFlow (skipped by default when PyTorch works) ---
	    tf = keras = None
	    tf_import_error = None
	    tf_disabled = os.environ.get("DECODE_DISABLE_TENSORFLOW", "1") == "1"
	    if tf_disabled and torch is not None:
	        tf_import_error = "disabled because PyTorch is available"
	    else:
	        try:
	            import tensorflow as tf
	            from tensorflow import keras
	        except Exception as exc:
	            tf = keras = None
	            tf_import_error = describe(exc)

	    stack = {
	        "np": np,
	        "pd": pd,
	        "RandomForestRegressor": RandomForestRegressor,
	        "Ridge": Ridge,
	        "DecisionTreeRegressor": DecisionTreeRegressor,
	        "MinMaxScaler": MinMaxScaler,
	        "mean_absolute_error": mean_absolute_error,
	        "r2_score": r2_score,
	        "torch": torch,
	        "nn": nn,
	        "DataLoader": DataLoader,
	        "TensorDataset": TensorDataset,
	        "torch_import_error": torch_import_error,
	        "lgb": lgb,
	        "lgbm_import_error": lgbm_import_error,
	        "ARIMA": ARIMA,
	        "statsmodels_import_error": statsmodels_import_error,
	        "tf": tf,
	        "keras": keras,
	        "tf_import_error": tf_import_error,
	    }
	    return stack


	def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
	    """Parse the command-line options of a training run.

	    Args:
	        argv: Argument strings to parse. ``None`` (the default) lets argparse
	            read ``sys.argv[1:]``, which is what the script entry point
	            relies on; passing a list allows programmatic use.

	    Returns:
	        The populated ``argparse.Namespace``.
	    """
	    parser = argparse.ArgumentParser(description="DECODE paper re-implementation for I-BLEND data.")

	    # Which series to train on.
	    parser.add_argument(
	        "--mode",
	        choices=["paper_buildings", "meters"],
	        default="paper_buildings",
	        help="paper_buildings trains 7 building-level targets; meters trains 9 meter-level targets.",
	    )
	    parser.add_argument(
	        "--target",
	        default="all",
	        help="Target name to train, or 'all'. Names depend on --mode.",
	    )

	    # Sampling, windowing and evaluation span.
	    parser.add_argument("--freq", default="10min", help="Common sampling frequency. Paper uses 10min.")
	    parser.add_argument("--lookback", type=int, default=18, help="LSTM lookback steps. 18 at 10min = 3 hours.")
	    parser.add_argument("--horizon", type=int, default=1, help="Prediction horizon in rows. 1 at 10min = next 10min.")
	    parser.add_argument("--horizon-days", type=float, default=0, help="Prediction horizon in days. Overrides --horizon when > 0.")
	    parser.add_argument("--test-span-days", type=float, default=0, help="Evaluate only this many days from the chronological test split.")

	    # Model hyper-parameters and model selection.
	    parser.add_argument("--epochs", type=int, default=20, help="LSTM epochs. Paper uses 20.")
	    parser.add_argument("--batch-size", type=int, default=64, help="LSTM batch size. Paper uses 64.")
	    parser.add_argument("--rf-trees", type=int, default=500, help="Random Forest trees. Paper tuned to 500.")
	    parser.add_argument("--dl-models", default="lstm,cnn,tcn", help="Comma-separated deep models: lstm,cnn,tcn,none.")
	    parser.add_argument("--include-arima", action="store_true", help="Fit ARIMA(2,1,2). This can be slow on full series.")
	    parser.add_argument("--arima-max-train", type=int, default=20000, help="Maximum recent train points for ARIMA fitting.")

	    # Data handling and output.
	    parser.add_argument("--max-rows", type=int, default=0, help="Optional cap after preprocessing for quick smoke tests.")
	    parser.add_argument("--include-weather", action="store_true", help="Try to merge local weather data if time ranges overlap.")
	    parser.add_argument("--skip-lstm", action="store_true", help="Only train baseline ML models.")
	    parser.add_argument("--output-dir", default=str(OUT_DIR), help="Output directory.")
	    return parser.parse_args(argv)


	def read_energy_10min(pd, freq: str):
	    """Load the energy CSV and convert it to energy (Wh) per ``freq`` interval.

	    The ``timestamp`` column is read as epoch seconds and converted to the
	    module time zone. Raises ``FileNotFoundError`` when ``ENERGY_FILE`` does
	    not exist.
	    """
	    if not ENERGY_FILE.exists():
	        raise FileNotFoundError(f"Missing energy file: {ENERGY_FILE}")

	    raw = pd.read_csv(ENERGY_FILE, na_values=["NA", ""])
	    local_time = pd.to_datetime(raw["timestamp"], unit="s", utc=True).dt.tz_convert(TZ)
	    power_w = (
	        raw.assign(datetime=local_time)
	        .drop(columns=["timestamp"])
	        .set_index("datetime")
	        .sort_index()
	    )

	    # Paper predicts energy in Wh while the columns hold power in W, so for
	    # one interval: Wh = mean(W) * interval_minutes / 60.
	    interval_minutes = pd.Timedelta(freq).total_seconds() / 60
	    return power_w.resample(freq).mean() * interval_minutes / 60


	def read_occupancy_10min(pd, code: str, freq: str):
	    """Load ``<code>.csv`` from the occupancy folder, averaged per ``freq``.

	    Returns ``None`` (after a warning) when the file is missing. Gaps in
	    ``occupancy_count`` are filled by time interpolation, then forward and
	    backward fill.
	    """
	    csv_path = OCCUPANCY_DIR / f"{code}.csv"
	    if not csv_path.exists():
	        warnings.warn(f"Missing occupancy file for {code}: {csv_path}")
	        return None

	    raw = pd.read_csv(csv_path, na_values=["NA", ""])
	    raw["datetime"] = pd.to_datetime(raw["timestamp"], unit="s", utc=True).dt.tz_convert(TZ)
	    indexed = raw.drop(columns=["timestamp"]).set_index("datetime").sort_index()

	    resampled = indexed.resample(freq).mean()
	    counts = resampled["occupancy_count"]
	    resampled["occupancy_count"] = counts.interpolate(method="time").ffill().bfill()
	    return resampled


	def read_calendar(pd):
	    """Combine the yearly calendar CSVs into one row per date.

	    Returns ``None`` (after a warning) when no ``calender_year_*.csv`` file
	    is found. The result has the columns ``date``, ``working_day`` (int,
	    unparseable values become 0), ``activity`` (str, missing values become
	    "unknown") and ``activity_code`` (category codes of ``activity``).
	    """
	    yearly = [pd.read_csv(csv_path) for csv_path in sorted(CALENDAR_DIR.glob("calender_year_*.csv"))]
	    if not yearly:
	        warnings.warn(f"No calendar files found in {CALENDAR_DIR}")
	        return None

	    combined = pd.concat(yearly, ignore_index=True)
	    combined["date"] = pd.to_datetime(combined["Date"]).dt.date
	    # Keep the first row seen for every date.
	    per_day = combined[["date", "working_day", "activity"]].drop_duplicates("date")
	    return per_day.assign(
	        working_day=lambda frame: pd.to_numeric(frame["working_day"], errors="coerce").fillna(0).astype(int),
	        activity=lambda frame: frame["activity"].fillna("unknown").astype(str),
	        activity_code=lambda frame: frame["activity"].astype("category").cat.codes,
	    )


	def read_weather_10min(pd, freq: str):
	    """Load the weather CSV, averaged per ``freq`` interval.

	    The first CSV column is treated as the timestamp; rows whose timestamp
	    cannot be parsed are dropped, and a naive index is localized to the
	    module time zone. Gaps are filled by time interpolation, then forward and
	    backward fill.

	    Returns:
	        The resampled numeric weather frame, or ``None`` (after a warning)
	        when ``WEATHER_FILE`` does not exist.
	    """
	    if not WEATHER_FILE.exists():
	        warnings.warn(f"Missing weather file: {WEATHER_FILE}")
	        return None

	    raw = pd.read_csv(WEATHER_FILE, na_values=["NA", ""])
	    raw = raw.rename(columns={raw.columns[0]: "datetime"})
	    raw["datetime"] = pd.to_datetime(raw["datetime"], errors="coerce")
	    frame = raw.dropna(subset=["datetime"]).set_index("datetime").sort_index()
	    if frame.index.tz is None:
	        frame.index = frame.index.tz_localize(TZ)

	    # numeric_only=True: averaging text columns raises on pandas >= 2, and
	    # such columns could not be interpolated or used as features anyway.
	    averaged = frame.resample(freq).mean(numeric_only=True)
	    return averaged.interpolate(method="time").ffill().bfill()


	def horizon_steps_from_args(pd, args) -> int:
	    """Return the prediction horizon in rows (always at least 1).

	    A positive ``args.horizon_days`` takes precedence and is converted to
	    rows of ``args.freq``; otherwise ``args.horizon`` is used.
	    """
	    days = args.horizon_days
	    if not (days and days > 0):
	        return max(1, args.horizon)
	    rows_per_horizon = pd.Timedelta(days=days) / pd.Timedelta(args.freq)
	    return max(1, int(round(rows_per_horizon)))


	def test_span_steps_from_args(pd, args) -> int | None:
	    """Return the requested test span in rows, or ``None`` when unrestricted.

	    A positive ``args.test_span_days`` is converted to rows of ``args.freq``
	    (at least 1); any other value means "use the whole test split".
	    """
	    if args.test_span_days and args.test_span_days > 0:
	        freq_delta = pd.Timedelta(args.freq)
	        steps = int(round(pd.Timedelta(days=args.test_span_days) / freq_delta))
	        return max(1, steps)
	    return None


	# The name starts with "test_"; this flag keeps pytest from collecting the
	# helper as a test case when the module is imported by a test suite.
	test_span_steps_from_args.__test__ = False


	def describe_steps(pd, steps: int, freq: str) -> str:
	    """Describe ``steps`` rows of ``freq`` as whole days, whole hours or minutes."""
	    minutes = (pd.Timedelta(freq) * steps).total_seconds() / 60
	    # Prefer the largest unit that divides the duration exactly.
	    for unit_minutes, label in ((1440, "day(s)"), (60, "hour(s)")):
	        if minutes % unit_minutes == 0:
	            return f"{int(minutes // unit_minutes)} {label}"
	    return f"{minutes:g} minute(s)"


	def add_time_features(pd, df):
	    """Return a copy of ``df`` with calendar features read from its index.

	    Adds ``hour``, ``day_of_week``, ``month``, ``time_slot`` (minute of the
	    day) and sine/cosine encodings of hour, day of week and month. ``df`` is
	    not modified.
	    """
	    out = df.copy()
	    stamps = out.index
	    out["hour"] = stamps.hour
	    out["day_of_week"] = stamps.dayofweek
	    out["month"] = stamps.month
	    out["time_slot"] = stamps.hour * 60 + stamps.minute

	    # (output prefix, source column, period of the cycle)
	    cycles = (("hour", "hour", 24), ("dow", "day_of_week", 7), ("month", "month", 12))
	    for prefix, column, period in cycles:
	        angle = 2 * math.pi * out[column] / period
	        out[f"{prefix}_sin"] = angle.map(math.sin)
	        out[f"{prefix}_cos"] = angle.map(math.cos)
	    return out


	def add_historical_features(df):
	    """Return a copy of ``df`` with lagged and rolling ``energy_wh`` features.

	    Requires the columns ``energy_wh``, ``working_day`` and ``time_slot``.
	    Rolling statistics are computed on the series shifted by one row, so the
	    current row's own value never enters its rolling features.
	    """
	    out = df.copy()
	    energy = out["energy_wh"]

	    for lag in (1, 6, 144, 1008):
	        out[f"energy_lag_{lag}"] = energy.shift(lag)

	    previous = energy.shift(1)
	    out["rolling_mean_6"] = previous.rolling(6).mean()
	    out["rolling_mean_144"] = previous.rolling(144).mean()
	    out["rolling_std_144"] = previous.rolling(144).std()

	    # DECODE-style baseline features: the three previous values that share
	    # this row's working-day class and time-of-day slot.
	    by_day_type_and_slot = out.groupby(["working_day", "time_slot"], sort=False)["energy_wh"]
	    for steps_back in (1, 2, 3):
	        out[f"same_day_type_lag_{steps_back}"] = by_day_type_and_slot.shift(steps_back)
	    return out


	def build_target_frame(pd, energy_10min, calendar, target_name: str, target_spec: dict, freq: str, include_weather: bool):
	    """Assemble the feature frame for one target.

	    The target energy is the row-wise sum of the meter columns named in
	    ``target_spec["meters"]``. Occupancy, calendar and (optionally) weather
	    columns are joined to it, then time and historical features are added.

	    Args:
	        pd: The pandas module.
	        energy_10min: Resampled energy frame with one column per meter.
	        calendar: Calendar frame from ``read_calendar`` or ``None``.
	        target_name: Name used in error messages.
	        target_spec: Dict with the keys ``"meters"`` and ``"occupancy"``.
	        freq: Resampling frequency passed to the occupancy/weather readers.
	        include_weather: Whether to try joining the weather data.

	    Returns:
	        The feature frame indexed like ``energy_10min`` (or like the overlap
	        with the weather data when weather was joined).

	    Raises:
	        KeyError: If a meter named in ``target_spec`` is not a column of
	            ``energy_10min``.
	    """
	    missing_meters = [m for m in target_spec["meters"] if m not in energy_10min.columns]
	    if missing_meters:
	        raise KeyError(f"{target_name} references missing meter columns: {missing_meters}")

	    df = pd.DataFrame(index=energy_10min.index)
	    # min_count=1 keeps the sum NaN when every contributing meter is NaN.
	    df["energy_wh"] = energy_10min[target_spec["meters"]].sum(axis=1, min_count=1)

	    occ = read_occupancy_10min(pd, target_spec["occupancy"], freq)
	    if occ is not None:
	        df = df.join(occ[["occupancy_count"]], how="left")
	    else:
	        df["occupancy_count"] = math.nan

	    df["date"] = df.index.date
	    if calendar is not None:
	        df = df.reset_index().merge(calendar, on="date", how="left").set_index("datetime").sort_index()
	    else:
	        # Without a calendar, treat Monday-Friday as working days.
	        df["working_day"] = (df.index.dayofweek < 5).astype(int)
	        df["activity"] = "unknown"
	        df["activity_code"] = 0

	    if include_weather:
	        weather = read_weather_10min(pd, freq)
	        if weather is not None:
	            joined = df.join(weather, how="inner")
	            if joined.empty:
	                # Keep the frame built above instead of rebuilding it, so the
	                # occupancy and calendar fallbacks stay in effect.
	                warnings.warn(
	                    "Weather data has no overlap with this target after joining. "
	                    "Continuing without weather features."
	                )
	            else:
	                if len(joined) < len(df):
	                    warnings.warn(f"Weather join reduced rows from {len(df)} to {len(joined)}.")
	                df = joined

	    df["occupancy_count"] = df["occupancy_count"].interpolate(method="time").ffill().bfill()
	    # Dates missing from the calendar fall back to the weekday rule.
	    fallback_working_day = pd.Series((df.index.dayofweek < 5).astype(int), index=df.index)
	    df["working_day"] = df["working_day"].fillna(fallback_working_day).astype(int)
	    df["activity"] = df["activity"].fillna("unknown").astype(str)
	    df["activity_code"] = df["activity_code"].fillna(0).astype(int)
	    df = add_time_features(pd, df)
	    df = add_historical_features(df)
	    return df


	def make_ml_split(
	    np,
	    pd,
	    MinMaxScaler,
	    df,
	    horizon: int,
	    max_rows: int,
	    test_span_steps: int | None = None,
	) -> tuple[SplitData, object, object]:
	    """Build the scaled chronological 70/15/15 split for one target frame.

	    The target is ``energy_wh`` shifted ``horizon`` rows into the future.
	    Every numeric column except the target is a feature. Rows with missing or
	    infinite values are dropped, and both scalers are fitted on the training
	    segment only.

	    Args:
	        np: The numpy module.
	        pd: The pandas module.
	        MinMaxScaler: Scaler class with ``fit_transform`` / ``transform``.
	        df: Feature frame containing an ``energy_wh`` column.
	        horizon: Number of rows between a feature row and its target.
	        max_rows: If non-zero, keep only this many most recent clean rows.
	        test_span_steps: If given, keep only this many leading test rows.

	    Returns:
	        ``(split, y_scaler, clean)`` where ``clean`` is the unscaled table of
	        features plus ``target`` that the split was cut from.

	    Raises:
	        ValueError: If fewer than 100 clean rows remain.
	    """
	    data = df.copy()
	    data["target"] = data["energy_wh"].shift(-horizon)

	    non_features = {"target", "date", "activity"}
	    numeric_columns = [
	        c for c in data.columns
	        if c not in non_features and pd.api.types.is_numeric_dtype(data[c])
	    ]
	    finite = data[numeric_columns + ["target"]].replace([np.inf, -np.inf], np.nan)

	    # A feature with no values at all (for example occupancy when its file is
	    # missing) would make dropna() discard every row, so leave it out.
	    empty_columns = [c for c in numeric_columns if not finite[c].notna().any()]
	    if empty_columns:
	        warnings.warn(f"Dropping feature columns without any values: {empty_columns}")
	    feature_names = [c for c in numeric_columns if c not in empty_columns]

	    clean = finite[feature_names + ["target"]].dropna()
	    if max_rows and len(clean) > max_rows:
	        clean = clean.tail(max_rows)

	    n = len(clean)
	    if n < 100:
	        raise ValueError(f"Not enough clean rows after preprocessing: {n}")
	    train_end = int(n * 0.70)
	    val_end = int(n * 0.85)

	    x_raw = clean[feature_names]
	    y_raw = clean[["target"]]

	    x_scaler = MinMaxScaler()
	    y_scaler = MinMaxScaler()

	    # Fit on the training rows only so later rows do not leak into scaling.
	    x_train = x_scaler.fit_transform(x_raw.iloc[:train_end])
	    x_val = x_scaler.transform(x_raw.iloc[train_end:val_end])
	    x_test = x_scaler.transform(x_raw.iloc[val_end:])

	    y_train = y_scaler.fit_transform(y_raw.iloc[:train_end]).ravel()
	    y_val = y_scaler.transform(y_raw.iloc[train_end:val_end]).ravel()
	    y_test = y_scaler.transform(y_raw.iloc[val_end:]).ravel()

	    if test_span_steps is not None:
	        x_test = x_test[:test_span_steps]
	        y_test = y_test[:test_span_steps]

	    split = SplitData(
	        x_train=x_train,
	        x_val=x_val,
	        x_test=x_test,
	        y_train=y_train,
	        y_val=y_val,
	        y_test=y_test,
	        feature_names=feature_names,
	        test_span_steps=test_span_steps,
	    )
	    return split, y_scaler, clean


	def make_lstm_sequences(np, split: SplitData, lookback: int):
	    """Cut sliding feature windows for the sequence models.

	    The three segments of ``split`` are concatenated in time order. For every
	    row ``end`` with at least ``lookback`` rows of history, the window is
	    rows ``end - lookback + 1 .. end`` and its label is the target of row
	    ``end``. A window belongs to the segment that contains its last row, so
	    validation and test windows may reach back into the preceding segment.

	    Args:
	        np: The numpy module.
	        split: Object with ``x_train/x_val/x_test`` and ``y_train/y_val/y_test``.
	        lookback: Window length in rows; must be at least 1.

	    Returns:
	        ``(x_train, x_val, x_test, y_train, y_val, y_test)``; the ``x`` arrays
	        have shape ``(windows, lookback, features)``, also when empty.

	    Raises:
	        ValueError: If ``lookback`` is smaller than 1.
	    """
	    if lookback < 1:
	        raise ValueError(f"lookback must be at least 1, got {lookback}")

	    x_all = np.vstack([split.x_train, split.x_val, split.x_test])
	    y_all = np.concatenate([split.y_train, split.y_val, split.y_test])
	    n_train = len(split.y_train)
	    n_val = len(split.y_val)

	    # Row index of the last element of every window.
	    end_indices = np.arange(lookback - 1, len(y_all))
	    # Use the sequence ending at row `end` to predict that row's target.
	    # The target was already shifted by --horizon during feature engineering,
	    # so excluding row `end` here would make sequence models forecast one
	    # extra step farther than the baseline models.
	    window_rows = end_indices[:, None] + np.arange(1 - lookback, 1)
	    xs = x_all[window_rows]
	    ys = y_all[end_indices]

	    train_mask = end_indices < n_train
	    val_mask = (end_indices >= n_train) & (end_indices < n_train + n_val)
	    test_mask = end_indices >= n_train + n_val

	    return (
	        xs[train_mask],
	        xs[val_mask],
	        xs[test_mask],
	        ys[train_mask],
	        ys[val_mask],
	        ys[test_mask],
	    )


	def inverse_metric(np, y_scaler, y_true_scaled, y_pred_scaled, mean_absolute_error, r2_score):
	    """Score predictions after undoing the target scaling.

	    Returns a dict with ``mae_wh`` and ``r2`` as plain floats, both computed
	    on the values returned by ``y_scaler.inverse_transform``.
	    """

	    def unscale(values):
	        # The scaler expects a single-column 2-D array.
	        column = np.asarray(values).reshape(-1, 1)
	        return y_scaler.inverse_transform(column).ravel()

	    actual = unscale(y_true_scaled)
	    predicted = unscale(y_pred_scaled)
	    mae = mean_absolute_error(actual, predicted)
	    r2 = r2_score(actual, predicted)
	    return {"mae_wh": float(mae), "r2": float(r2)}


	def save_feature_importance(pd, out_dir: Path, target_name: str, model_name: str, feature_names: list[str], importances):
	    """Write the features, most important first, to a CSV file.

	    The file is ``<out_dir>/feature_importance/<target>_<model>_feature_importance.csv``;
	    the folder is created when needed.
	    """
	    folder = out_dir / "feature_importance"
	    folder.mkdir(parents=True, exist_ok=True)
	    ranked = pd.DataFrame({"feature": feature_names, "importance": importances}).sort_values(
	        "importance", ascending=False
	    )
	    ranked.to_csv(folder / f"{target_name}_{model_name}_feature_importance.csv", index=False)


	def train_baselines(stack, split: SplitData, y_scaler, args, out_dir: Path, target_name: str):
	    """Fit the tabular baseline models and score them on the test split.

	    Ridge regression, a decision tree and a random forest are always trained;
	    LightGBM is added when it could be imported, and its feature importances
	    are saved to disk. When LightGBM is unavailable a placeholder row with
	    NaN metrics and the import error is appended instead.

	    Returns:
	        A list with one result dict per model.
	    """
	    np = stack["np"]
	    pd = stack["pd"]
	    lgb = stack["lgb"]

	    estimators = {
	        "ridge_regression": stack["Ridge"](alpha=1.0),
	        "decision_tree": stack["DecisionTreeRegressor"](max_depth=14, min_samples_split=20, random_state=42),
	        "random_forest": stack["RandomForestRegressor"](
	            n_estimators=args.rf_trees,
	            random_state=42,
	            n_jobs=-1,
	            min_samples_split=2,
	        ),
	    }
	    if lgb is not None:
	        estimators["lightgbm"] = lgb.LGBMRegressor(
	            n_estimators=500,
	            learning_rate=0.03,
	            num_leaves=31,
	            subsample=0.9,
	            colsample_bytree=0.9,
	            random_state=42,
	            n_jobs=-1,
	            verbose=-1,
	        )

	    results = []
	    for label, estimator in estimators.items():
	        estimator.fit(split.x_train, split.y_train)
	        predicted = estimator.predict(split.x_test)
	        scores = inverse_metric(
	            np,
	            y_scaler,
	            split.y_test,
	            predicted,
	            stack["mean_absolute_error"],
	            stack["r2_score"],
	        )
	        if label == "lightgbm" and hasattr(estimator, "feature_importances_"):
	            save_feature_importance(pd, out_dir, target_name, label, split.feature_names, estimator.feature_importances_)
	            importance_csv = out_dir / "feature_importance" / f"{target_name}_{label}_feature_importance.csv"
	            scores["feature_importance_path"] = str(importance_csv)
	        results.append({"model": label, **scores})

	    if lgb is None:
	        # Keep a LightGBM row in the results so its absence is visible.
	        results.append({
	            "model": "lightgbm",
	            "mae_wh": math.nan,
	            "r2": math.nan,
	            "note": f"lightgbm_missing: {stack.get('lgbm_import_error')}",
	        })
	    return results


	def train_sequence_torch(stack, split: SplitData, y_scaler, args, model_kind: str):
	    """Train one PyTorch sequence model and score it on the test windows.

	    ``model_kind`` selects the architecture ("lstm", "cnn1d" or "tcn"). The
	    model is trained with L1 loss and RMSprop (lr 1e-3) for at most
	    ``args.epochs`` epochs; training stops after 4 epochs in a row without a
	    new best validation loss, and the best weights are restored before the
	    test predictions are made.

	    Returns:
	        A one-element list with the result dict. When there are fewer than
	        100 training, 10 validation or 10 test windows the dict holds NaN
	        metrics and the note "not_enough_sequences".
	    """
	    np = stack["np"]
	    torch = stack["torch"]
	    nn = stack["nn"]
	    DataLoader = stack["DataLoader"]
	    TensorDataset = stack["TensorDataset"]

	    # Fixed seeds for both libraries.
	    torch.manual_seed(42)
	    np.random.seed(42)

	    windows = make_lstm_sequences(np, split, args.lookback)
	    x_train, x_val, x_test, y_train, y_val, y_test = windows
	    enough_windows = len(x_train) >= 100 and len(x_val) >= 10 and len(x_test) >= 10
	    if not enough_windows:
	        return [{"model": model_kind, "mae_wh": math.nan, "r2": math.nan, "note": "not_enough_sequences", "backend": "torch"}]

	    class DecodeLSTM(nn.Module):
	        """One LSTM layer (32 units) followed by a 5-5-1 dense head."""

	        def __init__(self, input_size: int):
	            super().__init__()
	            self.lstm = nn.LSTM(input_size=input_size, hidden_size=32, batch_first=True)
	            self.head = nn.Sequential(
	                nn.Linear(32, 5),
	                nn.ReLU(),
	                nn.Linear(5, 5),
	                nn.ReLU(),
	                nn.Linear(5, 1),
	            )

	        def forward(self, x):
	            sequence_out = self.lstm(x)[0]
	            last_step = sequence_out[:, -1, :]
	            return self.head(last_step).squeeze(-1)

	    class DecodeCNN1D(nn.Module):
	        """Two 1-D convolutions, average pooling over time, 16-1 dense head."""

	        def __init__(self, input_size: int):
	            super().__init__()
	            self.net = nn.Sequential(
	                nn.Conv1d(input_size, 32, kernel_size=3, padding=1),
	                nn.ReLU(),
	                nn.Conv1d(32, 32, kernel_size=3, padding=1),
	                nn.ReLU(),
	                nn.AdaptiveAvgPool1d(1),
	            )
	            self.head = nn.Sequential(
	                nn.Flatten(),
	                nn.Linear(32, 16),
	                nn.ReLU(),
	                nn.Linear(16, 1),
	            )

	        def forward(self, x):
	            # Conv1d wants (batch, channels, time).
	            channels_first = x.transpose(1, 2)
	            return self.head(self.net(channels_first)).squeeze(-1)

	    class Chomp1d(nn.Module):
	        """Cut ``chomp_size`` trailing time steps off a padded convolution output."""

	        def __init__(self, chomp_size: int):
	            super().__init__()
	            self.chomp_size = chomp_size

	        def forward(self, x):
	            if not self.chomp_size:
	                return x
	            return x[:, :, :-self.chomp_size].contiguous()

	    class DecodeTCN(nn.Module):
	        """Two dilated convolutions, each trimmed by Chomp1d, plus a dense head."""

	        def __init__(self, input_size: int):
	            super().__init__()
	            self.net = nn.Sequential(
	                nn.Conv1d(input_size, 32, kernel_size=3, padding=2, dilation=1),
	                Chomp1d(2),
	                nn.ReLU(),
	                nn.Conv1d(32, 32, kernel_size=3, padding=4, dilation=2),
	                Chomp1d(4),
	                nn.ReLU(),
	            )
	            self.head = nn.Sequential(
	                nn.Linear(32, 16),
	                nn.ReLU(),
	                nn.Linear(16, 1),
	            )

	        def forward(self, x):
	            channels_first = x.transpose(1, 2)
	            time_major = self.net(channels_first).transpose(1, 2)
	            return self.head(time_major[:, -1, :]).squeeze(-1)

	    def as_float_tensor(array):
	        return torch.tensor(array, dtype=torch.float32)

	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    architectures = {
	        "lstm": DecodeLSTM,
	        "cnn1d": DecodeCNN1D,
	        "tcn": DecodeTCN,
	    }
	    model = architectures[model_kind](input_size=x_train.shape[2]).to(device)
	    criterion = nn.L1Loss()
	    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)

	    train_ds = TensorDataset(as_float_tensor(x_train), as_float_tensor(y_train))
	    val_x = as_float_tensor(x_val).to(device)
	    val_y = as_float_tensor(y_val).to(device)
	    train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)

	    patience = 4
	    epochs_without_gain_left = patience
	    best_val_loss = math.inf
	    best_state = None
	    epochs_run = 0

	    for epoch in range(args.epochs):
	        model.train()
	        loss_sum = 0.0
	        samples_seen = 0
	        for batch_x, batch_y in train_loader:
	            batch_x = batch_x.to(device)
	            batch_y = batch_y.to(device)
	            optimizer.zero_grad()
	            loss = criterion(model(batch_x), batch_y)
	            loss.backward()
	            optimizer.step()
	            # Weight the batch mean by its size to get a per-sample average.
	            loss_sum += loss.item() * len(batch_y)
	            samples_seen += len(batch_y)

	        model.eval()
	        with torch.no_grad():
	            val_loss = criterion(model(val_x), val_y).item()

	        epochs_run = epoch + 1
	        print(f"Epoch {epochs_run}/{args.epochs} - loss: {loss_sum / max(samples_seen, 1):.6f} - val_loss: {val_loss:.6f}")

	        if val_loss < best_val_loss:
	            best_val_loss = val_loss
	            # Snapshot on the CPU so later updates cannot change it.
	            best_state = {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}
	            epochs_without_gain_left = patience
	            continue
	        epochs_without_gain_left -= 1
	        if epochs_without_gain_left <= 0:
	            break

	    if best_state is not None:
	        model.load_state_dict(best_state)

	    model.eval()
	    test_loader = DataLoader(
	        TensorDataset(as_float_tensor(x_test), as_float_tensor(y_test)),
	        batch_size=args.batch_size,
	        shuffle=False,
	    )
	    with torch.no_grad():
	        batch_predictions = [
	            model(batch_x.to(device)).detach().cpu().numpy()
	            for batch_x, _ in test_loader
	        ]
	    pred = np.concatenate(batch_predictions)

	    metrics = inverse_metric(
	        np,
	        y_scaler,
	        y_test,
	        pred,
	        stack["mean_absolute_error"],
	        stack["r2_score"],
	    )
	    metrics["epochs_run"] = epochs_run
	    metrics["backend"] = "torch"
	    metrics["device"] = str(device)
	    return [{"model": model_kind, **metrics}]


	def train_lstm_torch(stack, split: SplitData, y_scaler, args):
	    """Train the PyTorch LSTM; see ``train_sequence_torch`` for the result."""
	    return train_sequence_torch(stack, split, y_scaler, args, model_kind="lstm")


	def train_cnn1d_torch(stack, split: SplitData, y_scaler, args):
	    """Train the PyTorch 1-D CNN; see ``train_sequence_torch`` for the result."""
	    return train_sequence_torch(stack, split, y_scaler, args, model_kind="cnn1d")


	def train_tcn_torch(stack, split: SplitData, y_scaler, args):
	    """Train the PyTorch TCN; see ``train_sequence_torch`` for the result."""
	    return train_sequence_torch(stack, split, y_scaler, args, model_kind="tcn")


	def train_lstm_keras(stack, split: SplitData, y_scaler, args):
	    """Train the LSTM with TensorFlow/Keras and score it on the test windows.

	    The network is LSTM(32) followed by Dense 5-5-1, compiled with RMSprop
	    and MAE loss. Early stopping watches ``val_loss`` with a patience of 4
	    and restores the best weights.

	    Returns:
	        A one-element list with the result dict. When there are fewer than
	        100 training, 10 validation or 10 test windows the dict holds NaN
	        metrics and the note "not_enough_sequences".
	    """
	    np = stack["np"]
	    keras = stack["keras"]
	    tf = stack["tf"]
	    tf.random.set_seed(42)
	    np.random.seed(42)

	    windows = make_lstm_sequences(np, split, args.lookback)
	    x_train, x_val, x_test, y_train, y_val, y_test = windows
	    enough_windows = len(x_train) >= 100 and len(x_val) >= 10 and len(x_test) >= 10
	    if not enough_windows:
	        return [{"model": "lstm", "mae_wh": math.nan, "r2": math.nan, "note": "not_enough_sequences", "backend": "tensorflow"}]

	    layers = keras.layers
	    window_shape = (x_train.shape[1], x_train.shape[2])
	    model = keras.Sequential(
	        [
	            layers.Input(shape=window_shape),
	            layers.LSTM(32),
	            layers.Dense(5, activation="relu"),
	            layers.Dense(5, activation="relu"),
	            layers.Dense(1),
	        ]
	    )
	    model.compile(optimizer=keras.optimizers.RMSprop(), loss="mae")

	    stop_when_stalled = keras.callbacks.EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True)
	    history = model.fit(
	        x_train,
	        y_train,
	        validation_data=(x_val, y_val),
	        epochs=args.epochs,
	        batch_size=args.batch_size,
	        verbose=1,
	        callbacks=[stop_when_stalled],
	    )

	    predicted = model.predict(x_test, verbose=0).ravel()
	    metrics = inverse_metric(
	        np,
	        y_scaler,
	        y_test,
	        predicted,
	        stack["mean_absolute_error"],
	        stack["r2_score"],
	    )
	    # One loss entry is recorded per completed epoch.
	    metrics["epochs_run"] = len(history.history["loss"])
	    metrics["backend"] = "tensorflow"
	    return [{"model": "lstm", **metrics}]


	def train_lstm(stack, split: SplitData, y_scaler, args):
	    """Train the LSTM with the first available backend.

	    PyTorch is preferred over TensorFlow/Keras. When ``args.skip_lstm`` is
	    set or no backend was imported, a single placeholder row with NaN
	    metrics and an explanatory note is returned instead.
	    """

	    def placeholder(note):
	        return [{"model": "lstm", "mae_wh": math.nan, "r2": math.nan, "note": note}]

	    if args.skip_lstm:
	        return placeholder("skipped_by_flag")
	    if stack["torch"] is not None:
	        return train_lstm_torch(stack, split, y_scaler, args)
	    if stack["keras"] is not None:
	        return train_lstm_keras(stack, split, y_scaler, args)

	    torch_error = stack.get("torch_import_error")
	    tf_error = stack.get("tf_import_error")
	    return placeholder(f"lstm_backend_missing: torch={torch_error}; tensorflow={tf_error}")


	def train_deep_models(stack, split: SplitData, y_scaler, args):
	    """Train the deep models named in ``args.dl_models``.

	    The option is a comma-separated list; names are case-insensitive and
	    "cnn" is accepted as an alias of "cnn1d". Names that are not recognised
	    are reported with a warning and ignored. The CNN and TCN need PyTorch;
	    without it they yield placeholder rows carrying the import error.

	    Returns:
	        A list of result dicts. When "none" is requested or
	        ``args.skip_lstm`` is set, a single "deep_models" placeholder row.
	    """
	    requested = {item.strip().lower() for item in args.dl_models.split(",") if item.strip()}
	    if "none" in requested or args.skip_lstm:
	        return [{"model": "deep_models", "mae_wh": math.nan, "r2": math.nan, "note": "skipped_by_flag"}]

	    # Surface typos instead of silently training fewer models than asked for.
	    unknown = sorted(requested - {"lstm", "cnn", "cnn1d", "tcn"})
	    if unknown:
	        warnings.warn(f"Ignoring unknown --dl-models entries: {', '.join(unknown)}")

	    rows = []
	    if "lstm" in requested:
	        rows.extend(train_lstm(stack, split, y_scaler, args))

	    torch_available = stack["torch"] is not None
	    torch_missing_note = f"torch_missing: {stack.get('torch_import_error')}"

	    if requested & {"cnn", "cnn1d"}:
	        if torch_available:
	            rows.extend(train_cnn1d_torch(stack, split, y_scaler, args))
	        else:
	            rows.append({"model": "cnn1d", "mae_wh": math.nan, "r2": math.nan, "note": torch_missing_note})
	    if "tcn" in requested:
	        if torch_available:
	            rows.extend(train_tcn_torch(stack, split, y_scaler, args))
	        else:
	            rows.append({"model": "tcn", "mae_wh": math.nan, "r2": math.nan, "note": torch_missing_note})
	    return rows


	def train_arima(stack, clean, args):
	    """Fit ARIMA(2,1,2) on the pre-test energy series and score its forecast.

	    The model sees ``energy_wh`` of the first 85% of ``clean`` (optionally
	    only the most recent ``args.arima_max_train`` points) and is compared
	    with the ``target`` column of the remaining rows.

	    Returns:
	        A one-element list with the result dict. Placeholder rows with NaN
	        metrics are returned when ARIMA is not enabled, statsmodels is
	        missing, there are too few points, or fitting fails.
	    """
	    if not args.include_arima:
	        return [{"model": "arima", "mae_wh": math.nan, "r2": math.nan, "note": "skipped_enable_with_include_arima"}]
	    if stack["ARIMA"] is None:
	        return [{
	            "model": "arima",
	            "mae_wh": math.nan,
	            "r2": math.nan,
	            "note": f"statsmodels_missing: {stack.get('statsmodels_import_error')}",
	        }]

	    val_end = int(len(clean) * 0.85)
	    history = clean["energy_wh"].iloc[:val_end].dropna()
	    if args.arima_max_train and len(history) > args.arima_max_train:
	        history = history.iloc[-args.arima_max_train:]
	    y_true = clean["target"].iloc[val_end:].dropna()
	    test_span_steps = test_span_steps_from_args(stack["pd"], args)
	    if test_span_steps is not None:
	        y_true = y_true.iloc[:test_span_steps]
	    if len(history) < 50 or len(y_true) < 10:
	        return [{"model": "arima", "mae_wh": math.nan, "r2": math.nan, "note": "not_enough_points"}]

	    # `target` of a row is the energy `horizon` rows later, so the first test
	    # target lies horizon + 1 steps after the last history point. Forecast
	    # that far and skip the first `horizon` steps to line the two series up.
	    # NOTE(review): this assumes the rows of `clean` are evenly spaced; rows
	    # removed by dropna() upstream would shift the alignment.
	    horizon = horizon_steps_from_args(stack["pd"], args)
	    try:
	        fitted = stack["ARIMA"](history, order=(2, 1, 2)).fit()
	        forecast = fitted.forecast(steps=horizon + len(y_true))
	        predicted = stack["np"].asarray(forecast)[horizon:]
	        actual = y_true.to_numpy()
	        metrics = {
	            "mae_wh": float(stack["mean_absolute_error"](actual, predicted)),
	            "r2": float(stack["r2_score"](actual, predicted)),
	            "note": f"order=(2,1,2); train_points={len(history)}",
	        }
	        return [{"model": "arima", **metrics}]
	    except Exception as exc:
	        # Best effort: a failed fit is recorded in the results, not raised.
	        return [{"model": "arima", "mae_wh": math.nan, "r2": math.nan, "note": f"arima_failed: {type(exc).__name__}: {exc}"}]


	def save_dataset(pd, clean, out_dir: Path, target_name: str):
	    """Write ``clean`` to ``<out_dir>/processed/<target>_train_ready.csv``.

	    The folder is created when needed; the path of the CSV is returned.
	    """
	    destination = out_dir.joinpath("processed", f"{target_name}_train_ready.csv")
	    destination.parent.mkdir(parents=True, exist_ok=True)
	    clean.to_csv(destination)
	    return destination


	def run_target(stack, args, energy_10min, calendar, target_name: str, target_spec: dict, out_dir: Path):
	    """Run the whole pipeline for one target and return its result rows.

	    Builds the feature frame, creates the chronological split, saves the
	    train-ready table, trains the baseline, deep and ARIMA models, and tags
	    every result row with the run settings.

	    Returns:
	        A list of result dicts, one per model (or placeholder).
	    """
	    pd = stack["pd"]
	    np = stack["np"]
	    horizon_steps = horizon_steps_from_args(pd, args)
	    test_span_steps = test_span_steps_from_args(pd, args)
	    print(f"\n=== Target: {target_name} ===")
	    print(f"Horizon: {horizon_steps} step(s) = {describe_steps(pd, horizon_steps, args.freq)}")
	    print(f"Lookback: {args.lookback} step(s) = {describe_steps(pd, args.lookback, args.freq)}")
	    if test_span_steps is not None:
	        print(f"Test span: {test_span_steps} step(s) = {describe_steps(pd, test_span_steps, args.freq)}")
	    df = build_target_frame(
	        pd,
	        energy_10min,
	        calendar,
	        target_name,
	        target_spec,
	        args.freq,
	        args.include_weather,
	    )
	    split, y_scaler, clean = make_ml_split(
	        np,
	        pd,
	        stack["MinMaxScaler"],
	        df,
	        horizon=horizon_steps,
	        max_rows=args.max_rows,
	        test_span_steps=test_span_steps,
	    )
	    dataset_path = save_dataset(pd, clean, out_dir, target_name)
	    print(f"Rows after feature engineering: {len(clean):,}")
	    print(f"Train/val/test: {len(split.y_train):,}/{len(split.y_val):,}/{len(split.y_test):,}")
	    print(f"Saved train-ready table: {dataset_path}")

	    rows = []
	    rows.extend(train_baselines(stack, split, y_scaler, args, out_dir, target_name))
	    rows.extend(train_deep_models(stack, split, y_scaler, args))
	    rows.extend(train_arima(stack, clean, args))

	    # Report the number of test rows actually kept: the requested span is
	    # only an upper bound when the test split is shorter than the request.
	    evaluated_steps = len(split.y_test)
	    for row in rows:
	        row["target"] = target_name
	        row["rows"] = len(clean)
	        row["features"] = len(split.feature_names)
	        row["horizon_steps"] = horizon_steps
	        row["horizon"] = describe_steps(pd, horizon_steps, args.freq)
	        row["lookback_steps"] = args.lookback
	        row["lookback"] = describe_steps(pd, args.lookback, args.freq)
	        row["test_span_steps"] = evaluated_steps
	        row["test_span"] = describe_steps(pd, evaluated_steps, args.freq)
	    return rows


	def main() -> int:
	    """Run the configured experiment from the command line.

	    Parses the options, reports which libraries are available, trains every
	    selected target, then writes the results CSV and a JSON copy of the run
	    configuration into the output directory.

	    Returns:
	        0 on completion. Exits via ``SystemExit`` for an unknown ``--target``.
	    """
	    args = parse_args()
	    stack = import_stack()
	    pd = stack["pd"]

	    out_dir = Path(args.output_dir)
	    out_dir.mkdir(parents=True, exist_ok=True)
	    (out_dir / "processed").mkdir(exist_ok=True)

	    targets = PAPER_BUILDINGS if args.mode == "paper_buildings" else METER_TARGETS
	    if args.target != "all":
	        if args.target not in targets:
	            raise SystemExit(f"Unknown target {args.target!r}. Available: {', '.join(targets)}")
	        targets = {args.target: targets[args.target]}

	    print("DECODE re-implementation")
	    print(f"Mode: {args.mode}")
	    print(f"Frequency: {args.freq}")
	    print(f"Python executable: {sys.executable}")

	    # (label, stack key of the library, label of the error line, error key)
	    optional_libraries = (
	        ("PyTorch", "torch", "PyTorch import error", "torch_import_error"),
	        ("TensorFlow", "keras", "TensorFlow import error", "tf_import_error"),
	        ("LightGBM", "lgb", "LightGBM import error", "lgbm_import_error"),
	        ("Statsmodels ARIMA", "ARIMA", "Statsmodels import error", "statsmodels_import_error"),
	    )
	    for label, key, error_label, error_key in optional_libraries:
	        available = stack[key] is not None
	        print(f"{label} available: {available}")
	        if not available:
	            print(f"{error_label}: {stack.get(error_key)}")

	    if not args.skip_lstm:
	        if stack["torch"] is not None:
	            backend = "PyTorch"
	        elif stack["keras"] is not None:
	            backend = "TensorFlow/Keras"
	        else:
	            backend = "unavailable"
	        print(f"LSTM backend: {backend}")

	    energy_10min = read_energy_10min(pd, args.freq)
	    calendar = read_calendar(pd)

	    all_rows = []
	    for target_name, target_spec in targets.items():
	        all_rows.extend(run_target(stack, args, energy_10min, calendar, target_name, target_spec, out_dir))

	    results = pd.DataFrame(all_rows)
	    effective_horizon = horizon_steps_from_args(pd, args)
	    effective_test_span = test_span_steps_from_args(pd, args)

	    # The file name records the horizon and, when restricted, the test span.
	    suffix_parts = [f"h{effective_horizon}"]
	    if effective_test_span is not None:
	        suffix_parts.append(f"ts{effective_test_span}")
	    result_path = out_dir / f"results_{args.mode}_{'_'.join(suffix_parts)}.csv"
	    results.to_csv(result_path, index=False)

	    metadata = {
	        "mode": args.mode,
	        "target": args.target,
	        "freq": args.freq,
	        "lookback": args.lookback,
	        "horizon": args.horizon,
	        "horizon_days": args.horizon_days,
	        "horizon_steps_effective": effective_horizon,
	        "test_span_days": args.test_span_days,
	        "test_span_steps_effective": effective_test_span,
	        "epochs": args.epochs,
	        "batch_size": args.batch_size,
	        "rf_trees": args.rf_trees,
	        "dl_models": args.dl_models,
	        "include_arima": args.include_arima,
	        "include_weather": args.include_weather,
	        "pytorch_available": stack["torch"] is not None,
	        "torch_import_error": stack.get("torch_import_error"),
	        "lightgbm_available": stack["lgb"] is not None,
	        "lightgbm_import_error": stack.get("lgbm_import_error"),
	        "statsmodels_available": stack["ARIMA"] is not None,
	        "statsmodels_import_error": stack.get("statsmodels_import_error"),
	        "tensorflow_available": stack["keras"] is not None,
	        "tensorflow_import_error": stack.get("tf_import_error"),
	        "energy_file": str(ENERGY_FILE),
	        "occupancy_dir": str(OCCUPANCY_DIR),
	        "calendar_dir": str(CALENDAR_DIR),
	        "weather_file": str(WEATHER_FILE),
	    }
	    config_path = out_dir / f"run_config_{args.mode}.json"
	    config_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")

	    print("\n=== Results ===")
	    print(results.sort_values(["target", "mae_wh"]).to_string(index=False))
	    print(f"\nSaved results: {result_path}")
	    return 0


	# Script entry point: the return value of main() becomes the exit status.
	if __name__ == "__main__":
	    sys.exit(main())