Spaces:
Running
Running
File size: 1,733 Bytes
"""Validation helpers for the weekly TFT target contract."""
from __future__ import annotations
from typing import Literal
import numpy as np
import pandas as pd
# Helper columns that accompany the weekly ``target`` column.
# validate_weekly_target_contract() requires each of them to be present.
# NOTE(review): presumably these are kept out of the model's input features;
# confirm against the code that consumes this set.
WEEKLY_TARGET_HELPER_COLS = {
    "target_1d_log_return",
    "target_5d_log_return",
    "realized_vol_20d",
    "material_move_5d",
}
def validate_weekly_target_contract(
    df: pd.DataFrame,
    *,
    mode: Literal["train", "inference"] = "train",
) -> None:
    """Validate weekly target/helper columns without treating helpers as inputs.

    Args:
        df: Frame that must contain ``target``, ``target_1d_log_return``,
            ``target_5d_log_return``, ``realized_vol_20d``,
            ``material_move_5d``, ``time_idx`` and ``group_id``.
        mode: ``"train"`` additionally requires ``target``,
            ``target_1d_log_return`` and ``target_5d_log_return`` to be free
            of NaN. ``"inference"`` skips that check.

    Raises:
        ValueError: If ``mode`` is not one of the supported values, a required
            column is missing, a target column contains NaN in train mode,
            ``target`` and ``target_1d_log_return`` disagree on rows where
            both are present, ``material_move_5d`` holds a non-NaN value other
            than 0/1, or ``time_idx`` is not monotonic increasing.
    """
    # Reject unknown modes up front: a typo would otherwise silently skip the
    # train-only NaN checks below.
    if mode not in ("train", "inference"):
        raise ValueError(f"mode must be 'train' or 'inference', got {mode!r}")

    required = [
        "target",
        "target_1d_log_return",
        "target_5d_log_return",
        "realized_vol_20d",
        "material_move_5d",
        "time_idx",
        "group_id",
    ]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing weekly target columns: {missing}")

    if mode == "train":
        for col in ("target", "target_1d_log_return", "target_5d_log_return"):
            if df[col].isna().any():
                raise ValueError(f"{col} contains NaN after feature-store construction")

    # Compare only rows where both columns are present, so NaN targets (which
    # are allowed outside train mode) do not trip the check.
    # NOTE(review): np.allclose also applies its default relative tolerance,
    # so the effective tolerance is looser than atol alone -- confirm that is
    # intended for an "identical" check.
    comparable = df[["target", "target_1d_log_return"]].dropna()
    if not comparable.empty and not np.allclose(
        comparable["target"].to_numpy(),
        comparable["target_1d_log_return"].to_numpy(),
        atol=1e-10,
        equal_nan=True,
    ):
        raise ValueError("target and target_1d_log_return are not identical")

    # NaN is tolerated here; only the observed values must be 0 or 1.
    material_values = set(df["material_move_5d"].dropna().unique())
    if not material_values.issubset({0.0, 1.0}):
        raise ValueError("material_move_5d must be binary 0/1")

    # NOTE(review): this checks the whole column, not each group_id
    # separately -- confirm callers pass frames ordered by time_idx overall.
    if not df["time_idx"].is_monotonic_increasing:
        raise ValueError("time_idx must be monotonic increasing")
|