Spaces:

NX-AI
/

TiRex-demo

Running on T4

App Files Files Community

Nikita committed on Oct 21

Commit

ab631da

1 Parent(s): 9ff4612

torch.compile, removed tirex folder, loading from pip

Browse files

Files changed (14) hide show

app.py +1 -1
environment.yaml +1 -0
tirex/__init__.py +0 -8
tirex/api_adapter/__init__.py +0 -2
tirex/api_adapter/forecast.py +0 -209
tirex/api_adapter/gluon.py +0 -48
tirex/api_adapter/hf_data.py +0 -38
tirex/api_adapter/standard_adapter.py +0 -67
tirex/base.py +0 -73
tirex/models/__init__.py +0 -2
tirex/models/components.py +0 -147
tirex/models/mixed_stack.py +0 -143
tirex/models/predict_utils.py +0 -72
tirex/models/tirex.py +0 -231

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ from tirex import load_model, ForecastModel
 # ----------------------------
 torch.manual_seed(42)
-model: ForecastModel = load_model("NX-AI/TiRex",device='cuda')
+model: ForecastModel = load_model("NX-AI/TiRex", backend="torch", device="cuda", compile=True)
 def model_forecast(input_data, forecast_length=256, file_name=None):
     if os.path.basename(file_name) == "loop.csv" and forecast_length==256:

environment.yaml CHANGED Viewed

@@ -15,6 +15,7 @@ dependencies:
   - cuda-toolkit=12.6
   - cuda-cccl=12.6
   - pip:
+      - tirex-ts
       - --index-url https://download.pytorch.org/whl/cu126
       - --extra-index-url https://pypi.org/simple
       - pyarrow

tirex/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-from .api_adapter.forecast import ForecastModel
-from .base import load_model
-from .models.tirex import TiRexZero
-__all__ = ["load_model", "ForecastModel"]

tirex/api_adapter/__init__.py DELETED Viewed

@@ -1,2 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.

tirex/api_adapter/forecast.py DELETED Viewed

@@ -1,209 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-from abc import ABC, abstractmethod
-from typing import Literal
-import torch
-from .standard_adapter import ContextType, get_batches
-try:
-    from .gluon import format_gluonts_output, get_gluon_batches
-    _GLUONTS_AVAILABLE = True
-except ImportError:
-    _GLUONTS_AVAILABLE = False
-try:
-    from .hf_data import get_hfdata_batches
-    _HF_DATASETS_AVAILABLE = True
-except ImportError:
-    _HF_DATASETS_AVAILABLE = False
-DEF_TARGET_COLUMN = "target"
-DEF_META_COLUMNS = ("start", "item_id")
-def _format_output(
-    quantiles: torch.Tensor,
-    means: torch.Tensor,
-    sample_meta: list[dict],
-    quantile_levels: list[float],
-    output_type: Literal["torch", "numpy", "gluonts"],
-):
-    if output_type == "torch":
-        return quantiles.cpu(), means.cpu()
-    elif output_type == "numpy":
-        return quantiles.cpu().numpy(), means.cpu().numpy()
-    elif output_type == "gluonts":
-        if not _GLUONTS_AVAILABLE:
-            raise ValueError("output_type glutonts needs GluonTs but GluonTS is not available (not installed)!")
-        return format_gluonts_output(quantiles, means, sample_meta, quantile_levels)
-    else:
-        raise ValueError(f"Invalid output type: {output_type}")
-def _as_generator(batches, fc_func, quantile_levels, output_type, **predict_kwargs):
-    for batch_ctx, batch_meta in batches:
-        quantiles, mean = fc_func(batch_ctx, **predict_kwargs)
-        yield _format_output(
-            quantiles=quantiles,
-            means=mean,
-            sample_meta=batch_meta,
-            quantile_levels=quantile_levels,
-            output_type=output_type,
-        )
-def _gen_forecast(fc_func, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs):
-    if yield_per_batch:
-        return _as_generator(batches, fc_func, quantile_levels, output_type, **predict_kwargs)
-    prediction_q = []
-    prediction_m = []
-    sample_meta = []
-    for batch_ctx, batch_meta in batches:
-        quantiles, mean = fc_func(batch_ctx, **predict_kwargs)
-        prediction_q.append(quantiles)
-        prediction_m.append(mean)
-        sample_meta.extend(batch_meta)
-    prediction_q = torch.cat(prediction_q, dim=0)
-    prediction_m = torch.cat(prediction_m, dim=0)
-    return _format_output(
-        quantiles=prediction_q,
-        means=prediction_m,
-        sample_meta=sample_meta,
-        quantile_levels=quantile_levels,
-        output_type=output_type,
-    )
-def _common_forecast_doc():
-    common_doc = f"""
-        This method takes historical context data as input and outputs probabilistic forecasts.
-        Args:
-            output_type (Literal["torch", "numpy", "gluonts"], optional):
-                Specifies the desired format of the returned forecasts:
-                - "torch": Returns forecasts as `torch.Tensor` objects [batch_dim, forecast_len, |quantile_levels|]
-                - "numpy": Returns forecasts as `numpy.ndarray` objects [batch_dim, forecast_len, |quantile_levels|]
-                - "gluonts": Returns forecasts as a list of GluonTS `Forecast` objects.
-                Defaults to "torch".
-            batch_size (int, optional): The number of time series instances to process concurrently by the model.
-                                        Defaults to 512. Must be $>= 1$.
-            quantile_levels (List[float], optional): Quantile levels for which predictions should be generated.
-                                                     Defaults to (0.1, 0.2, ..., 0.9).
-            yield_per_batch (bool, optional): If `True`, the method will act as a generator, yielding
-                                              forecasts batch by batch as they are computed.
-                                              Defaults to `False`.
-            **predict_kwargs: Additional keyword arguments that are passed directly to the underlying
-                              prediction mechanism of the pre-trained model. Refer to the model's
-                              internal prediction method documentation for available options.
-        Returns:
-            The return type depends on `output_type` and `yield_per_batch`:
-                - If `yield_per_batch` is `True`: An iterator that yields forecasts. Each yielded item
-                  will correspond to a batch of forecasts in the format specified by `output_type`.
-                - If `yield_per_batch` is `False`: A single object containing all forecasts.
-                  - If `output_type="torch"`: `Tuple[torch.Tensor, torch.Tensor]` (quantiles, mean).
-                  - If `output_type="numpy"`: `Tuple[numpy.ndarray, numpy.ndarray]` (quantiles, mean).
-                  - If `output_type="gluonts"`: A `List[gluonts.model.forecast.Forecast]` of all forecasts.
-        """
-    return common_doc
-class ForecastModel(ABC):
-    @abstractmethod
-    def _forecast_quantiles(self, batch, **predict_kwargs):
-        pass
-    def forecast(
-        self,
-        context: ContextType,
-        output_type: Literal["torch", "numpy", "gluonts"] = "torch",
-        batch_size: int = 512,
-        quantile_levels: list[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        yield_per_batch: bool = False,
-        **predict_kwargs,
-    ):
-        f"""
-        {_common_forecast_doc}
-        Args:
-            context (ContextType): The historical "context" data of the time series:
-                - `torch.Tensor`: 1D `[context_length]` or 2D `[batch_dim, context_length]` tensor
-                - `np.ndarray`: 1D `[context_length]` or 2D `[batch_dim, context_length]` array
-                - `List[torch.Tensor]`: List of 1D tensors (samples with different lengths get padded per batch)
-                - `List[np.ndarray]`: List of 1D arrays (samples with different lengths get padded per batch)
-        """
-        assert batch_size >= 1, "Batch size must be >= 1"
-        batches = get_batches(context, batch_size)
-        return _gen_forecast(
-            self._forecast_quantiles, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs
-        )
-    def forecast_gluon(
-        self,
-        gluonDataset,
-        output_type: Literal["torch", "numpy", "gluonts"] = "torch",
-        batch_size: int = 512,
-        quantile_levels: list[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        yield_per_batch: bool = False,
-        data_kwargs: dict = {},
-        **predict_kwargs,
-    ):
-        f"""
-        {_common_forecast_doc()}
-        Args:
-            gluonDataset (gluon_ts.dataset.common.Dataset): A GluonTS dataset object containing the
-                                                            historical time series data.
-            data_kwargs (dict, optional): Additional keyword arguments passed to the
-                                          autogluon data processing function.
-        """
-        assert batch_size >= 1, "Batch size must be >= 1"
-        if not _GLUONTS_AVAILABLE:
-            raise ValueError("forecast_gluon glutonts needs GluonTs but GluonTS is not available (not installed)!")
-        batches = get_gluon_batches(gluonDataset, batch_size, **data_kwargs)
-        return _gen_forecast(
-            self._forecast_quantiles, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs
-        )
-    def forecast_hfdata(
-        self,
-        hf_dataset,
-        output_type: Literal["torch", "numpy", "gluonts"] = "torch",
-        batch_size: int = 512,
-        quantile_levels: list[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        yield_per_batch: bool = False,
-        data_kwargs: dict = {},
-        **predict_kwargs,
-    ):
-        f"""
-        {_common_forecast_doc()}
-        Args:
-            hf_dataset (datasets.Dataset): A Hugging Face `Dataset` object containing the
-                                           historical time series data.
-            data_kwargs (dict, optional): Additional keyword arguments passed to the
-                                          datasets data processing function.
-        """
-        assert batch_size >= 1, "Batch size must be >= 1"
-        if not _HF_DATASETS_AVAILABLE:
-            raise ValueError(
-                "forecast_hfdata glutonts needs HuggingFace datasets but datasets is not available (not installed)!"
-            )
-        batches = get_hfdata_batches(hf_dataset, batch_size, **data_kwargs)
-        return _gen_forecast(
-            self._forecast_quantiles, batches, output_type, quantile_levels, yield_per_batch, **predict_kwargs
-        )

tirex/api_adapter/gluon.py DELETED Viewed

@@ -1,48 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import pandas as pd
-import torch
-from gluonts.dataset.common import Dataset
-from gluonts.dataset.field_names import FieldName
-from gluonts.model.forecast import QuantileForecast
-from .standard_adapter import _batch_pad_iterable
-DEF_TARGET_COLUMN = FieldName.TARGET  # target
-DEF_META_COLUMNS = (FieldName.START, FieldName.ITEM_ID)
-def _get_gluon_ts_map(**gluon_kwargs):
-    target_col = gluon_kwargs.get("target_column", DEF_TARGET_COLUMN)
-    meta_columns = gluon_kwargs.get("meta_columns", DEF_META_COLUMNS)
-    def extract_gluon(series):
-        ctx = torch.Tensor(series[target_col])
-        meta = {k: series[k] for k in meta_columns if k in series}
-        meta["length"] = len(ctx)
-        return ctx, meta
-    return extract_gluon
-def get_gluon_batches(gluonDataset: Dataset, batch_size: int, **gluon_kwargs):
-    return _batch_pad_iterable(map(_get_gluon_ts_map(**gluon_kwargs), gluonDataset), batch_size)
-def format_gluonts_output(quantile_forecasts: torch.Tensor, mean_forecasts, meta: list[dict], quantile_levels):
-    forecasts = []
-    for i in range(quantile_forecasts.shape[0]):
-        start_date = meta[i].get(FieldName.START, pd.Period("01-01-2000", freq=meta[i].get("freq", "h")))
-        start_date += meta[i].get("length", 0)
-        forecasts.append(
-            QuantileForecast(
-                forecast_arrays=torch.cat((quantile_forecasts[i], mean_forecasts[i].unsqueeze(1)), dim=1)
-                .T.cpu()
-                .numpy(),
-                start_date=start_date,
-                item_id=meta[i].get(FieldName.ITEM_ID, None),
-                forecast_keys=list(map(str, quantile_levels)) + ["mean"],
-            )
-        )
-    return forecasts

tirex/api_adapter/hf_data.py DELETED Viewed

@@ -1,38 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import datasets
-import torch
-from .standard_adapter import _batch_pad_iterable
-DEF_TARGET_COLUMN = "target"
-def _get_hf_map(dataset: datasets.Dataset, **hf_kwargs):
-    target_col = hf_kwargs.get("target_column", DEF_TARGET_COLUMN)
-    meta_columns = hf_kwargs.get("meta_columns", ())
-    columns_to_pass = [target_col] + list(meta_columns)
-    remove_cols = [col for col in dataset.column_names if col not in columns_to_pass]
-    dataset = (
-        dataset.with_format("torch")
-        .remove_columns(remove_cols)
-        .cast_column(target_col, datasets.Sequence(datasets.Value("float32")))
-    )
-    def yield_batch_tuples(sample: dict) -> tuple[torch.Tensor, dict]:
-        context_data = sample[target_col]
-        if context_data.ndim > 1:
-            context_data = context_data.squeeze()
-        assert context_data.ndim == 1
-        meta = {k: sample[k] for k in meta_columns if k in sample}
-        meta["length"] = len(context_data)
-        return context_data, meta
-    return dataset, yield_batch_tuples
-def get_hfdata_batches(hf_dataset: datasets.Dataset, batch_size: int, **hf_kwargs):
-    dataset, map_func = _get_hf_map(hf_dataset, **hf_kwargs)
-    return _batch_pad_iterable(map(map_func, dataset), batch_size)

tirex/api_adapter/standard_adapter.py DELETED Viewed

@@ -1,67 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import itertools
-from collections.abc import Iterable, Iterator, Sequence
-from typing import Union
-import numpy as np
-import torch
-ContextType = Union[
-    torch.Tensor,
-    np.ndarray,
-    list[torch.Tensor],
-    list[np.ndarray],
-]
-def _batched_slice(full_batch, full_meta: list[dict] | None, batch_size: int) -> Iterator[tuple[Sequence, list[dict]]]:
-    if len(full_batch) <= batch_size:
-        yield full_batch, full_meta if full_meta is not None else [{} for _ in range(len(full_batch))]
-    else:
-        for i in range(0, len(full_batch), batch_size):
-            batch = full_batch[i : i + batch_size]
-            yield batch, (full_meta[i : i + batch_size] if full_meta is not None else [{} for _ in range(len(batch))])
-def _batched(iterable: Iterable, n: int):
-    it = iter(iterable)
-    while batch := tuple(itertools.islice(it, n)):
-        yield batch
-def _batch_pad_iterable(iterable: Iterable[tuple[torch.Tensor, dict]], batch_size: int):
-    for batch in _batched(iterable, batch_size):
-        # ctx_it_len, ctx_it_data, it_meta = itertools.tee(batch, 3)
-        max_len = max(len(el[0]) for el in batch)
-        padded_batch = []
-        meta = []
-        for el in batch:
-            sample = el[0]
-            assert isinstance(sample, torch.Tensor)
-            assert sample.ndim == 1
-            assert len(sample) > 0, "Each sample needs to have a length > 0"
-            padding = torch.full(size=(max_len - len(sample),), fill_value=torch.nan, device=sample.device)
-            padded_batch.append(torch.cat((padding, sample)))
-            meta.append(el[1])
-        yield torch.stack(padded_batch), meta
-def get_batches(context: ContextType, batch_size: int):
-    batches = None
-    if isinstance(context, torch.Tensor):
-        if context.ndim == 1:
-            context = context.unsqueeze(0)
-        assert context.ndim == 2
-        batches = _batched_slice(context, None, batch_size)
-    elif isinstance(context, np.ndarray):
-        if context.ndim == 1:
-            context = np.expand_dims(context, axis=0)
-        assert context.ndim == 2
-        batches = map(lambda x: (torch.Tensor(x[0]), x[1]), _batched_slice(context, None, batch_size))
-    elif isinstance(context, (list, Iterable)):
-        batches = _batch_pad_iterable(map(lambda x: (torch.Tensor(x), None), context), batch_size)
-    if batches is None:
-        raise ValueError(f"Context type {type(context)} not supported! Supported Types: {ContextType}")
-    return batches

tirex/base.py DELETED Viewed

@@ -1,73 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import os
-from abc import ABC, abstractmethod
-from typing import TypeVar
-from huggingface_hub import hf_hub_download
-T = TypeVar("T", bound="PretrainedModel")
-def parse_hf_repo_id(path):
-    parts = path.split("/")
-    return "/".join(parts[0:2])
-class PretrainedModel(ABC):
-    REGISTRY: dict[str, "PretrainedModel"] = {}
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        cls.REGISTRY[cls.register_name()] = cls
-    @classmethod
-    def from_pretrained(cls: type[T], path, device: str = "cuda:0", hf_kwargs=None, ckp_kwargs=None) -> T:
-        if hf_kwargs is None:
-            hf_kwargs = {}
-        if ckp_kwargs is None:
-            ckp_kwargs = {}
-        if os.path.exists(path):
-            print("Loading weights from local directory")
-            checkpoint_path = path
-        else:
-            repo_id = parse_hf_repo_id(path)
-            checkpoint_path = hf_hub_download(repo_id=repo_id, filename="model.ckpt", **hf_kwargs)
-        model = cls.load_from_checkpoint(checkpoint_path, map_location=device, **ckp_kwargs)
-        model.after_load_from_checkpoint()
-        return model
-    @classmethod
-    @abstractmethod
-    def register_name(cls) -> str:
-        pass
-    def after_load_from_checkpoint(self):
-        pass
-def load_model(path: str, device: str = "cuda:0", hf_kwargs=None, ckp_kwargs=None) -> PretrainedModel:
-    """Loads a TiRex model. This function attempts to load the specified model.
-    Args:
-        path (str): Hugging Face path to the model (e.g. NX-AI/TiRex)
-        device (str, optional): The device on which to load the model (e.g., "cuda:0", "cpu").
-                                If you want to use "cpu" you need to deactivate the sLSTM CUDA kernels (check repository FAQ!).
-        hf_kwargs (dict, optional): Keyword arguments to pass to the Hugging Face Hub download method.
-        ckp_kwargs (dict, optional): Keyword arguments to pass when loading the checkpoint.
-    Returns:
-        PretrainedModel: The loaded model.
-    Examples:
-        model: ForecastModel = load_model("NX-AI/TiRex")
-    """
-    try:
-        _, model_id = parse_hf_repo_id(path).split("/")
-    except:
-        raise ValueError(f"Invalid model path {path}")
-    model_cls = PretrainedModel.REGISTRY.get(model_id, None)
-    if model_cls is None:
-        raise ValueError(f"Invalid model id {model_id}")
-    return model_cls.from_pretrained(path, device=device, hf_kwargs=hf_kwargs, ckp_kwargs=ckp_kwargs)

tirex/models/__init__.py DELETED Viewed

@@ -1,2 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.

tirex/models/components.py DELETED Viewed

@@ -1,147 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-from dataclasses import dataclass, field
-from typing import Any
-import torch
-SCALER_STATE = "scaler_state"
-class ResidualBlock(torch.nn.Module):
-    def __init__(
-        self,
-        in_dim: int,
-        h_dim: int,
-        out_dim: int,
-        dropout: float = 0,
-    ) -> None:
-        super().__init__()
-        self.dropout = torch.nn.Dropout(dropout)
-        self.hidden_layer = torch.nn.Linear(in_dim, h_dim)
-        self.output_layer = torch.nn.Linear(h_dim, out_dim)
-        self.residual_layer = torch.nn.Linear(in_dim, out_dim)
-        self.act = torch.nn.ReLU()
-    def forward(self, x: torch.Tensor):
-        hid = self.act(self.hidden_layer(x))
-        out = self.output_layer(hid)
-        res = self.residual_layer(x)
-        out = out + res
-        return out
-@dataclass
-class StandardScaler:
-    eps: float = 1e-5
-    nan_loc: float = 0.0
-    def scale(
-        self,
-        x: torch.Tensor,
-        loc_scale: tuple[torch.Tensor, torch.Tensor] | None = None,
-    ) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
-        if loc_scale is None:
-            loc = torch.nan_to_num(torch.nanmean(x, dim=-1, keepdim=True), nan=self.nan_loc)
-            scale = torch.nan_to_num(torch.nanmean((x - loc).square(), dim=-1, keepdim=True).sqrt(), nan=1.0)
-            scale = torch.where(scale == 0, torch.abs(loc) + self.eps, scale)
-        else:
-            loc, scale = loc_scale
-        return ((x - loc) / scale), (loc, scale)
-    def re_scale(self, x: torch.Tensor, loc_scale: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
-        loc, scale = loc_scale
-        return x * scale + loc
-@dataclass
-class _Patcher:
-    patch_size: int
-    patch_stride: int
-    left_pad: bool
-    def __post_init__(self):
-        assert self.patch_size % self.patch_stride == 0
-    def __call__(self, x: torch.Tensor) -> torch.Tensor:
-        assert x.ndim == 2
-        length = x.shape[-1]
-        if length < self.patch_size or (length % self.patch_stride != 0):
-            if length < self.patch_size:
-                padding_size = (
-                    *x.shape[:-1],
-                    self.patch_size - (length % self.patch_size),
-                )
-            else:
-                padding_size = (
-                    *x.shape[:-1],
-                    self.patch_stride - (length % self.patch_stride),
-                )
-            padding = torch.full(size=padding_size, fill_value=torch.nan, dtype=x.dtype, device=x.device)
-            if self.left_pad:
-                x = torch.concat((padding, x), dim=-1)
-            else:
-                x = torch.concat((x, padding), dim=-1)
-        x = x.unfold(dimension=-1, size=self.patch_size, step=self.patch_stride)
-        return x
-@dataclass
-class PatchedUniTokenizer:
-    patch_size: int
-    scaler: Any = field(default_factory=StandardScaler)
-    patch_stride: int | None = None
-    def __post_init__(self):
-        if self.patch_stride is None:
-            self.patch_stride = self.patch_size
-        self.patcher = _Patcher(self.patch_size, self.patch_stride, left_pad=True)
-    def context_input_transform(self, data: torch.Tensor):
-        assert data.ndim == 2
-        data, scale_state = self.scaler.scale(data)
-        return self.patcher(data), {SCALER_STATE: scale_state}
-    def output_transform(self, data: torch.Tensor, tokenizer_state: dict):
-        data_shape = data.shape
-        data = self.scaler.re_scale(data.reshape(data_shape[0], -1), tokenizer_state[SCALER_STATE]).view(*data_shape)
-        return data
-class StreamToLogger:
-    """Fake file-like stream object that redirects writes to a logger
-    instance."""
-    def __init__(self, logger, log_level):
-        self.logger = logger
-        self.log_level = log_level
-        self.linebuf = ""  # Buffer for partial lines
-    def write(self, message):
-        # Filter out empty messages (often from just a newline)
-        if message.strip():
-            self.linebuf += message
-            # If the message contains a newline, process the full line
-            if "\n" in self.linebuf:
-                lines = self.linebuf.splitlines(keepends=True)
-                for line in lines:
-                    if line.endswith("\n"):
-                        # Log full lines without the trailing newline (logger adds its own)
-                        self.logger.log(self.log_level, line.rstrip("\n"))
-                    else:
-                        # Keep partial lines in buffer
-                        self.linebuf = line
-                        return
-                self.linebuf = ""  # All lines processed
-            # If no newline, keep buffering
-    def flush(self):
-        # Log any remaining buffered content when flush is called
-        if self.linebuf.strip():
-            self.logger.log(self.log_level, self.linebuf.rstrip("\n"))
-            self.linebuf = ""

tirex/models/mixed_stack.py DELETED Viewed

@@ -1,143 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import os
-from dataclasses import dataclass, field
-import torch
-from torch import nn
-from xlstm.blocks.slstm.layer import sLSTMLayer, sLSTMLayerConfig
-from xlstm.xlstm_large import xLSTMLargeConfig
-from xlstm.xlstm_large.components import RMSNorm
-from xlstm.xlstm_large.model import FeedForward, mLSTMBlock, mLSTMStateType
-def skip_cuda():
-    return os.getenv("TIREX_NO_CUDA", "False").lower() in ("true", "1", "t")
-def init_cell(config: xLSTMLargeConfig, block_idx, num_blocks):
-    return sLSTMLayer(
-        sLSTMLayerConfig(
-            embedding_dim=config.embedding_dim,
-            num_heads=config.num_heads,
-            conv1d_kernel_size=0,  # 0 means no convolution included
-            group_norm_weight=True,
-            dropout=0,
-            # CellConfig
-            backend="vanilla" if skip_cuda() else "cuda",
-            bias_init="powerlaw_blockdependent",
-            recurrent_weight_init="zeros",
-            num_gates=4,
-            gradient_recurrent_cut=False,
-            gradient_recurrent_clipval=None,
-            forward_clipval=None,
-            batch_size=8,  # needed?
-            _block_idx=block_idx,
-            _num_blocks=num_blocks,
-        )
-    )
-sLSTMLayerStateType = tuple[torch.Tensor, torch.Tensor]
-sLSTMStateType = dict[int, sLSTMLayerStateType]
-class sLSTMBlock(nn.Module):
-    def __init__(self, config: xLSTMLargeConfig, block_idx: int, num_blocks: int):
-        super().__init__()
-        self.config = config
-        self.norm_slstm = RMSNorm(
-            num_features=config.embedding_dim,
-            eps=config.norm_eps,
-            use_weight=True,
-            use_bias=config.use_bias,
-            force_float32_reductions=config.norm_reduction_force_float32,
-        )
-        self.slstm_layer = init_cell(config, block_idx, num_blocks)
-        self.norm_ffn = RMSNorm(
-            num_features=config.embedding_dim,
-            eps=config.norm_eps,
-            use_weight=True,
-            use_bias=config.use_bias,
-            force_float32_reductions=config.norm_reduction_force_float32,
-        )
-        self.ffn = FeedForward(config)
-    def forward(
-        self, x: torch.Tensor, state: sLSTMLayerStateType | None = None
-    ) -> tuple[torch.Tensor, sLSTMLayerStateType]:
-        x_slstm = self.norm_slstm(x)
-        if state is None:
-            conv_state, slstm_state = None, None
-        else:
-            conv_state, slstm_state = state
-        x_slstm, state = self.slstm_layer(x_slstm, conv_state, slstm_state, return_last_state=True)
-        x = x + x_slstm
-        x_ffn = self.norm_ffn(x)
-        x_ffn = self.ffn(x_ffn)
-        x = x + x_ffn
-        return x, (state["conv_state"], state["slstm_state"])
-@dataclass
-class xLSTMMixedLargeConfig(xLSTMLargeConfig):
-    slstm_at: list[int] = field(default_factory=list)
-    all_slstm: bool = True
-    @property
-    def block_types(self):
-        return ["s" if i in self.slstm_at or self.all_slstm else "m" for i in range(self.num_blocks)]
-class xLSTMMixedLargeBlockStack(nn.Module):
-    config_class = xLSTMMixedLargeConfig
-    def __init__(self, config: xLSTMMixedLargeConfig):
-        super().__init__()
-        self.config = config
-        self.blocks = nn.ModuleList(
-            [
-                sLSTMBlock(config, block_idx=i, num_blocks=config.num_blocks) if t == "s" else mLSTMBlock(config)
-                for i, t in enumerate(config.block_types)
-            ]
-        )
-        if self.config.add_out_norm:
-            self.out_norm = RMSNorm(
-                num_features=config.embedding_dim,
-                eps=config.norm_eps,
-                use_weight=True,
-                use_bias=config.use_bias,
-                force_float32_reductions=config.norm_reduction_force_float32,
-            )
-        else:
-            self.out_norm = nn.Identity()
-    def forward(
-        self, x: torch.Tensor, state: mLSTMStateType | sLSTMStateType | None = None
-    ) -> tuple[torch.Tensor, mLSTMStateType]:
-        if state is None:
-            state = {i: None for i in range(len(self.blocks))}
-        for i, block in enumerate(self.blocks):
-            block_state = state[i]
-            x, block_state_new = block(x, block_state)
-            if block_state is None:
-                state[i] = block_state_new
-            else:
-                pass
-                ## layer state is a tuple of three tensors: c, n, m
-                ## we update the state in place in order to avoid creating new tensors
-                # for state_idx in range(len(block_state)):
-                #    state[i][state_idx].copy_(block_state_new[state_idx])
-        x = self.out_norm(x)
-        return x, state

tirex/models/predict_utils.py DELETED Viewed

@@ -1,72 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import logging
-from abc import abstractmethod
-import torch
-from ..api_adapter.forecast import ForecastModel
-LOGGER = logging.getLogger()
-class TensorQuantileUniPredictMixin(ForecastModel):
-    @abstractmethod
-    def _forecast_tensor(
-        self,
-        context: torch.Tensor,
-        prediction_length: int | None = None,
-        **predict_kwargs,
-    ) -> torch.Tensor:
-        pass
-    @property
-    @abstractmethod
-    def quantiles(self):
-        pass
-    def _forecast_quantiles(
-        self,
-        context: torch.Tensor,
-        prediction_length: int | None = None,
-        quantile_levels: list[float] = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
-        output_device: str = "cpu",
-        auto_cast: bool = False,
-        **predict_kwargs,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        with torch.autocast(device_type=self.device.type, enabled=auto_cast):
-            predictions = self._forecast_tensor(
-                context=context, prediction_length=prediction_length, **predict_kwargs
-            ).detach()
-        predictions = predictions.to(torch.device(output_device)).swapaxes(1, 2)
-        training_quantile_levels = list(self.quantiles)
-        if set(quantile_levels).issubset(set(training_quantile_levels)):
-            quantiles = predictions[..., [training_quantile_levels.index(q) for q in quantile_levels]]
-        else:
-            if min(quantile_levels) < min(training_quantile_levels) or max(quantile_levels) > max(
-                training_quantile_levels
-            ):
-                logging.warning(
-                    f"Requested quantile levels ({quantile_levels}) fall outside the range of "
-                    f"quantiles the model was trained on ({training_quantile_levels}). "
-                    "Predictions for out-of-range quantiles will be clamped to the nearest "
-                    "boundary of the trained quantiles (i.e., minimum or maximum trained level). "
-                    "This can significantly impact prediction accuracy, especially for extreme quantiles. "
-                )
-            # Interpolate quantiles
-            augmented_predictions = torch.cat(
-                [predictions[..., [0]], predictions, predictions[..., [-1]]],
-                dim=-1,
-            )
-            quantiles = torch.quantile(
-                augmented_predictions,
-                q=torch.tensor(quantile_levels, dtype=augmented_predictions.dtype),
-                dim=-1,
-            ).permute(1, 2, 0)
-        # median as mean
-        mean = predictions[:, :, training_quantile_levels.index(0.5)]
-        return quantiles, mean

tirex/models/tirex.py DELETED Viewed

@@ -1,231 +0,0 @@
-# Copyright (c) NXAI GmbH.
-# This software may be used and distributed according to the terms of the NXAI Community License Agreement.
-import logging
-import warnings
-from contextlib import redirect_stdout
-from dataclasses import dataclass
-import lightning as L
-import torch
-from dacite import Config, from_dict
-from ..base import PretrainedModel
-from .components import PatchedUniTokenizer, ResidualBlock, StreamToLogger
-from .mixed_stack import skip_cuda, xLSTMMixedLargeBlockStack, xLSTMMixedLargeConfig
-from .predict_utils import TensorQuantileUniPredictMixin
-# Root logger (getLogger() is called without a name). TiRexZero.init_block routes
-# stdout captured during block-stack construction to it at INFO level.
-LOGGER = logging.getLogger()
-@dataclass
-class TiRexZeroConfig:
-    """Typed form of the ``model_config`` dict that ``TiRexZero.__init__`` parses with dacite."""
-    # Patch size handed to PatchedUniTokenizer. The input embedding takes
-    # 2 * input_patch_size features because patch values are concatenated with their mask.
-    input_patch_size: int
-    # Values emitted per token and quantile by the output head.
-    # TiRexZero.__init__ asserts that it equals input_patch_size.
-    output_patch_size: int
-    # Quantile levels; their count sets the number of output slices per token.
-    quantiles: list[float]
-    # Raw settings for the block stack, parsed into xLSTMMixedLargeConfig by
-    # TiRexZero.init_block. TiRexZero.__init__ then overwrites this field with the
-    # parsed config object, so it is no longer a plain dict afterwards.
-    block_kwargs: dict
-    # Hidden width (h_dim) of both the input and the output ResidualBlock.
-    input_ff_dim: int
-class TiRexZero(L.LightningModule, PretrainedModel, TensorQuantileUniPredictMixin):
-    def __init__(self, model_config: dict, train_ctx_len=None):
-        """Build the tokenizer, input embedding, block stack and quantile output head.
-        Args:
-            model_config: Mapping with the fields of ``TiRexZeroConfig``; parsed with
-                dacite using ``Config(strict=True)``.
-            train_ctx_len: Stored unchanged. ``_forecast_tensor`` uses it as the default
-                ``max_context`` and as a lower bound for the NaN-padded context length.
-        Raises:
-            AssertionError: If ``input_patch_size`` and ``output_patch_size`` differ.
-        """
-        super().__init__()
-        self.model_config: TiRexZeroConfig = from_dict(TiRexZeroConfig, model_config, config=Config(strict=True))
-        # The architecture below assumes identical input and output patch sizes.
-        assert self.model_config.input_patch_size == self.model_config.output_patch_size
-        self.train_ctx_len = train_ctx_len
-        # Block Stack
-        # Value substituted for NaN entries before embedding (see _forward_model_tokenized).
-        self.nan_mask_value = 0
-        self.block_stack, resolved_config = self.init_block(self.model_config.block_kwargs)
-        # Replace the raw dict with the parsed config object so that embedding_dim
-        # can be read as an attribute below.
-        self.model_config.block_kwargs = resolved_config
-        # Input Layer
-        # in_dim is doubled: each patch is concatenated with its mask before embedding.
-        self.input_patch_embedding = ResidualBlock(
-            in_dim=self.model_config.input_patch_size * 2,
-            h_dim=self.model_config.input_ff_dim,
-            out_dim=self.model_config.block_kwargs.embedding_dim,
-        )
-        self.tokenizer = PatchedUniTokenizer(
-            patch_size=self.model_config.input_patch_size,
-        )
-        # Output Layer
-        self.num_quantiles = len(self.model_config.quantiles)
-        quantiles = torch.tensor(self.model_config.quantiles)
-        # persistent=False: the buffer is not written to the state_dict.
-        self.register_buffer("quantiles", quantiles, persistent=False)
-        # One output patch per quantile level, flattened into a single feature axis.
-        self.output_patch_embedding = ResidualBlock(
-            in_dim=self.model_config.block_kwargs.embedding_dim,
-            h_dim=self.model_config.input_ff_dim,
-            out_dim=self.num_quantiles * self.model_config.output_patch_size,
-        )
-        # Lightning helper; presumably records the constructor arguments for
-        # checkpointing - confirm against the Lightning documentation.
-        self.save_hyperparameters()
-    @classmethod
-    def register_name(cls):
-        """Return the string identifier ``"TiRex"`` for this model class.
-        NOTE(review): presumably the key under which ``PretrainedModel`` registers or
-        looks up this class - confirm in ``..base``.
-        """
-        return "TiRex"
-    def init_block(self, block_kwargs):
-        config = from_dict(xLSTMMixedLargeConfig, block_kwargs)
-        log_redirect = StreamToLogger(LOGGER, logging.INFO)
-        with redirect_stdout(log_redirect):  # avoid excessive print statements of sLSTM compile
-            model = xLSTMMixedLargeBlockStack(config)
-        return model, config
-    @property
-    def quantiles(self):
-        """Quantile levels, read from ``self.model.quantiles``.
-        NOTE(review): nothing in this class assigns ``self.model``, while ``__init__``
-        registers a buffer that is also named "quantiles". Confirm where ``model`` comes
-        from, or whether this property should expose the registered buffer instead.
-        """
-        return self.model.quantiles
-    def _forward_model_tokenized(
-        self,
-        input_token,
-        input_mask=None,
-        rollouts=1,
-    ):
-        input_mask = (
-            input_mask.to(input_token.dtype)
-            if input_mask is not None
-            else torch.isnan(input_token).logical_not().to(input_token.dtype)
-        )
-        assert rollouts >= 1
-        bs, numb_ctx_token, token_dim = input_token.shape
-        if rollouts > 1:
-            input_token = torch.cat(
-                (
-                    input_token,
-                    torch.full(
-                        (bs, rollouts - 1, token_dim),
-                        fill_value=torch.nan,
-                        device=input_token.device,
-                        dtype=input_token.dtype,
-                    ),
-                ),
-                dim=1,
-            )
-            input_mask = torch.cat(
-                (
-                    input_mask,
-                    torch.full(
-                        (bs, rollouts - 1, token_dim),
-                        fill_value=False,
-                        device=input_mask.device,
-                        dtype=input_mask.dtype,
-                    ),
-                ),
-                dim=1,
-            )
-        input_token = torch.nan_to_num(input_token, nan=self.nan_mask_value)
-        input_embeds = self.input_patch_embedding(torch.cat((input_token, input_mask), dim=2))
-        # hidden_states = []
-        # for rollout in range(rollout):
-        x = self.block_stack(input_embeds)
-        if isinstance(x, tuple):
-            hidden_states = x[0]
-        else:
-            hidden_states = x
-        quantile_preds = self.output_patch_embedding(hidden_states)
-        quantile_preds = torch.unflatten(quantile_preds, -1, (self.num_quantiles, self.model_config.output_patch_size))
-        quantile_preds = torch.transpose(quantile_preds, 1, 2)  # switch quantile and num_token_dimension
-        # quantile_preds: [batch_size, num_quantiles, num_token, output_patch_size]
-        return quantile_preds, hidden_states
-    @torch.inference_mode()
-    def _forecast_tensor(
-        self,
-        context: torch.Tensor,
-        prediction_length: int | None = None,
-        max_context: int | None = None,
-        max_accelerated_rollout_steps: int = 1,
-    ) -> torch.Tensor:
-        """Forecast quantile trajectories patch by patch until ``prediction_length`` is covered.
-        Args:
-            context: Historical values; moved to the model device as float32.
-                NOTE(review): the padding branch builds a 2-D pad from ``context.shape[0]``
-                and concatenates on dim=1, so a (batch, time) layout is assumed - confirm.
-            prediction_length: Number of steps to forecast; defaults to one patch.
-            max_context: Only the most recent ``max_context`` values are kept; defaults to
-                ``self.train_ctx_len``.
-            max_accelerated_rollout_steps: Upper bound on patches predicted per forward pass.
-        Returns:
-            float32 tensor: per-iteration predictions concatenated on the last axis and cut
-            to ``prediction_length`` entries.
-        """
-        predictions = []
-        if prediction_length is None:
-            prediction_length = self.tokenizer.patch_size
-        # Ceiling division: number of patches needed to cover prediction_length.
-        remaining = -(prediction_length // -self.tokenizer.patch_size)
-        if max_context is None:
-            max_context = self.train_ctx_len
-        # Contexts shorter than this are left-padded with NaN inside the loop.
-        min_context = max(self.train_ctx_len, max_context)
-        context = context.to(
-            device=self.device,
-            dtype=torch.float32,
-        )
-        while remaining > 0:
-            # Keep only the most recent max_context values.
-            if context.shape[-1] > max_context:
-                context = context[..., -max_context:]
-            # Left-pad with NaN up to min_context.
-            if context.shape[-1] < min_context:
-                pad = torch.full(
-                    (context.shape[0], min_context - context.shape[-1]),
-                    fill_value=torch.nan,
-                    device=context.device,
-                    dtype=context.dtype,
-                )
-                context = torch.concat((pad, context), dim=1)
-            tokenized_tensor, tokenizer_state = self.tokenizer.context_input_transform(context)
-            # Number of patches predicted in this forward pass.
-            fut_rollouts = min(remaining, max_accelerated_rollout_steps)
-            # NOTE(review): the method is already decorated with inference_mode, so this
-            # inner no_grad looks redundant.
-            with torch.no_grad():
-                prediction, _ = self._forward_model_tokenized(input_token=tokenized_tensor, rollouts=fut_rollouts)
-                prediction = prediction[:, :, -fut_rollouts:, :].to(tokenized_tensor)  # predicted token
-                # [bs, num_quantiles, num_predicted_token, output_patch_size]
-            prediction = self.tokenizer.output_transform(prediction, tokenizer_state)
-            # Merge the token axis and the patch axis into one axis.
-            prediction = prediction.flatten(start_dim=2)
-            predictions.append(prediction)
-            remaining -= fut_rollouts
-            if remaining <= 0:
-                break
-            # Extend the context with NaN placeholders shaped like one quantile slice of
-            # the prediction; the predicted values themselves are not appended.
-            context = torch.cat([context, torch.full_like(prediction[:, 0, :], fill_value=torch.nan)], dim=-1)
-        return torch.cat(predictions, dim=-1)[..., :prediction_length].to(
-            dtype=torch.float32,
-        )
-    def on_load_checkpoint(self, checkpoint: dict) -> None:
-        state_dict = checkpoint["state_dict"]
-        load_vanilla_kernel = skip_cuda()
-        if load_vanilla_kernel:
-            warnings.warn(
-                "You use TiRex without sLSTM CUDA kernels! This might slow down the model considerably and might degrade forecasting results!"
-                "Set the environment variable TIREX_NO_CUDA to 0 to avoid this!"
-            )
-            block_kwargs = self.model_config.block_kwargs
-            head_dim = block_kwargs.embedding_dim // block_kwargs.num_heads
-            num_gates = 4
-            new_state_dict = {}
-            for k, v in state_dict.items():
-                if "slstm_layer.slstm_cell._recurrent_kernel_" in k:
-                    new_state_dict[k] = (
-                        v.reshape(
-                            block_kwargs.num_heads,
-                            head_dim,
-                            num_gates,
-                            head_dim,
-                        )
-                        .permute(0, 2, 3, 1)
-                        .reshape(
-                            block_kwargs.num_heads,
-                            num_gates * head_dim,
-                            head_dim,
-                        )
-                    )
-                    # new_state_dict[k] = v.permute(0, 2, 1)
-                elif "slstm_layer.slstm_cell._bias_" in k:
-                    new_state_dict[k] = (
-                        v.reshape(block_kwargs.num_heads, num_gates, head_dim).permute(1, 0, 2).reshape(-1)
-                    )
-                else:
-                    new_state_dict[k] = v
-            checkpoint["state_dict"] = new_state_dict
-    def after_load_from_checkpoint(self):
-        if not skip_cuda() and self.device.type != "cuda":
-            warnings.warn(
-                f"You use TiRex with sLSTM CUDA kernels BUT DO NOT LOAD THE DEVICE ON A CUDA DEVICE (device type is {self.device.type})!"
-                "This is not supported and calls to the model will likely lead to an error if you dont move your model to a CUDA device!"
-                "If you want to run TiRex on CPU you need to disable sLSTM CUDA kernels but be aware of the downsides (see FAQ)"
-            )