File size: 8,120 Bytes
89b4b0c
 
 
 
 
 
 
 
 
 
 
 
4702f0f
89b4b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fac47b5
89b4b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c342354
89b4b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81df173
 
 
 
 
 
 
 
 
89b4b0c
 
 
 
 
 
 
 
 
 
 
4702f0f
89b4b0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from contextlib import nullcontext
import time
import torch
from apogee.tokenizer import Tokenizer
from apogee.model import GPT, ModelConfig

from typing import Any, Dict, Optional, Union
from pathlib import Path

# Enable TF32 tensor cores for fp32 matmuls/convolutions on Ampere+ GPUs:
# large speedup at a small, usually acceptable precision cost for inference.
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn

class EndpointHandler:
    """
    Inference handler serving OHLCV scenario generation with a GPT candle model.

    On construction it loads the ``ckpt.pt`` checkpoint found under *base_path*,
    restores the weights into a fresh ``GPT``, compiles the model, selects a
    mixed-precision autocast context and runs one warm-up forward pass.
    ``__call__`` then turns a candle history into ``n_scenarios`` sampled
    future candle sequences.
    """

    def __init__(self, base_path: Optional[Union[str, Path]] = None, device: Optional[str] = None):
        """
        Load the checkpoint and prepare the model for inference.

        Args:
            base_path: Directory containing ``ckpt.pt``. Defaults to the
                directory this file lives in.
            device: Torch device string ("cuda" or "cpu"). Auto-detected when
                None. NOTE(review): ``torch.amp.autocast`` below receives this
                string as ``device_type``, which expects a bare device type —
                passing e.g. "cuda:1" may fail; confirm intended usage.
        """
        if base_path is None:
            base_path = Path(__file__).parent
        self.base_path = Path(base_path)
        # Auto-select the device unless the caller pinned one.
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        print(f"Handler spawned on device {self.device} 🚀")  # fixed typo: was "spwaned"
        ckpt_path = self.base_path / "ckpt.pt"
        print(f"Loading model from {ckpt_path} 🤖")
        checkpoint = torch.load(ckpt_path, map_location=device)
        self.config = ModelConfig(**checkpoint["model_config"])
        self.tokenizer = Tokenizer()
        self.model = GPT(self.config)
        state_dict = checkpoint['model']
        # Checkpoints saved from a torch.compile()'d model prefix every key
        # with '_orig_mod.'; strip it so keys match a fresh (uncompiled) GPT.
        unwanted_prefix = '_orig_mod.'
        for k in list(state_dict.keys()):
            if k.startswith(unwanted_prefix):
                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
        self.model.load_state_dict(state_dict)
        self.model.eval()
        self.model.to(self.device)
        self.model = torch.compile(self.model)
        # bfloat16 requires an Ampere-or-newer GPU (compute capability >= 8);
        # otherwise fall back to float16. On CPU no autocast is used at all.
        dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() and torch.cuda.get_device_capability()[0] >= 8 else 'float16' # 'float32' or 'bfloat16' or 'float16'
        ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
        self.ctx = nullcontext() if device == 'cpu' else torch.amp.autocast(device_type=device, dtype=ptdtype)
        # A dummy full-context forward pass triggers torch.compile compilation
        # and CUDA kernel warm-up so the first real request isn't slowed by it.
        print("Warming up hardware 🔥")
        with torch.no_grad(), self.ctx:
            self.model(torch.randint(0, self.tokenizer.vocabulary_size, (1, self.config.block_size), device=self.device))
        print("Model ready ! ✅")
        # Precompute useful values: how many whole candles fit in the context window.
        self.max_candles = self.config.block_size // self.tokenizer.tokens_per_candle

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Args:
            data (Dict[str, Any]):
                inputs: Dict[str, List[float]] with keys:
                    timestamps: Timestamps of the time serie
                    open: Open prices
                    high: High prices
                    low: Low prices
                    close: Close prices
                    volume: Volumes
                steps: int = 4 | Number of sampling steps
                n_scenarios: int = 32 | Number of scenarios to generate
                seed: Optional[int] = None | Seed for the random number generator
        Return:
            Dict[str, Any] Generated scenarios with keys:
                timestamps: Timestamps of the time serie
                open: Open prices
                high: High prices
                low: Low prices
                close: Close prices
                volume: Volumes
        """
        t_start = time.time() # Start the timer
        # Unpack input data (fall back to treating `data` itself as the inputs).
        inputs = data.pop("inputs", data)
        # Validate the inputs.
        # NOTE(review): assert-based validation is stripped under `python -O`;
        # kept as-is since callers may rely on AssertionError — confirm.
        assert "timestamps" in inputs and "open" in inputs and "high" in inputs and "low" in inputs and "close" in inputs and "volume" in inputs, "Required keys: timestamps, open, high, low, close, volume"
        assert isinstance(inputs["timestamps"], list) and isinstance(inputs["open"], list) and isinstance(inputs["high"], list) and isinstance(inputs["low"], list) and isinstance(inputs["close"], list) and isinstance(inputs["volume"], list), "Inputs must be lists"
        assert len(inputs["timestamps"]) == len(inputs["open"]) == len(inputs["high"]) == len(inputs["low"]) == len(inputs["close"]) == len(inputs["volume"]), "Inputs must have the same length"
        timestamps = torch.tensor(list(map(int, inputs["timestamps"])))
        # Shape (n_candles, 5): columns are open, high, low, close, volume.
        samples = torch.tensor([inputs["open"], inputs["high"], inputs["low"], inputs["close"], inputs["volume"]], dtype=torch.float32).T.contiguous()
        steps = data.pop("steps", 4)
        n_scenarios = data.pop("n_scenarios", 32)
        seed = data.pop("seed", None)
        # Validate the params
        assert isinstance(steps, int) and steps > 0, "steps must be a positive integer"
        assert isinstance(n_scenarios, int) and n_scenarios > 0, "n_scenarios must be a positive integer"
        if seed is not None:
            assert isinstance(seed, int), "seed must be an integer"
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
        # Generate scenarios.
        # Keep only the trailing history so that history + `steps` generated
        # candles fit in the context window. (Assumes steps < max_candles —
        # TODO confirm; otherwise the slice start becomes non-negative.)
        samples = samples[-self.max_candles + steps:] # Keep only the last candles that fit in the model's context
        tokens = self.tokenizer.encode(samples) # Encode the samples into tokens
        tokens = tokens.to(self.device).unsqueeze(0).long() # Add a batch dimension
        with torch.no_grad(), self.ctx:
            for _ in range(steps * self.tokenizer.tokens_per_candle):
                assert tokens.shape[1] <= self.config.block_size, "Too many tokens in the sequence"
                logits = self.model(tokens) # forward the model to get the logits for the index in the sequence
                logits = logits[:, -1, :] # pluck the logits at the final step
                # apply softmax to convert logits to (normalized) probabilities
                probs = torch.nn.functional.softmax(logits, dim=-1)
                # sample from the distribution
                if probs.shape[0] != n_scenarios:
                    # First step: fan the single prompt out into n_scenarios
                    # branches by drawing n_scenarios tokens from one distribution.
                    next_tokens = torch.multinomial(probs, num_samples=n_scenarios, replacement=True).T
                    tokens = tokens.expand(n_scenarios, -1)
                else:
                    # Subsequent steps: one token per scenario.
                    next_tokens = torch.multinomial(probs, num_samples=1)
                # append sampled index to the running sequence and continue
                tokens = torch.cat((tokens, next_tokens), dim=1)
        # Decode the tokens back into samples, keeping only the generated candles.
        scenarios = self.tokenizer.decode(tokens)[:, -steps:]
        print(f"Generated {n_scenarios} scenarios in {time.time() - t_start:.2f} seconds ⏱")
        # Sanity diagnostics: NaN/Inf counts and OHLC consistency rates
        # (high must be the max and low the min of the four price columns).
        print("Nans:", torch.isnan(scenarios).sum().item())
        print("Infs:", torch.isinf(scenarios).sum().item())
        high_not_highest = (scenarios[..., :4].max(-1).values > scenarios[..., 1])
        low_not_lowest = (scenarios[..., :4].min(-1).values < scenarios[..., 2])
        invalid_candle = high_not_highest | low_not_lowest
        print("Highest not high rate:", high_not_highest.float().mean().item())
        print("Lowest not low rate:", low_not_lowest.float().mean().item())
        print("Invalid candles rate:", invalid_candle.float().mean().item())
        print("Invalid scenario rate:", invalid_candle.any(dim=-1).float().mean().item())
        return {
            # Future timestamps extrapolated with the median historical interval
            # (median is robust to occasional gaps in the input series).
            "timestamps": (timestamps[-1] + torch.arange(1, steps+1) * torch.median(torch.diff(timestamps)).item()).tolist(),
            "open": scenarios[:, :, 0].tolist(),
            "high": scenarios[:, :, 1].tolist(),
            "low": scenarios[:, :, 2].tolist(),
            "close": scenarios[:, :, 3].tolist(),
            "volume": scenarios[:, :, 4].tolist()
        }

if __name__ == "__main__":
    import pandas as pd

    # Smoke-test the handler against a bundled historical CSV fixture.
    handler = EndpointHandler()
    test_path = Path(__file__).parents[2] / "tests" / "assets" / "BTCUSDT-1m-2019-03.csv"
    with open(test_path, "r") as csv_file:
        frame = pd.read_csv(csv_file)
    # The CSV columns map positionally onto the handler's expected input keys.
    field_names = ("timestamps", "open", "high", "low", "close", "volume")
    y = handler({
        "inputs": {
            name: frame[frame.columns[position]].tolist()
            for position, name in enumerate(field_names)
        },
        "steps": 4,
        "n_scenarios": 64,
        "seed": 42
    })