|
|
from contextlib import nullcontext |
|
|
import time |
|
|
import torch |
|
|
from apogee.tokenizer import Tokenizer |
|
|
from apogee.model import GPT, ModelConfig |
|
|
|
|
|
from typing import Any, Dict, Optional, Union |
|
|
from pathlib import Path |
|
|
|
|
|
# Enable TF32 on matmul and cuDNN kernels: faster fp32 math on Ampere+ GPUs
# at slightly reduced precision — acceptable for inference workloads.
torch.backends.cuda.matmul.allow_tf32 = True


torch.backends.cudnn.allow_tf32 = True
|
|
|
|
|
class EndpointHandler:
    """
    Inference endpoint handler for a GPT-based candle-scenario generator.

    Loads a trained checkpoint from disk, prepares the model for inference
    (eval mode, device placement, torch.compile, autocast context), and
    serves multi-scenario OHLCV generation through ``__call__``.
    """

    def __init__(self, base_path: Optional[Union[str, Path]] = None, device: Optional[str] = None):
        """
        Load the checkpoint and warm the model up for inference.

        Args:
            base_path: Directory containing ``ckpt.pt``. Defaults to the
                directory of this file.
            device: Torch device string (e.g. "cuda", "cuda:0", "cpu").
                Defaults to "cuda" when available, else "cpu".
        """
        if base_path is None:
            base_path = Path(__file__).parent
        self.base_path = Path(base_path)

        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        # Fixed typo in log message: "spwaned" -> "spawned".
        print(f"Handler spawned on device {self.device} 🚀")
        ckpt_path = self.base_path / "ckpt.pt"
        print(f"Loading model from {ckpt_path} 🤖")
        # NOTE(review): torch.load performs full unpickling here (no
        # weights_only), so only trusted checkpoints must be loaded; the
        # checkpoint stores a plain dict under "model_config".
        checkpoint = torch.load(ckpt_path, map_location=device)
        self.config = ModelConfig(**checkpoint["model_config"])
        self.tokenizer = Tokenizer()
        self.model = GPT(self.config)
        state_dict = checkpoint['model']
        # Checkpoints saved from a torch.compile()-d model prefix every key
        # with '_orig_mod.'; strip it so keys match the raw module.
        unwanted_prefix = '_orig_mod.'
        for k in list(state_dict.keys()):
            if k.startswith(unwanted_prefix):
                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
        self.model.load_state_dict(state_dict)
        self.model.eval()
        self.model.to(self.device)
        self.model = torch.compile(self.model)
        # Prefer bfloat16 on Ampere+ (compute capability >= 8) GPUs,
        # float16 otherwise. The dtype is unused on CPU (nullcontext below).
        dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() and torch.cuda.get_device_capability()[0] >= 8 else 'float16'
        ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
        # torch.device(...).type normalizes strings like "cuda:0" to "cuda",
        # which is what autocast expects as device_type (the original passed
        # the raw string and would fail for indexed devices).
        device_type = torch.device(device).type
        self.ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
        print("Warming up hardware 🔥")
        # One full-context forward pass triggers compilation / kernel
        # selection before the first real request.
        with torch.no_grad(), self.ctx:
            self.model(torch.randint(0, self.tokenizer.vocabulary_size, (1, self.config.block_size), device=self.device))
        print("Model ready ! ✅")

        # Maximum number of candles that fit in the model context window.
        self.max_candles = self.config.block_size // self.tokenizer.tokens_per_candle

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate candle scenarios continuing the provided OHLCV history.

        Args:
            data (Dict[str, Any]):
                inputs: Dict[str, List[float]] with keys:
                    timestamps: Timestamps of the time series
                    open: Open prices
                    high: High prices
                    low: Low prices
                    close: Close prices
                    volume: Volumes
                steps: int = 4 | Number of sampling steps
                n_scenarios: int = 32 | Number of scenarios to generate
                seed: Optional[int] = None | Seed for the random number generator

        Return:
            Dict[str, Any] Generated scenarios with keys:
                timestamps: Timestamps of the time series
                open: Open prices
                high: High prices
                low: Low prices
                close: Close prices
                volume: Volumes
        """
        t_start = time.time()

        # Accept either {"inputs": {...}, ...} or a flat payload where the
        # series keys live at the top level.
        inputs = data.pop("inputs", data)

        assert "timestamps" in inputs and "open" in inputs and "high" in inputs and "low" in inputs and "close" in inputs and "volume" in inputs, "Required keys: timestamps, open, high, low, close, volume"
        assert isinstance(inputs["timestamps"], list) and isinstance(inputs["open"], list) and isinstance(inputs["high"], list) and isinstance(inputs["low"], list) and isinstance(inputs["close"], list) and isinstance(inputs["volume"], list), "Inputs must be lists"
        assert len(inputs["timestamps"]) == len(inputs["open"]) == len(inputs["high"]) == len(inputs["low"]) == len(inputs["close"]) == len(inputs["volume"]), "Inputs must have the same length"
        timestamps = torch.tensor(list(map(int, inputs["timestamps"])))
        # Stack into (n_candles, 5) float32 rows of [open, high, low, close, volume].
        samples = torch.tensor([inputs["open"], inputs["high"], inputs["low"], inputs["close"], inputs["volume"]], dtype=torch.float32).T.contiguous()
        steps = data.pop("steps", 4)
        n_scenarios = data.pop("n_scenarios", 32)
        seed = data.pop("seed", None)

        assert isinstance(steps, int) and steps > 0, "steps must be a positive integer"
        # New guard: if steps >= max_candles the history slice below would
        # become a non-negative index and silently keep the OLDEST candles
        # instead of the most recent ones.
        assert steps < self.max_candles, "steps must be smaller than the model context (block_size // tokens_per_candle)"
        assert isinstance(n_scenarios, int) and n_scenarios > 0, "n_scenarios must be a positive integer"
        if seed is not None:
            assert isinstance(seed, int), "seed must be an integer"
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)  # silently ignored when CUDA is unavailable

        # Keep only the most recent candles, leaving room in the context
        # window for the `steps` candles we are about to generate.
        samples = samples[-self.max_candles + steps:]
        tokens = self.tokenizer.encode(samples)
        tokens = tokens.to(self.device).unsqueeze(0).long()
        with torch.no_grad(), self.ctx:
            for _ in range(steps * self.tokenizer.tokens_per_candle):
                assert tokens.shape[1] <= self.config.block_size, "Too many tokens in the sequence"
                logits = self.model(tokens)
                logits = logits[:, -1, :]  # only the next-token position matters

                probs = torch.nn.functional.softmax(logits, dim=-1)

                if probs.shape[0] != n_scenarios:
                    # First iteration: draw n_scenarios tokens from the single
                    # prompt distribution, then replicate the prompt across
                    # the batch so every scenario continues independently.
                    next_tokens = torch.multinomial(probs, num_samples=n_scenarios, replacement=True).T
                    tokens = tokens.expand(n_scenarios, -1)
                else:
                    next_tokens = torch.multinomial(probs, num_samples=1)

                tokens = torch.cat((tokens, next_tokens), dim=1)

        # Decode the whole sequence and keep only the newly generated candles.
        scenarios = self.tokenizer.decode(tokens)[:, -steps:]
        print(f"Generated {n_scenarios} scenarios in {time.time() - t_start:.2f} seconds ⏱")
        # Diagnostics: numerical issues and OHLC consistency violations
        # (high must be the max and low the min of the four price columns).
        print("Nans:", torch.isnan(scenarios).sum().item())
        print("Infs:", torch.isinf(scenarios).sum().item())
        high_not_highest = (scenarios[..., :4].max(-1).values > scenarios[..., 1])
        low_not_lowest = (scenarios[..., :4].min(-1).values < scenarios[..., 2])
        invalid_candle = high_not_highest | low_not_lowest
        print("Highest not high rate:", high_not_highest.float().mean().item())
        print("Lowest not low rate:", low_not_lowest.float().mean().item())
        print("Invalid candles rate:", invalid_candle.float().mean().item())
        print("Invalid scenario rate:", invalid_candle.any(dim=-1).float().mean().item())
        return {
            # Extrapolate future timestamps with the median historical interval,
            # which is robust to occasional gaps in the input series.
            "timestamps": (timestamps[-1] + torch.arange(1, steps+1) * torch.median(torch.diff(timestamps)).item()).tolist(),
            "open": scenarios[:, :, 0].tolist(),
            "high": scenarios[:, :, 1].tolist(),
            "low": scenarios[:, :, 2].tolist(),
            "close": scenarios[:, :, 3].tolist(),
            "volume": scenarios[:, :, 4].tolist()
        }
|
|
|
|
|
if __name__ == "__main__":
    import pandas as pd

    # Smoke-test the handler against a bundled CSV fixture.
    handler = EndpointHandler()
    test_path = Path(__file__).parents[2] / "tests" / "assets" / "BTCUSDT-1m-2019-03.csv"
    with open(test_path, "r") as f:
        data = pd.read_csv(f)

    # Columns are positional: timestamp, open, high, low, close, volume.
    cols = list(data.columns)
    payload = {
        "inputs": {
            "timestamps": data[cols[0]].tolist(),
            "open": data[cols[1]].tolist(),
            "high": data[cols[2]].tolist(),
            "low": data[cols[3]].tolist(),
            "close": data[cols[4]].tolist(),
            "volume": data[cols[5]].tolist(),
        },
        "steps": 4,
        "n_scenarios": 64,
        "seed": 42,
    }
    y = handler(payload)