Spaces:

leehao163
/

Kronosdemo

Runtime error

App Files Files Community

leehao163 committed on Jan 2

Commit

a2c7d6b

verified ·

1 Parent(s): bd59cf2

Create model.py

Browse files

Files changed (1) hide show

model.py +91 -0

model.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import torch
+import torch.nn as nn
+import numpy as np
+import pandas as pd
+from huggingface_hub import from_pretrained_keras, hf_hub_download
+import os
# ------------- KronosTokenizer: OHLCV tokenizer -------------
class KronosTokenizer(nn.Module):
    """Quantize continuous OHLCV values into token ids and embed them.

    Each input value is shifted and scaled per feature, rounded to the
    nearest integer, clamped into ``[0, vocab_size - 1]`` and then looked up
    in an embedding table.

    Args:
        vocab_size: Number of discrete tokens (rows of the embedding table).
        embed_dim: Width of each token embedding.
    """

    def __init__(self, vocab_size=1024, embed_dim=128):
        super().__init__()
        self.vocab_size = vocab_size
        self.embed = nn.Embedding(vocab_size, embed_dim)
        # Per-feature affine quantization parameters: one entry for each of
        # the five OHLCV features (open, high, low, close, volume).
        self.scale = nn.Parameter(torch.ones(5))
        self.shift = nn.Parameter(torch.zeros(5))

    @classmethod
    def from_pretrained(cls, model_id, **kwargs):
        """Build a tokenizer and load its weights from the Hugging Face Hub.

        Args:
            model_id: Hub repository id that contains ``tokenizer_weights.bin``.
            **kwargs: Forwarded to the constructor (``vocab_size``,
                ``embed_dim``); they must match the checkpoint's shapes.

        Returns:
            The tokenizer with the downloaded state dict loaded.
        """
        model = cls(**kwargs)
        weight_path = hf_hub_download(repo_id=model_id, filename="tokenizer_weights.bin")
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a downloaded checkpoint cannot run arbitrary code.
        state_dict = torch.load(weight_path, map_location="cpu", weights_only=True)
        model.load_state_dict(state_dict)
        return model

    def forward(self, x):
        """Quantize ``x`` to token ids and return their embeddings.

        Args:
            x: Float tensor whose last dimension broadcasts against the
                5-element ``shift`` / ``scale`` parameters.

        Returns:
            Tensor of shape ``x.shape + (embed_dim,)``: one embedding per
            input value.
        """
        normalized = (x - self.shift) / self.scale
        # Out-of-range values saturate at the first / last token instead of
        # producing an invalid embedding index.
        token_ids = torch.clamp(torch.round(normalized), 0, self.vocab_size - 1).long()
        return self.embed(token_ids)
# ------------- Kronos: main model -------------
class Kronos(nn.Module):
    """Transformer-decoder stack followed by a linear head with 5 outputs.

    Args:
        d_model: Feature width expected on the last dimension of the input.
        nhead: Number of attention heads per decoder layer.
        num_layers: Number of stacked decoder layers.
    """

    def __init__(self, d_model=256, nhead=8, num_layers=6):
        super().__init__()
        self.transformer = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead, batch_first=True),
            num_layers=num_layers,
        )
        # One output per OHLCV feature.
        self.fc = nn.Linear(d_model, 5)

    @classmethod
    def from_pretrained(cls, model_id, torch_dtype=torch.float32, **kwargs):
        """Build a model and load its weights from the Hugging Face Hub.

        Args:
            model_id: Hub repository id that contains ``pytorch_model.bin``.
            torch_dtype: Floating-point dtype the parameters are cast to
                after loading.
            **kwargs: Forwarded to the constructor (``d_model``, ``nhead``,
                ``num_layers``); they must match the checkpoint's shapes.

        Returns:
            The model with the downloaded state dict loaded, cast to
            ``torch_dtype``, and with a ``dtype`` attribute recording it.
        """
        model = cls(**kwargs)
        weight_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
        state_dict = torch.load(weight_path, map_location="cpu", weights_only=True)
        model.load_state_dict(state_dict)
        # Actually cast the parameters; assigning the attribute alone leaves
        # the weights in their original dtype.
        model = model.to(dtype=torch_dtype)
        # Kept so callers that read ``model.dtype`` keep working.
        model.dtype = torch_dtype
        return model

    def forward(self, x):
        """Run the decoder over ``x`` and project to 5 output features.

        Args:
            x: Embedded input whose last dimension is ``d_model``; with
                ``batch_first=True`` a 3-D input is (batch, sequence, d_model).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 5.
        """
        # ``x`` serves as both the target sequence and the memory.
        # NOTE(review): no tgt_mask is passed, so every position attends to
        # all positions (including later ones) — confirm this is intended
        # for autoregressive use.
        hidden = self.transformer(x, x)
        return self.fc(hidden)
# ------------- KronosPredictor: inference wrapper -------------
class KronosPredictor:
    """Run a tokenizer + model pair over OHLCV data read from CSV.

    Args:
        model: Module mapping tokenizer output to predictions.
        tokenizer: Module mapping a float OHLCV tensor to model input.
        device: Device both modules and the input tensor are placed on.
        max_context: Maximum number of most-recent rows fed to the model.
    """

    def __init__(self, model, tokenizer, device="cpu", max_context=512):
        # Both modules run in inference mode only.
        self.model = model.to(device).eval()
        self.tokenizer = tokenizer.to(device).eval()
        self.device = device
        self.max_context = max_context

    def preprocess(self, df):
        """Extract the OHLCV columns as a float32 tensor, truncated to context.

        No normalization is applied here; values are passed through as-is.

        Args:
            df: DataFrame with ``open``, ``high``, ``low``, ``close`` and
                ``volume`` columns.

        Returns:
            Tensor of shape (rows, 5) on ``self.device`` holding at most the
            last ``max_context`` rows.

        Raises:
            KeyError: If any of the five columns is missing.
        """
        ohlcv = df[["open", "high", "low", "close", "volume"]].values.astype(np.float32)
        # Keep only the most recent rows when the history exceeds the context.
        if len(ohlcv) > self.max_context:
            ohlcv = ohlcv[-self.max_context:]
        return torch.tensor(ohlcv, device=self.device)

    def predict(self, csv_data, prediction_length=5, num_samples=10):
        """Read OHLCV data from CSV and return the averaged model output.

        Args:
            csv_data: Path or file-like object accepted by ``pandas.read_csv``.
            prediction_length: Number of trailing entries (along the first
                dimension of the model output) to return; must be >= 1.
            num_samples: Number of forward passes to average; must be >= 1.

        Returns:
            NumPy array: the element-wise mean over ``num_samples`` passes of
            the last ``prediction_length`` entries of the model output.

        Raises:
            ValueError: If ``prediction_length`` or ``num_samples`` is < 1.
        """
        # A length of 0 would make the ``[-0:]`` slice return everything, and
        # zero samples would average an empty list into NaN.
        if prediction_length < 1:
            raise ValueError("prediction_length must be >= 1")
        if num_samples < 1:
            raise ValueError("num_samples must be >= 1")

        df = pd.read_csv(csv_data)
        x = self.preprocess(df)

        # Inference only: keep the tokenizer inside no_grad as well so no
        # autograd graph is built for the embeddings.
        with torch.no_grad():
            x_embed = self.tokenizer(x)
            # NOTE(review): the model is in eval mode, so these passes are
            # identical unless the model is stochastic at inference time.
            predictions = [
                self.model(x_embed)[-prediction_length:].cpu().numpy()
                for _ in range(num_samples)
            ]
        return np.mean(predictions, axis=0)