| """ |
| This function is adapted from [NeurIPS2023-One-Fits-All] by [tianzhou2011] |
| Original source: [https://github.com/DAMO-DI-ML/NeurIPS2023-One-Fits-All] |
| """ |
|
|
| import argparse |
| from typing import Dict |
| import numpy as np |
| import torchinfo |
| import torch |
| from torch import nn, optim |
| from torch.utils.data import DataLoader |
| from torch.nn.utils import weight_norm |
| import tqdm |
| import os, math |
| from typing import Optional |
| import torch.nn.functional as F |
|
|
| from transformers.models.gpt2.modeling_gpt2 import GPT2Model |
| from einops import rearrange |
|
|
|
|
| from ..utils.torch_utility import EarlyStoppingTorch, PositionalEmbedding, TokenEmbedding, TemporalEmbedding, get_gpu, TimeFeatureEmbedding, DataEmbedding, adjust_learning_rate |
| from ..utils.dataset import ReconstructDataset |
|
|
| class DataEmbedding_wo_pos(nn.Module): |
| def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): |
| super(DataEmbedding_wo_pos, self).__init__() |
|
|
| self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) |
| self.position_embedding = PositionalEmbedding(d_model=d_model) |
| self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, |
| freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( |
| d_model=d_model, embed_type=embed_type, freq=freq) |
| self.dropout = nn.Dropout(p=dropout) |
|
|
| def forward(self, x, x_mark): |
| if x_mark is None: |
| x = self.value_embedding(x) |
| else: |
| x = self.value_embedding(x) + self.temporal_embedding(x_mark) |
| return self.dropout(x) |
|
|
| class PatchEmbedding(nn.Module): |
| def __init__(self, d_model, patch_len, stride, dropout): |
| super(PatchEmbedding, self).__init__() |
| |
| self.patch_len = patch_len |
| self.stride = stride |
| self.padding_patch_layer = nn.ReplicationPad1d((0, stride)) |
|
|
| |
| self.value_embedding = TokenEmbedding(patch_len, d_model) |
|
|
| |
| self.position_embedding = PositionalEmbedding(d_model) |
|
|
| |
| self.dropout = nn.Dropout(dropout) |
|
|
| def forward(self, x): |
| |
| n_vars = x.shape[1] |
| x = self.padding_patch_layer(x) |
| x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) |
| x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) |
| |
| x = self.value_embedding(x) + self.position_embedding(x) |
| return self.dropout(x), n_vars |
|
|
| class DataEmbedding_wo_time(nn.Module): |
| def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): |
| super(DataEmbedding_wo_time, self).__init__() |
|
|
| self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) |
| self.position_embedding = PositionalEmbedding(d_model=d_model) |
| self.dropout = nn.Dropout(p=dropout) |
|
|
| def forward(self, x): |
| x = self.value_embedding(x) + self.position_embedding(x) |
| return self.dropout(x) |
|
|
| class Model(nn.Module): |
| |
| def __init__(self, |
| pred_len=0, |
| seq_len=100, |
| patch_size=1, |
| stride=1, |
| d_model = 768, |
| d_ff = 768, |
| embed = "timeF", |
| gpt_layers = 6, |
| enc_in = 1, |
| c_out = 1, |
| freq = "h", |
| dropout= 0.1, |
| mlp = 0, |
| model_path = "pre_train"): |
| super(Model, self).__init__() |
| self.pred_len = pred_len |
| self.seq_len = seq_len |
| self.patch_size = patch_size |
| self.stride = stride |
| self.seq_len = seq_len |
| self.d_ff = d_ff |
| self.d_model = d_model |
| self.embed = embed |
| self.gpt_layers = gpt_layers |
| self.enc_in = enc_in |
| self.c_out = c_out |
| self.freq = freq |
| self.dropout = dropout |
| self.model_path = model_path |
| self.mlp = mlp |
| |
| self.patch_num = (self.seq_len + self.pred_len - self.patch_size) // self.stride + 1 |
|
|
| self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride)) |
| self.patch_num += 1 |
| self.enc_embedding = DataEmbedding(self.enc_in * self.patch_size, self.d_model, self.embed, self.freq, |
| self.dropout) |
|
|
| self.gpt2 = GPT2Model.from_pretrained('gpt2', output_attentions=True, output_hidden_states=True) |
| self.gpt2.h = self.gpt2.h[:self.gpt_layers] |
| |
| for i, (name, param) in enumerate(self.gpt2.named_parameters()): |
| if 'ln' in name or 'wpe' in name: |
| param.requires_grad = True |
| elif 'mlp' in name and self.mlp == 1: |
| param.requires_grad = True |
| else: |
| param.requires_grad = False |
|
|
| |
| |
| |
|
|
| self.ln_proj = nn.LayerNorm(self.d_ff) |
| self.out_layer = nn.Linear( |
| self.d_ff, |
| self.c_out, |
| bias=True) |
|
|
| def forward(self, x_enc): |
| dec_out = self.anomaly_detection(x_enc) |
| return dec_out |
|
|
| def anomaly_detection(self, x_enc): |
| B, L, M = x_enc.shape |
| |
| |
|
|
| seg_num = 25 |
| x_enc = rearrange(x_enc, 'b (n s) m -> b n s m', s=seg_num) |
| means = x_enc.mean(2, keepdim=True).detach() |
| x_enc = x_enc - means |
| stdev = torch.sqrt( |
| torch.var(x_enc, dim=2, keepdim=True, unbiased=False) + 1e-5) |
| x_enc /= stdev |
| x_enc = rearrange(x_enc, 'b n s m -> b (n s) m') |
|
|
| |
| |
| |
| |
| |
|
|
| |
| enc_out = torch.nn.functional.pad(x_enc, (0, 768-x_enc.shape[-1])) |
| |
| outputs = self.gpt2(inputs_embeds=enc_out).last_hidden_state |
| |
| outputs = outputs[:, :, :self.d_ff] |
| |
| dec_out = self.out_layer(outputs) |
|
|
| |
|
|
| dec_out = rearrange(dec_out, 'b (n s) m -> b n s m', s=seg_num) |
| dec_out = dec_out * \ |
| (stdev[:, :, 0, :].unsqueeze(2).repeat( |
| 1, 1, seg_num, 1)) |
| dec_out = dec_out + \ |
| (means[:, :, 0, :].unsqueeze(2).repeat( |
| 1, 1, seg_num, 1)) |
| dec_out = rearrange(dec_out, 'b n s m -> b (n s) m') |
|
|
| |
| |
| |
| |
| |
| |
| return dec_out |
|
|
| class OFA(): |
| def __init__(self, |
| win_size = 100, |
| stride = 1, |
| enc_in = 1, |
| features = 'M', |
| batch_size = 128, |
| learning_rate = 0.0001, |
| epochs = 10, |
| patience = 3, |
| lradj = "type1", |
| validation_size=0.2): |
| super().__init__() |
| self.win_size = win_size |
| self.stride = stride |
| self.enc_in = enc_in |
| self.features = features |
| self.batch_size = batch_size |
| self.learning_rate = learning_rate |
| self.epochs = epochs |
| self.patience = patience |
| self.lradj = lradj |
| self.validation_size = validation_size |
|
|
| self.decision_scores_ = None |
| |
| cuda = True |
| self.y_hats = None |
| |
| self.cuda = cuda |
| self.device = get_gpu(self.cuda) |
| |
| self.model = Model(seq_len=self.win_size, enc_in=self.enc_in, c_out=self.enc_in).float().to(self.device) |
| self.model_optim = optim.Adam(self.model.parameters(), lr=self.learning_rate) |
| self.criterion = nn.MSELoss() |
| |
| self.early_stopping = EarlyStoppingTorch(None, patience=self.patience) |
| self.input_shape = (self.batch_size, self.win_size, self.enc_in) |
| |
| def fit(self, data): |
| tsTrain = data[:int((1-self.validation_size)*len(data))] |
| tsValid = data[int((1-self.validation_size)*len(data)):] |
|
|
| train_loader = DataLoader( |
| dataset=ReconstructDataset(tsTrain, window_size=self.win_size, stride=self.stride), |
| batch_size=self.batch_size, |
| shuffle=True |
| ) |
| |
| valid_loader = DataLoader( |
| dataset=ReconstructDataset(tsValid, window_size=self.win_size, stride=self.stride), |
| batch_size=self.batch_size, |
| shuffle=False |
| ) |
| |
| train_steps = len(train_loader) |
| for epoch in range(1, self.epochs + 1): |
| |
| train_loss = 0 |
| self.model.train() |
| |
| loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) |
| for i, (batch_x, _) in loop: |
| self.model_optim.zero_grad() |
| |
| batch_x = batch_x.float().to(self.device) |
| |
| outputs = self.model(batch_x) |
| loss = self.criterion(outputs, batch_x) |
| |
| loss.backward() |
| self.model_optim.step() |
| |
| train_loss += loss.cpu().item() |
| |
| loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') |
| loop.set_postfix(loss=loss.item(), avg_loss=train_loss/(i+1)) |
| |
| |
| self.model.eval() |
| total_loss = [] |
| |
| loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) |
| with torch.no_grad(): |
| for i, (batch_x, _) in loop: |
| batch_x = batch_x.float().to(self.device) |
|
|
| outputs = self.model(batch_x) |
| f_dim = -1 if self.features == 'MS' else 0 |
| outputs = outputs[:, :, f_dim:] |
| pred = outputs.detach().cpu() |
| true = batch_x.detach().cpu() |
|
|
| loss = self.criterion(pred, true) |
| total_loss.append(loss) |
| loop.set_description(f'Valid Epoch [{epoch}/{self.epochs}]') |
| |
| valid_loss = np.average(total_loss) |
| loop.set_postfix(loss=loss.item(), valid_loss=valid_loss) |
| self.early_stopping(valid_loss, self.model) |
| if self.early_stopping.early_stop: |
| print(" Early stopping<<<") |
| break |
| |
| adjust_learning_rate(self.model_optim, epoch + 1, self.lradj, self.learning_rate) |
| |
| |
| def decision_function(self, data): |
| test_loader = DataLoader( |
| dataset=ReconstructDataset(data, window_size=self.win_size, stride=self.stride), |
| batch_size=self.batch_size, |
| shuffle=False |
| ) |
| |
| self.model.eval() |
| attens_energy = [] |
| y_hats = [] |
| self.anomaly_criterion = nn.MSELoss(reduce=False) |
| |
| loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) |
| with torch.no_grad(): |
| for i, (batch_x, _) in loop: |
| batch_x = batch_x.float().to(self.device) |
| |
| outputs = self.model(batch_x) |
| |
| |
| |
| score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) |
| y_hat = torch.squeeze(outputs, -1) |
| |
| score = score.detach().cpu().numpy()[:, -1] |
| y_hat = y_hat.detach().cpu().numpy()[:, -1] |
| |
| attens_energy.append(score) |
| y_hats.append(y_hat) |
| loop.set_description(f'Testing Phase: ') |
|
|
| attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) |
| scores = np.array(attens_energy) |
| |
| y_hats = np.concatenate(y_hats, axis=0).reshape(-1) |
| y_hats = np.array(y_hats) |
|
|
| assert scores.ndim == 1 |
| |
| import shutil |
| self.save_path = None |
| if self.save_path and os.path.exists(self.save_path): |
| shutil.rmtree(self.save_path) |
| |
| |
| scores_win = [scores[i] for i in range(scores.shape[0])] |
| self.decision_scores_ = np.zeros(len(data)) |
| count = np.zeros(len(data)) |
| for i, score in enumerate(scores_win): |
| start = i * self.stride |
| end = start + self.win_size |
| self.decision_scores_[start:end] += score |
| count[start:end] += 1 |
| self.decision_scores_ = self.decision_scores_ / np.maximum(count, 1) |
|
|
| return self.decision_scores_ |
| |
| def param_statistic(self, save_file): |
| model_stats = torchinfo.summary(self.model, self.input_shape, verbose=0) |
| with open(save_file, 'w') as f: |
| f.write(str(model_stats)) |
|
|