import torch
import torch.nn as nn
import torch.nn.functional as F
|
class ProbAttention(nn.Module):
    """Scaled dot-product attention.

    Note: despite the name, this is ordinary full attention rather than the
    ProbSparse attention from the Informer paper, and it is not used by the
    model below, which relies on nn.MultiheadAttention instead.
    """

    def __init__(self):
        super(ProbAttention, self).__init__()

    def forward(self, queries, keys, values):
        # (..., L_q, d) x (..., d, L_k) -> (..., L_q, L_k), scaled by sqrt(d)
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / queries.size(-1) ** 0.5
        attn = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn, values)
        return context
|
class EncoderLayer(nn.Module):
    def __init__(self, d_model, n_heads, d_ff=2048, dropout=0.1):
        super(EncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, src):
        # Self-attention block with residual connection and post-norm
        src2 = self.self_attn(src, src, src)[0]
        src = src + self.dropout(src2)
        src = self.norm1(src)
        # Position-wise feed-forward block
        src2 = self.linear2(F.relu(self.linear1(src)))
        src = src + self.dropout(src2)
        src = self.norm2(src)
        return src
|
class DecoderLayer(nn.Module):
    def __init__(self, d_model, n_heads, d_ff=2048, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.multihead_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)

    def forward(self, tgt, memory):
        # Decoder self-attention
        tgt2 = self.self_attn(tgt, tgt, tgt)[0]
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm1(tgt)
        # Cross-attention over the encoder output
        tgt2 = self.multihead_attn(tgt, memory, memory)[0]
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm2(tgt)
        # Position-wise feed-forward block
        tgt2 = self.linear2(F.relu(self.linear1(tgt)))
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm3(tgt)
        return tgt
|
class Informer(nn.Module):
    def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len,
                 d_model=512, n_heads=8, e_layers=2, d_layers=1, dropout=0.1):
        super(Informer, self).__init__()

        self.seq_len = seq_len
        self.label_len = label_len
        self.out_len = out_len

        # Input embeddings: project raw features into the model dimension
        self.enc_embedding = nn.Linear(enc_in, d_model)
        self.dec_embedding = nn.Linear(dec_in, d_model)

        # Encoder stack
        self.encoder = nn.ModuleList([
            EncoderLayer(d_model, n_heads, dropout=dropout)
            for _ in range(e_layers)
        ])

        # Decoder stack
        self.decoder = nn.ModuleList([
            DecoderLayer(d_model, n_heads, dropout=dropout)
            for _ in range(d_layers)
        ])

        # Project decoder output back to the target dimension
        self.projection = nn.Linear(d_model, c_out)

    def forward(self, enc_inp, dec_inp):
        # enc_inp: (batch, seq_len, enc_in); dec_inp: (batch, label_len + out_len, dec_in)
        enc_out = self.enc_embedding(enc_inp)
        dec_out = self.dec_embedding(dec_inp)

        # nn.MultiheadAttention expects (seq_len, batch, d_model) by default
        enc_out = enc_out.permute(1, 0, 2)
        for layer in self.encoder:
            enc_out = layer(enc_out)
        memory = enc_out

        dec_out = dec_out.permute(1, 0, 2)
        for layer in self.decoder:
            dec_out = layer(dec_out, memory)

        # Back to (batch, length, d_model) before projecting to c_out
        dec_out = dec_out.permute(1, 0, 2)
        output = self.projection(dec_out)

        # Keep only the final out_len steps as the forecast
        return output[:, -self.out_len:, :]
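
# A minimal smoke test: a sketch with arbitrary example sizes (batch, feature
# counts, and window lengths below are placeholders, and d_model/n_heads are
# reduced from the defaults only to keep the run fast).
if __name__ == "__main__":
    batch, enc_in, dec_in, c_out = 4, 7, 7, 7
    seq_len, label_len, out_len = 96, 48, 24

    model = Informer(enc_in, dec_in, c_out, seq_len, label_len, out_len,
                     d_model=64, n_heads=4)

    enc_inp = torch.randn(batch, seq_len, enc_in)
    # Decoder input spans the known label window plus the horizon to predict
    dec_inp = torch.randn(batch, label_len + out_len, dec_in)

    out = model(enc_inp, dec_inp)
    print(out.shape)  # expected: torch.Size([4, 24, 7])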
|
|