# NoiceDetectorModel / NoiceDetectionModel.py
# Provenance (Hugging Face page chrome, kept as comments so the file parses):
# uploaded by Airin-chan — "Upload 2 files", commit b4cf036 (verified)
from torch import nn
import torch
from torch.nn import functional as F
from torchvision import transforms as T
class PatchEmbedding(nn.Module):
    """Cut an image into non-overlapping patches and embed each one.

    Implemented as a single Conv2d whose kernel size equals its stride,
    so every output "pixel" is the linear embedding of exactly one patch.
    """

    def __init__(self, image_size, patch_size, embedding_size):
        super().__init__()
        # kernel_size == stride == patch_size -> one projection per patch.
        self.projection_layers = nn.Conv2d(
            in_channels=3,
            out_channels=embedding_size,
            kernel_size=patch_size,
            stride=patch_size,
        )
        # Patches per side, squared = total patch count.
        self.n_patch = (image_size // patch_size) ** 2

    def forward(self, x):
        # (B, 3, H, W) -> (B, E, H/p, W/p)
        embedded = self.projection_layers(x)
        # Flatten the spatial grid, then put the patch axis before the
        # embedding axis: (B, E, N) -> (B, N, E).
        return embedded.flatten(start_dim=2).transpose(1, 2)
class PositionalEmbedding(nn.Module):
    """Prepend a learnable [CLS] token and add learned position embeddings.

    Output length is ``n_patch + 1`` tokens: the CLS slot plus one token
    per patch. Both parameters are initialised ~ N(0, 0.02).
    """

    def __init__(self, n_patch, embedding_size):
        super().__init__()
        self.n_patch = n_patch
        # NOTE: parameter creation order (position, then cls_token) matters
        # for seeded reproducibility; kept identical to the original.
        self.position = nn.Parameter(
            torch.normal(0.0, 0.02, size=(1, n_patch + 1, embedding_size))
        )
        self.cls_token = nn.Parameter(
            torch.normal(0.0, 0.02, size=(1, 1, embedding_size))
        )
        self.embedding_size = embedding_size

    def forward(self, x):
        batch = x.shape[0]
        # Replicate the single CLS token across the batch (view, no copy).
        cls = self.cls_token.expand(batch, 1, self.embedding_size)
        tokens = torch.cat((cls, x), dim=1)
        return tokens + self.position
class BlockTransformers(nn.Module):
    """Pre-norm transformer encoder block: self-attention + FFN, each residual.

    Fixes versus the original:
      * ``batch_first=True`` on MultiheadAttention — every tensor in this
        model is (batch, seq, dim); without the flag, nn.MultiheadAttention
        treats dim 0 as the sequence, so attention mixed across the batch.
      * The feed-forward branch now actually uses ``norm2`` and
        ``FeedFordward``: the original overwrote each intermediate with ``x``
        (``ffn = self.FeedFordward(x)`` then ``ffn = self.drop_out(x)``), so
        the residual added only ``dropout(x)`` and the FFN weights were dead.

    NOTE(review): checkpoints trained with the buggy forward will produce
    different outputs after this fix — re-validate any saved weights.
    """

    def __init__(self, d_model, num_head, ffn_dim, droprate=0.1):
        super().__init__()
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        # batch_first=True: inputs/outputs are (batch, seq, embed).
        self.MHA = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=num_head,
            dropout=droprate,
            batch_first=True,
        )
        self.FeedFordward = nn.Sequential(
            nn.Linear(d_model, ffn_dim),
            nn.GELU(),
            nn.Linear(ffn_dim, d_model),
        )
        self.drop_out = nn.Dropout(droprate)

    def forward(self, x):
        # Attention sub-block (pre-norm) with residual connection.
        attn = self.norm1(x)
        attn, _ = self.MHA(attn, attn, attn)
        x = x + attn
        # Feed-forward sub-block (pre-norm) with residual connection.
        ffn = self.norm2(x)
        ffn = self.FeedFordward(ffn)
        ffn = self.drop_out(ffn)
        return x + ffn
class NoiceDetectorModel(nn.Module):
    """ViT-style 3-class classifier: patchify -> +positions -> 2 transformer
    blocks -> MLP head on the [CLS] token.

    Fix versus the original: the head read ``x[:, -1, :]`` (the LAST patch
    token), but :class:`PositionalEmbedding` prepends the CLS token at
    index 0 — classification should read ``x[:, 0, :]``.

    NOTE(review): a checkpoint trained against the old ``[:, -1, :]`` head
    will behave differently after this fix — re-validate saved weights.
    """

    def __init__(self, image_size, d_model, num_head, ffn_dim, droprate=0.1):
        super().__init__()
        # Fixed 16x16 patches; n_patch is derived from image_size.
        self.patch_embedding = PatchEmbedding(
            image_size=image_size, patch_size=16, embedding_size=d_model
        )
        self.positional_embedding = PositionalEmbedding(
            self.patch_embedding.n_patch, d_model
        )
        self.blocklayers = nn.Sequential(
            BlockTransformers(d_model, num_head, ffn_dim, droprate),
            BlockTransformers(d_model, num_head, ffn_dim, droprate),
        )
        # Classification head: d_model -> 128 -> 3 logits.
        self.linear1 = nn.Linear(d_model, 128)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(128, 3)

    def forward(self, x):
        x = self.patch_embedding(x)          # (B, N, d_model)
        x = self.positional_embedding(x)     # (B, N+1, d_model), CLS at index 0
        x = self.blocklayers(x)
        x = x[:, 0, :]                       # read the CLS token (was [:, -1, :])
        x = self.linear1(x)
        x = self.relu(x)
        return self.linear2(x)               # raw logits; caller applies softmax
class ModelRunners:
    """Load a trained NoiceDetectorModel checkpoint and run single-image inference.

    Fixes versus the original:
      * ``torch.load(..., map_location=...)`` so a GPU-saved checkpoint still
        loads on a CPU-only host.
      * The input tensor is moved to the model's device (a CUDA model would
        previously crash on the CPU input).
      * ``.cpu()`` before ``.numpy()`` — CUDA tensors cannot convert directly.
    """

    def __init__(self, path):
        # Hyperparameters must match the checkpoint at ``path``.
        self.Model = NoiceDetectorModel(
            image_size=384, d_model=256, num_head=4, ffn_dim=784
        )
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # map_location keeps loading portable across CPU/GPU hosts.
        self.__checkpoint = torch.load(path, map_location=self.device)
        self.Model.load_state_dict(self.__checkpoint)
        self.Model.to(self.device)
        self.Model.eval()
        # ImageNet normalization statistics (standard pretrained values).
        self.transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def modelrun(self, x_target):
        """Return softmax class probabilities as a numpy array of shape (1, 3).

        ``x_target`` may be a PIL image / ndarray (it is then transformed) or
        an already-preprocessed CHW tensor; a batch axis is added either way.
        """
        if not isinstance(x_target, torch.Tensor):
            x_target = self.transform(x_target)
        # Add batch dim and move to the model's device.
        x_target = torch.unsqueeze(x_target, dim=0).to(self.device)
        with torch.no_grad():
            pred = self.Model(x_target)
            pred = F.softmax(pred, dim=-1)
        # softmax always yields a tensor, so the original isinstance branch
        # was dead code; detach -> cpu -> numpy is the portable conversion.
        return pred.detach().cpu().numpy()