Spaces:
Sleeping
Sleeping
File size: 1,784 Bytes
5be3c34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import torch
import torch.nn as nn
import torch.nn.functional as F
class NosePointRegressor(nn.Module):
    """Small convolutional network that regresses one 2-D point per image.

    Three stride-2 convolutions each roughly halve the spatial resolution,
    a global average pool collapses whatever is left to a single 64-value
    vector per sample, and a two-layer MLP maps that vector to two
    sigmoid-squashed outputs. Because of the global pool the network does
    not depend on a fixed input height or width.

    Args:
        input_channels: Number of channels in the input images (default 1).

    NOTE(review): the two outputs are presumably the nose (x, y) position
    normalised by image width/height -- confirm the exact convention
    against the training/label code.
    """

    def __init__(self, input_channels: int = 1) -> None:
        super().__init__()

        # The position of every layer inside each nn.Sequential is part of
        # the checkpoint format (state_dict keys such as "encoder.2.weight"),
        # so do not reorder or insert layers without migrating checkpoints.
        self.encoder = nn.Sequential(
            # -> [B, 16, ~H/2, ~W/2]
            nn.Conv2d(input_channels, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # -> [B, 32, ~H/4, ~W/4]
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # -> [B, 64, ~H/8, ~W/8]
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # Global average pool -> [B, 64, 1, 1]
            nn.AdaptiveAvgPool2d((1, 1)),
        )

        self.fc = nn.Sequential(
            nn.Flatten(),        # [B, 64, 1, 1] -> [B, 64]
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),    # two values: the predicted (x, y)
            nn.Sigmoid(),        # squash both values into [0, 1]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Batched image tensor of shape [B, input_channels, H, W].

        Returns:
            Tensor of shape [B, 2] whose entries lie in [0, 1].
        """
        features = self.encoder(x)
        return self.fc(features)
import torchvision.models as models
import torch.nn as nn
class ResNetNoseRegressor(nn.Module):
    """ResNet-18 feature extractor with a small head that regresses a 2-D point.

    The torchvision ResNet-18 is truncated before its own pooling and
    classification layers; a fresh global average pool and a two-layer MLP
    ending in a sigmoid are attached instead.

    Args:
        pretrained: When true, the backbone starts from torchvision's
            ImageNet-trained ResNet-18 weights; when false it is randomly
            initialised.

    NOTE(review): the stock ResNet-18 stem expects 3-channel input, so
    single-channel images presumably need to be replicated to 3 channels
    by the caller -- confirm against the data pipeline.
    """

    def __init__(self, pretrained: bool = True) -> None:
        super().__init__()
        resnet = self._build_resnet18(pretrained)

        # In torchvision's ResNet the last two children are the global
        # average pool and the fc classifier; dropping both leaves only the
        # convolutional stages, which emit 512 feature maps for ResNet-18.
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.head = nn.Sequential(
            nn.Flatten(),         # [B, 512, 1, 1] -> [B, 512]
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
            nn.Sigmoid(),         # normalised (x, y) in [0, 1]
        )

    @staticmethod
    def _build_resnet18(pretrained: bool) -> nn.Module:
        """Create a ResNet-18 without using the deprecated ``pretrained`` flag.

        torchvision >= 0.13 replaces ``pretrained=`` with ``weights=`` and
        warns when the old keyword is used. Older releases do not have the
        weights enum, so the legacy keyword is kept as a fallback for them.
        """
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is None:
            # torchvision < 0.13: only the legacy keyword exists.
            return models.resnet18(pretrained=pretrained)
        # IMAGENET1K_V1 is the checkpoint that pretrained=True used to load.
        weights = weights_enum.IMAGENET1K_V1 if pretrained else None
        return models.resnet18(weights=weights)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Batched image tensor accepted by the ResNet-18 backbone.

        Returns:
            Tensor of shape [B, 2] whose entries lie in [0, 1].
        """
        feature_maps = self.backbone(x)
        pooled = self.pool(feature_maps)
        return self.head(pooled)
|