"""Regression models that predict a normalized (x, y) nose-point coordinate."""

import torch
import torch.nn as nn
import torch.nn.functional as F

try:
    import torchvision.models as models
except ImportError:  # torchvision is only required by ResNetNoseRegressor
    models = None


class NosePointRegressor(nn.Module):
    """Small convolutional network that regresses one (x, y) point per image.

    Three stride-2 convolutions downsample the input, a global average pool
    collapses the spatial dimensions, and a two-layer MLP followed by a
    sigmoid produces two values in [0, 1].

    Args:
        input_channels: Number of channels in the input images.
    """

    def __init__(self, input_channels: int = 1) -> None:
        super().__init__()
        # Each conv (kernel 3, stride 2, padding 1) maps a spatial size S to
        # ceil(S / 2); the adaptive pool makes the head independent of the
        # input resolution.
        self.encoder = nn.Sequential(
            nn.Conv2d(input_channels, 16, kernel_size=3, stride=2, padding=1),  # -> [B, 16, ~H/2, ~W/2]
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # -> [B, 32, ~H/4, ~W/4]
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  # -> [B, 64, ~H/8, ~W/8]
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),  # -> [B, 64, 1, 1]
        )
        self.fc = nn.Sequential(
            nn.Flatten(),  # -> [B, 64]
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),  # predict the (x, y) coordinate
            nn.Sigmoid(),  # squash the output into [0, 1]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return a ``[B, 2]`` tensor of values in [0, 1].

        Args:
            x: Image batch of shape ``[B, input_channels, H, W]``.
        """
        features = self.encoder(x)
        return self.fc(features)


class ResNetNoseRegressor(nn.Module):
    """ResNet-18 backbone with a small regression head for one (x, y) point.

    The torchvision ResNet-18 is truncated before its global average pool and
    classification layer; a fresh pool and a two-layer MLP with a sigmoid
    produce two values in [0, 1]. Per the torchvision documentation the
    ResNet-18 stem takes 3-channel input.

    Args:
        pretrained: If true, initialize the backbone with torchvision's
            ImageNet weights; otherwise use random initialization.

    Raises:
        ImportError: If torchvision is not installed.
    """

    def __init__(self, pretrained: bool = True) -> None:
        super().__init__()
        if models is None:
            raise ImportError(
                "ResNetNoseRegressor requires torchvision to be installed."
            )

        # torchvision >= 0.13 replaced ``pretrained=`` with ``weights=``;
        # fall back to the legacy keyword on older releases.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            resnet = models.resnet18(weights=weights)
        else:
            resnet = models.resnet18(pretrained=pretrained)

        # Drop the last two children (global average pool and the FC
        # classifier), keeping only the convolutional feature extractor.
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.head = nn.Sequential(
            nn.Flatten(),  # -> [B, 512]
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
            nn.Sigmoid(),  # normalized (x, y)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return a ``[B, 2]`` tensor of values in [0, 1] for image batch *x*."""
        features = self.backbone(x)
        pooled = self.pool(features)
        return self.head(pooled)