File size: 1,281 Bytes
c8ddb9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""ACM and its variations"""

from typing import Any

import torch
from torch import nn

from .conv_utils import conv2d


class ACM(nn.Module):
    """Affine Combination Module from ManiGAN"""

    def __init__(self, img_chans: int, text_chans: int, inner_dim: int = 64) -> None:
        """
        Initialize the convolutional layers

        :param int img_chans: Channels in visual input
        :param int text_chans: Channels of textual input
        :param int inner_dim: Hyperparameters for inner dimensionality of features
        """
        super().__init__()
        self.conv = conv2d(in_channels=img_chans, out_channels=inner_dim)
        self.weights = conv2d(in_channels=inner_dim, out_channels=text_chans)
        self.biases = conv2d(in_channels=inner_dim, out_channels=text_chans)

    def forward(self, text: torch.Tensor, img: torch.Tensor) -> Any:
        """
        Propagate the textual and visual input through the ACM module

        :param torch.Tensor text: Textual input (can be hidden features)
        :param torch.Tensor img: Image input
        :return: Affine combination of text and image
        :rtype: torch.Tensor
        """
        img_features = self.conv(img)
        return text * self.weights(img_features) + self.biases(img_features)