from typing import Dict

import torch
from torch import nn


class MLPProjectionHead(nn.Module):
    """Projection head with a GELU non-linearity, dropout, a residual
    connection, and layer normalization."""

    def __init__(self, embedding_dim: int, projection_dim: int, dropout: float):
        super().__init__()
        self.projection = nn.Linear(embedding_dim, projection_dim)
        self.gelu = nn.GELU()
        self.fc = nn.Linear(projection_dim, projection_dim)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(projection_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        projected = self.projection(x)
        x = self.gelu(projected)
        x = self.fc(x)
        x = self.dropout(x)
        # Residual connection around the GELU/linear/dropout sub-block.
        x = x + projected
        x = self.layer_norm(x)
        return x


class LinearProjectionHead(nn.Module):
    """Single linear layer mapping embeddings into the projection space."""

    def __init__(self, embedding_dim: int, projection_dim: int):
        super().__init__()
        self.projection = nn.Linear(embedding_dim, projection_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.projection(x)


def load_projection_head(embedding_dim: int, config_projection_head: Dict) -> nn.Module:
    """Build a projection head from a config dict with keys "name"
    ("mlp" or "linear"), "proj_dim", and, for the MLP head, "dropout"."""
    name = config_projection_head["name"].lower()
    if name == "mlp":
        projection_head = MLPProjectionHead(
            embedding_dim=embedding_dim,
            projection_dim=config_projection_head["proj_dim"],
            dropout=config_projection_head["dropout"],
        )
    elif name == "linear":
        projection_head = LinearProjectionHead(
            embedding_dim=embedding_dim,
            projection_dim=config_projection_head["proj_dim"],
        )
    else:
        raise KeyError(f"Unsupported projection head: {config_projection_head}")
    return projection_head
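
# Minimal usage sketch (illustrative only; the config values below are
# assumptions, not fixed anywhere in this module): build an MLP head and
# project a batch of 512-dim embeddings down to 256 dims.
if __name__ == "__main__":
    config = {"name": "mlp", "proj_dim": 256, "dropout": 0.1}
    head = load_projection_head(embedding_dim=512, config_projection_head=config)
    embeddings = torch.randn(8, 512)  # batch of 8 embeddings
    projected = head(embeddings)
    print(projected.shape)  # torch.Size([8, 256])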