File size: 1,010 Bytes
6835659 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | from __future__ import annotations
import numpy as np
import torch
class ProjectionHead:
    """Project embeddings from an arbitrary dimension into a shared dimension.

    When ``in_dim == out_dim`` the head is an IDENTITY (pass-through).
    This preserves pre-trained alignment (CLIP text-image, CLAP text-audio);
    a random linear projection would destroy that alignment.

    When ``in_dim != out_dim`` a bias-free ``torch.nn.Linear`` layer is used.
    Its weights are whatever ``torch.nn.Linear`` initialises them to, so the
    output is only meaningful if the layer is trained.

    Attributes:
        layer: The ``torch.nn.Linear`` used for projection, or ``None`` when
            the head is an identity.
    """

    def __init__(self, in_dim: int, out_dim: int = 512):
        """Build the head.

        Args:
            in_dim: Size of the last axis of incoming embeddings.
            out_dim: Size of the last axis of projected embeddings.
        """
        self._identity = (in_dim == out_dim)
        self.layer = None
        if not self._identity:
            self.layer = torch.nn.Linear(in_dim, out_dim, bias=False)
            # Inference-only usage here; put the module in eval mode up front.
            self.layer.eval()

    @torch.no_grad()
    def project(self, emb: np.ndarray) -> np.ndarray:
        """Return ``emb`` projected to the shared dimension as float32.

        Args:
            emb: Embeddings as a numpy array (any numeric dtype, any memory
                layout) or an array-like such as a nested list. The last
                axis must have size ``in_dim`` when a linear layer is used.

        Returns:
            A new float32 numpy array; the input is never modified or
            aliased.
        """
        if self._identity:
            # np.array copies by default, matching the copy semantics of
            # ndarray.astype while also accepting non-ndarray array-likes.
            return np.array(emb, dtype=np.float32)

        # torch.from_numpy rejects negatively-strided views (e.g. emb[::-1])
        # and dtypes torch has no equivalent for, so normalise to a
        # contiguous float32 buffer in numpy before handing it to torch.
        contiguous = np.ascontiguousarray(emb, dtype=np.float32)
        projected = self.layer(torch.from_numpy(contiguous))
        # The layer output is a fresh float32 tensor, so its numpy view does
        # not alias the caller's data and needs no further cast.
        return projected.numpy()
|