Spaces:

pratik-250620
/

MultiModal-Coherence-AI

Running

Upload folder using huggingface_hub

6835659 verified 27 days ago

1.01 kB

	from __future__ import annotations

	import numpy as np
	import torch


	class ProjectionHead:
	    """
	    Projects embeddings from arbitrary dim -> shared dim.

	    When in_dim == out_dim: uses IDENTITY (pass-through).
	    This preserves pre-trained alignment (CLIP text-image, CLAP text-audio).
	    A random linear projection would destroy that alignment.

	    When in_dim != out_dim: uses a linear layer (would need training for
	    meaningful results; acceptable only if you train it).

	    Attributes:
	        layer: ``None`` in identity mode, otherwise a bias-free
	            ``torch.nn.Linear(in_dim, out_dim)`` put into eval mode.
	    """

	    def __init__(self, in_dim: int, out_dim: int = 512):
	        """Build the head; a linear layer is created only if the dims differ.

	        Args:
	            in_dim: Size of the last axis of the incoming embeddings.
	            out_dim: Size of the shared embedding space.
	        """
	        self._identity = (in_dim == out_dim)
	        self.layer = None
	        if not self._identity:
	            # Randomly initialised weights: untrained output is not aligned.
	            self.layer = torch.nn.Linear(in_dim, out_dim, bias=False)
	            self.layer.eval()

	    @torch.no_grad()
	    def project(self, emb: np.ndarray) -> np.ndarray:
	        """Map ``emb`` into the shared space and return a float32 array.

	        Args:
	            emb: Embedding(s) as a NumPy array, or anything ``np.asarray``
	                accepts (e.g. nested lists). In linear mode the last axis
	                must have ``in_dim`` entries.

	        Returns:
	            A new float32 ``np.ndarray``. In identity mode it is a copy of
	            the input; in linear mode it is the layer output.
	        """
	        if self._identity:
	            # astype always copies, so the caller's array is never aliased.
	            return np.asarray(emb).astype(np.float32)

	        # torch.from_numpy rejects negative-stride views (e.g. emb[::-1]) and
	        # non-ndarray inputs; a contiguous float32 array is always accepted.
	        arr = np.ascontiguousarray(emb, dtype=np.float32)
	        # Follow the layer in case it was moved off the CPU after creation.
	        x = torch.from_numpy(arr).to(self.layer.weight.device)
	        y = self.layer(x)
	        return y.cpu().numpy().astype(np.float32, copy=False)