import io
from pathlib import Path

import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from torch.nn import EmbeddingBag
from zstandard import ZstdCompressor


def save_data(path: Path, tensor: torch.Tensor) -> None:
    """Write *tensor* to a zstd-compressed ``.npy`` file.

    The tensor is detached, serialized with ``np.save`` into an in-memory
    buffer, then streamed through a zstd compressor to ``path``.

    Args:
        path: Destination file; must end with ``.npy.zst``.
        tensor: Tensor to serialize (detached and converted to numpy first).
    """
    assert str(path).endswith(".npy.zst")
    buffer = io.BytesIO()
    np.save(buffer, tensor.detach().numpy())
    with (
        open(path, "wb") as outfile,
        ZstdCompressor().stream_writer(outfile) as writer,
    ):
        writer.write(buffer.getvalue())


model_path = Path("model")
model_name = "sentence-transformers/static-similarity-mrl-multilingual-v1"
vocab_size = 105_879
dimensions = 1024


def load_embeddings() -> None:
    """Export the model's static embedding table at several widths/dtypes.

    Loads the SentenceTransformer on CPU, pulls the EmbeddingBag weight
    matrix (vocab_size x dimensions), prints size estimates, then writes
    one ``.npy.zst`` file per (dim, dtype) combination under ``model/``.

    NOTE(review): the model name contains "mrl", presumably Matryoshka
    embeddings where a column-prefix slice is a valid lower-dim embedding
    — confirm against the model card.
    """
    model = SentenceTransformer(model_name, device="cpu")
    embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
    embeddings = torch.Tensor(embedding_bag.weight)
    print(embeddings.shape)
    assert embeddings.shape == torch.Size([vocab_size, dimensions])

    print("float32")
    print(f" 1024 dim - {embeddings.shape[0] * 1024 * 4 / 1024 / 1024:,.1f} MiB")
    print(f" 512 dim - {embeddings.shape[0] * 512 * 4 / 1024 / 1024:,.1f} MiB")
    print(f" 256 dim - {embeddings.shape[0] * 256 * 4 / 1024 / 1024:,.1f} MiB")
    print("float16")
    print(f" 1024 dim - {embeddings.shape[0] * 1024 * 2 / 1024 / 1024:,.1f} MiB")
    print(f" 512 dim - {embeddings.shape[0] * 512 * 2 / 1024 / 1024:,.1f} MiB")
    print(f" 256 dim - {embeddings.shape[0] * 256 * 2 / 1024 / 1024:,.1f} MiB")

    for dim in (1024, 512, 384, 256, 128):
        truncated = embeddings[:, :dim]
        assert truncated.shape == torch.Size([vocab_size, dim])
        # BUG FIX: the original passed the full `embeddings` tensor to every
        # save_data call, so all five dim variants contained identical
        # 1024-wide data; save the truncated slice instead.
        save_data(model_path / f"static-embeddings.{dim}.fp32.npy.zst", truncated)
        save_data(
            model_path / f"static-embeddings.{dim}.fp16.npy.zst",
            truncated.to(dtype=torch.float16),
        )
        # NOTE(review): .to(torch.int8) is a raw cast, not quantization —
        # fractional values truncate toward zero, so weights in (-1, 1) all
        # become 0. Confirm whether a scale/zero-point scheme was intended.
        save_data(
            model_path / f"static-embeddings.{dim}.int8.npy.zst",
            truncated.to(dtype=torch.int8),
        )


def main() -> None:
    """Script entry point: run the embedding export."""
    load_embeddings()


if __name__ == "__main__":
    main()