static-embeddings / scripts /build_models.py
gregtatum's picture
Add the model files
1ffeda6
raw
history blame
2.16 kB
import numpy as np
from zstandard import ZstdCompressor
from pathlib import Path
import io
from sentence_transformers import SentenceTransformer
from torch.nn import EmbeddingBag
import torch
def save_data(path: Path, tensor: torch.Tensor):
    """Serialize `tensor` as a zstd-compressed .npy file at `path`."""
    assert str(path).endswith(".npy.zst")
    # Serialize the array into memory first so the compressor receives the
    # complete .npy payload in one write.
    npy_bytes = io.BytesIO()
    np.save(npy_bytes, tensor.detach().numpy())
    with open(path, "wb") as outfile:
        with ZstdCompressor().stream_writer(outfile) as compressed:
            compressed.write(npy_bytes.getvalue())
# Output directory for the compressed embedding files.
model_path = Path("model")
# Hugging Face model id of the static multilingual similarity model.
model_name = "sentence-transformers/static-similarity-mrl-multilingual-v1"
# Expected row count (token vocabulary) of the embedding weight matrix.
vocab_size = 105_879
# Expected column count (full embedding dimensionality) before truncation.
dimensions = 1024
def load_embeddings():
    """Extract the static embedding matrix and export truncated copies.

    Loads the SentenceTransformer model on CPU, pulls the EmbeddingBag
    weight out of its first module, prints estimated on-disk sizes for a
    few dimension/dtype combinations, then writes one fp32, fp16, and
    int8 `.npy.zst` file per truncation dimension into `model_path`.
    """
    model = SentenceTransformer(model_name, device="cpu")
    embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
    embeddings = torch.Tensor(embedding_bag.weight)
    print(embeddings.shape)
    assert embeddings.shape == torch.Size([vocab_size, dimensions])

    # Report the raw (uncompressed) payload size for a few truncation choices.
    for dtype_name, bytes_per_value in (("float32", 4), ("float16", 2)):
        print(dtype_name)
        for dim in (1024, 512, 256):
            mib = embeddings.shape[0] * dim * bytes_per_value / 1024 / 1024
            print(f"{dim:>5} dim - {mib:,.1f} MiB")

    for dim in (1024, 512, 384, 256, 128):
        # The leading `dim` columns form the lower-dimensional embedding
        # (the model name suggests MRL/Matryoshka training — TODO confirm).
        truncated = embeddings[:, :dim]
        assert truncated.shape == torch.Size([vocab_size, dim])
        # Bug fix: the original saved the full `embeddings` matrix for every
        # dim, so all output files held identical 1024-dim data. Save the
        # truncated slice so each file matches its name.
        save_data(model_path / f"static-embeddings.{dim}.fp32.npy.zst", truncated)
        save_data(
            model_path / f"static-embeddings.{dim}.fp16.npy.zst",
            truncated.to(dtype=torch.float16),
        )
        # NOTE(review): `.to(torch.int8)` truncates float values toward zero
        # rather than quantizing (float weights near [-1, 1] become mostly 0).
        # Kept as-is to preserve output format — confirm the intended
        # quantization scheme before relying on the int8 files.
        save_data(
            model_path / f"static-embeddings.{dim}.int8.npy.zst",
            truncated.to(dtype=torch.int8),
        )
def main() -> None:
    """Script entry point: extract and export the static embeddings."""
    load_embeddings()
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()