gregtatum committed
Commit 87e8c82 · 0 Parent(s)

Initial commit
Files changed (9)
  1. .gitignore +10 -0
  2. .python-version +1 -0
  3. README.md +0 -0
  4. main.py +6 -0
  5. multilingual.py +22 -0
  6. potion.py +14 -0
  7. pyproject.toml +10 -0
  8. tomaarsen.py +15 -0
  9. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
+ # Python-generated files
+ __pycache__/
+ *.py[oc]
+ build/
+ dist/
+ wheels/
+ *.egg-info
+
+ # Virtual environments
+ .venv
.python-version ADDED
@@ -0,0 +1 @@
+ 3.13
README.md ADDED
File without changes
main.py ADDED
@@ -0,0 +1,6 @@
+ def main():
+     print("Hello from sentence-embeddings!")
+
+
+ if __name__ == "__main__":
+     main()
multilingual.py ADDED
@@ -0,0 +1,22 @@
+ from sentence_transformers import SentenceTransformer
+ from torch.nn import EmbeddingBag
+ import torch
+
+ model = SentenceTransformer(
+     "sentence-transformers/static-similarity-mrl-multilingual-v1"
+ )
+ embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
+ embeddings = torch.Tensor(embedding_bag.weight)
+
+ print(embeddings.shape)
+ assert embeddings.shape == torch.Size([105879, 1024])
+
+ print("float32")
+ print(f" 1024 dim - {embeddings.shape[0] * 1024 * 4 / 1024 / 1024:,.1f} MiB")
+ print(f" 512 dim - {embeddings.shape[0] * 512 * 4 / 1024 / 1024:,.1f} MiB")
+ print(f" 256 dim - {embeddings.shape[0] * 256 * 4 / 1024 / 1024:,.1f} MiB")
+
+ print("float16")
+ print(f" 1024 dim - {embeddings.shape[0] * 1024 * 2 / 1024 / 1024:,.1f} MiB")
+ print(f" 512 dim - {embeddings.shape[0] * 512 * 2 / 1024 / 1024:,.1f} MiB")
+ print(f" 256 dim - {embeddings.shape[0] * 256 * 2 / 1024 / 1024:,.1f} MiB")
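For reference (not part of the commit), the sizes this script prints follow from rows × dimensions × bytes per value for the 105,879-row embedding table: float32 comes to roughly 413.6 MiB at 1024 dimensions, 206.8 MiB at 512, and 103.4 MiB at 256, while float16 halves each of those to about 206.8, 103.4, and 51.7 MiB (e.g. 105,879 × 1024 × 4 bytes = 433,680,384 bytes ≈ 413.6 MiB).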
potion.py ADDED
@@ -0,0 +1,14 @@
+ from model2vec import StaticModel
+ from tokenizers import Tokenizer
+ import torch
+
+ model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")
+ embeddings = torch.from_numpy(model.embedding)
+
+ print("Embedding shape:", embeddings.shape)
+ bytes = embeddings.shape[0] * embeddings.shape[1] * 4
+
+ print("MiB:", bytes / 1024 / 1024)
+
+ tokenizer: Tokenizer = model.tokenizer
+ print(tokenizer.to_str())
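For context (not part of the commit), the StaticModel inspected here is normally used through model2vec's encode API, which averages the static token embeddings into one vector per sentence; a minimal usage sketch:

    from model2vec import StaticModel

    # Load the same static model as potion.py and embed a couple of sentences.
    model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")
    sentence_embeddings = model.encode(["Hello, world", "Bonjour tout le monde"])

    # One row per input sentence, one column per embedding dimension.
    print(sentence_embeddings.shape)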
pyproject.toml ADDED
@@ -0,0 +1,10 @@
+ [project]
+ name = "sentence-embeddings"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.13"
+ dependencies = [
+     "model2vec>=0.6.0",
+     "sentence-transformers>=5.1.0",
+ ]
tomaarsen.py ADDED
@@ -0,0 +1,15 @@
+ from sentence_transformers import SentenceTransformer
+ from torch.nn import EmbeddingBag
+ import torch
+
+ model = SentenceTransformer("tomaarsen/static-retrieval-mrl-en-v1")
+ embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
+ embeddings = torch.Tensor(embedding_bag.weight)
+
+ assert embeddings.shape == torch.Size([30522, 1024])
+
+ print(f"1024 dim - {embeddings.shape[0] * 1024 * 4 / 1024 / 1024:,.1f} MiB:")
+ print(f"512 dim - {embeddings.shape[0] * 512 * 4 / 1024 / 1024:,.1f} MiB:")
+ print(f"256 dim - {embeddings.shape[0] * 256 * 4 / 1024 / 1024:,.1f} MiB:")
+
+ print("Embeddings[0]", embeddings[0])
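The 512- and 256-dimension rows above correspond to Matryoshka (MRL) truncation of the same embedding table. As a rough sketch of how that truncation is applied at inference time (not part of the commit, and assuming sentence-transformers' truncate_dim constructor argument):

    from sentence_transformers import SentenceTransformer

    # Keep only the first 256 dimensions of each embedding.
    model = SentenceTransformer("tomaarsen/static-retrieval-mrl-en-v1", truncate_dim=256)
    embeddings = model.encode(["static embeddings are fast to compute"])

    # Expected shape: (1, 256)
    print(embeddings.shape)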
uv.lock ADDED
The diff for this file is too large to render. See raw diff