Spaces:
Sleeping
Sleeping
Fix: Add get_embedding function and caching
Browse files- src/data_pipeline.py +17 -7
src/data_pipeline.py
CHANGED
|
@@ -6,18 +6,21 @@ from typing import List, Union
|
|
| 6 |
import torch
|
| 7 |
import torch.nn.functional as F
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def get_embeddings(model_name: str, texts: List[str]) -> np.ndarray:
|
| 10 |
"""
|
| 11 |
Loads the specified model and generates embeddings for the given texts.
|
| 12 |
Handles 'nomic' and 'qwen' specific requirements (trust_remote_code).
|
| 13 |
"""
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
trust_remote_code = False
|
| 17 |
-
if "nomic" in model_name or "qwen" in model_name:
|
| 18 |
-
trust_remote_code = True
|
| 19 |
-
|
| 20 |
-
model = SentenceTransformer(model_name, trust_remote_code=trust_remote_code, device='cpu')
|
| 21 |
|
| 22 |
# Generate embeddings
|
| 23 |
# Convert to numpy array if it returns a tensor or list
|
|
@@ -25,6 +28,13 @@ def get_embeddings(model_name: str, texts: List[str]) -> np.ndarray:
|
|
| 25 |
|
| 26 |
return embeddings
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def mrl_slice(vectors: np.ndarray, dims: int) -> np.ndarray:
|
| 29 |
"""
|
| 30 |
Slices the vectors to the specified dimensions AND applies L2 normalization *after* slicing.
|
|
|
|
| 6 |
import torch
|
| 7 |
import torch.nn.functional as F
|
| 8 |
|
| 9 |
+
# Process-wide cache so each embedding model is loaded at most once.
_MODEL_CACHE = {}


def get_model(model_name: str):
    """Fetch the SentenceTransformer for *model_name*, loading and caching it on first request."""
    cached = _MODEL_CACHE.get(model_name)
    if cached is None:
        print(f"Loading embedding model: {model_name}...")
        # These model families ship custom modeling code, so remote code must be trusted.
        needs_remote = any(tag in model_name for tag in ("nomic", "qwen"))
        cached = SentenceTransformer(model_name, trust_remote_code=needs_remote, device='cpu')
        _MODEL_CACHE[model_name] = cached
    return cached
|
| 17 |
+
|
| 18 |
def get_embeddings(model_name: str, texts: List[str]) -> np.ndarray:
|
| 19 |
"""
|
| 20 |
Loads the specified model and generates embeddings for the given texts.
|
| 21 |
Handles 'nomic' and 'qwen' specific requirements (trust_remote_code).
|
| 22 |
"""
|
| 23 |
+
model = get_model(model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
# Generate embeddings
|
| 26 |
# Convert to numpy array if it returns a tensor or list
|
|
|
|
| 28 |
|
| 29 |
return embeddings
|
| 30 |
|
| 31 |
+
def get_embedding(text: str, model_name: str = "nomic-ai/nomic-embed-text-v1.5") -> np.ndarray:
    """Embed a single query string and return its vector.

    Delegates to the batch path with a one-element list, then unwraps
    the sole row of the resulting array.
    """
    return get_embeddings(model_name, [text])[0]
|
| 37 |
+
|
| 38 |
def mrl_slice(vectors: np.ndarray, dims: int) -> np.ndarray:
|
| 39 |
"""
|
| 40 |
Slices the vectors to the specified dimensions AND applies L2 normalization *after* slicing.
|