Spaces:
Running on Zero
Running on Zero
Fix ZeroGPU retrieval: pin jina query embedder to CPU
Browse files
rag.py
CHANGED
|
@@ -58,8 +58,11 @@ def _chunks() -> dict[int, dict]:
|
|
| 58 |
@lru_cache(maxsize=1)
|
| 59 |
def _embedder():
|
| 60 |
# transformers ~=4.45 (pinned) loads jina's remote code without shims.
|
|
|
|
|
|
|
|
|
|
| 61 |
from sentence_transformers import SentenceTransformer
|
| 62 |
-
return SentenceTransformer(EMBED_MODEL, trust_remote_code=True)
|
| 63 |
|
| 64 |
|
| 65 |
def _embed_query(query: str) -> np.ndarray:
|
|
|
|
| 58 |
@lru_cache(maxsize=1)
|
| 59 |
def _embedder():
|
| 60 |
# transformers ~=4.45 (pinned) loads jina's remote code without shims.
|
| 61 |
+
# device="cpu" is REQUIRED on ZeroGPU: query embedding runs in retrieve(),
|
| 62 |
+
# outside the @spaces.GPU block, so CUDA isn't really allocated there — left
|
| 63 |
+
# on auto it lands on a phantom cuda device and returns zero vectors.
|
| 64 |
from sentence_transformers import SentenceTransformer
|
| 65 |
+
return SentenceTransformer(EMBED_MODEL, trust_remote_code=True, device="cpu")
|
| 66 |
|
| 67 |
|
| 68 |
def _embed_query(query: str) -> np.ndarray:
|