vivekchakraverty committed on
Commit
e48654b
·
verified ·
1 Parent(s): 0f4aa1b

Fix ZeroGPU retrieval: pin jina query embedder to CPU

Browse files
Files changed (1) hide show
  1. rag.py +4 -1
rag.py CHANGED
@@ -58,8 +58,11 @@ def _chunks() -> dict[int, dict]:
58
  @lru_cache(maxsize=1)
59
  def _embedder():
60
  # transformers ~=4.45 (pinned) loads jina's remote code without shims.
 
 
 
61
  from sentence_transformers import SentenceTransformer
62
- return SentenceTransformer(EMBED_MODEL, trust_remote_code=True)
63
 
64
 
65
  def _embed_query(query: str) -> np.ndarray:
 
58
  @lru_cache(maxsize=1)
59
  def _embedder():
60
  # transformers ~=4.45 (pinned) loads jina's remote code without shims.
61
+ # device="cpu" is REQUIRED on ZeroGPU: query embedding runs in retrieve(),
62
+ # outside the @spaces.GPU block, so CUDA isn't really allocated there — left
63
+ # on auto it lands on a phantom cuda device and returns zero vectors.
64
  from sentence_transformers import SentenceTransformer
65
+ return SentenceTransformer(EMBED_MODEL, trust_remote_code=True, device="cpu")
66
 
67
 
68
  def _embed_query(query: str) -> np.ndarray: