sofhiaazzhr committed on
Commit
2167a5b
·
1 Parent(s): 9cb950f

[NOTICKET][doc] validate embedding vector for NaN/Infinity in manhattan retriever

Browse files
Files changed (1) hide show
  1. src/rag/retrievers/document.py +4 -0
src/rag/retrievers/document.py CHANGED
@@ -1,5 +1,7 @@
1
  """Document retriever — handles PDF, DOCX, TXT chunks (source_type="document", non-tabular)."""
2
 
 
 
3
  from langchain_postgres import PGVector
4
  from langchain_postgres.vectorstores import DistanceStrategy
5
  from langchain_openai import AzureOpenAIEmbeddings
@@ -123,6 +125,8 @@ class DocumentRetriever(BaseRetriever):
123
  self, query: str, user_id: str, k: int, fetch_k: int
124
  ) -> list[RetrievalResult]:
125
  query_vector = await _embeddings.aembed_query(query)
 
 
126
  vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"
127
 
128
  async with _pgvector_engine.connect() as conn:
 
1
  """Document retriever — handles PDF, DOCX, TXT chunks (source_type="document", non-tabular)."""
2
 
3
+ import math
4
+
5
  from langchain_postgres import PGVector
6
  from langchain_postgres.vectorstores import DistanceStrategy
7
  from langchain_openai import AzureOpenAIEmbeddings
 
125
  self, query: str, user_id: str, k: int, fetch_k: int
126
  ) -> list[RetrievalResult]:
127
  query_vector = await _embeddings.aembed_query(query)
128
+ if not all(math.isfinite(v) for v in query_vector):
129
+ raise ValueError("Embedding vector contains NaN or Infinity values.")
130
  vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"
131
 
132
  async with _pgvector_engine.connect() as conn: