Commit ·
2167a5b
1
Parent(s): 9cb950f
[NOTICKET][doc] validate embedding vector for NaN/Infinity in manhattan retriever
Browse files
src/rag/retrievers/document.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
"""Document retriever — handles PDF, DOCX, TXT chunks (source_type="document", non-tabular)."""
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from langchain_postgres import PGVector
|
| 4 |
from langchain_postgres.vectorstores import DistanceStrategy
|
| 5 |
from langchain_openai import AzureOpenAIEmbeddings
|
|
@@ -123,6 +125,8 @@ class DocumentRetriever(BaseRetriever):
|
|
| 123 |
self, query: str, user_id: str, k: int, fetch_k: int
|
| 124 |
) -> list[RetrievalResult]:
|
| 125 |
query_vector = await _embeddings.aembed_query(query)
|
|
|
|
|
|
|
| 126 |
vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"
|
| 127 |
|
| 128 |
async with _pgvector_engine.connect() as conn:
|
|
|
|
| 1 |
"""Document retriever — handles PDF, DOCX, TXT chunks (source_type="document", non-tabular)."""
|
| 2 |
|
| 3 |
+
import math
|
| 4 |
+
|
| 5 |
from langchain_postgres import PGVector
|
| 6 |
from langchain_postgres.vectorstores import DistanceStrategy
|
| 7 |
from langchain_openai import AzureOpenAIEmbeddings
|
|
|
|
| 125 |
self, query: str, user_id: str, k: int, fetch_k: int
|
| 126 |
) -> list[RetrievalResult]:
|
| 127 |
query_vector = await _embeddings.aembed_query(query)
|
| 128 |
+
if not all(math.isfinite(v) for v in query_vector):
|
| 129 |
+
raise ValueError("Embedding vector contains NaN or Infinity values.")
|
| 130 |
vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"
|
| 131 |
|
| 132 |
async with _pgvector_engine.connect() as conn:
|