Spaces:
Sleeping
Sleeping
| from aimakerspace.vectordatabase import VectorDatabase | |
| import asyncio | |
| import numpy as np | |
| from langchain_core.documents import Document | |
| from typing import List, Dict, Any, Callable | |
class SimpleRetriever:
    """Minimal retriever that does not depend on Pydantic.

    Wraps a vector database and an ID-to-text mapping so that search hits
    (document IDs) can be handed back as ``Document`` objects carrying the
    full text.
    """

    def __init__(self, vector_db, text_map, k=5):
        """Store the search backend and lookup table.

        Args:
            vector_db: The vector database; it must provide
                ``search_by_text(query, k=..., return_as_text=False)``.
            text_map: Mapping of document IDs to full text content.
            k: Number of results to request per query.
        """
        self.k = k
        self.text_map = text_map
        self.vector_db = vector_db

    def invoke(self, query: str) -> List[Document]:
        """Return one ``Document`` per search hit for ``query``.

        Each hit is unpacked as a ``(doc_id, score)`` pair. The page content
        is the text mapped to ``doc_id``; IDs missing from ``text_map`` get a
        placeholder message instead of being dropped. The score and ID are
        recorded in the document metadata.
        """
        hits = self.vector_db.search_by_text(
            query, k=self.k, return_as_text=False
        )
        return [
            Document(
                page_content=(
                    self.text_map[doc_id]
                    if doc_id in self.text_map
                    # Fallback so IDs without stored content still show up.
                    else f"Document {doc_id} content not available"
                ),
                metadata={"score": score, "id": doc_id},
            )
            for doc_id, score in hits
        ]

    def __call__(self, query):
        """Allow the retriever to be used like a plain function."""
        return self.invoke(query)
def create_vector_store(embeddings, texts=None):
    """Create a vector store from embeddings and, optionally, their texts.

    Every embedding is inserted into a fresh ``VectorDatabase`` under the ID
    ``text_<i>`` (``i`` being its position). A parallel ID-to-text map is
    built for the retriever: the real text when ``texts`` is usable, or a
    placeholder string otherwise.

    Args:
        embeddings: Iterable of embeddings to insert.
        texts: Optional documents corresponding position-by-position to
            ``embeddings``. They are used only when non-empty and of the
            same length as ``embeddings``.

    Returns:
        The populated ``VectorDatabase``. It carries an extra
        ``as_retriever(search_kwargs=None)`` attribute that builds a
        ``SimpleRetriever`` over the database; ``search_kwargs["k"]`` sets
        the number of results (default 5).

    Warns:
        UserWarning: If ``texts`` is non-empty but its length differs from
            ``embeddings``. The texts are then ignored and placeholders are
            stored instead.
    """
    import warnings

    vector_db = VectorDatabase()
    # Maps each document ID to the content the retriever should return.
    text_map = {}

    if texts and len(texts) == len(embeddings):
        for i, (text, embedding) in enumerate(zip(texts, embeddings)):
            doc_id = f"text_{i}"
            vector_db.insert(doc_id, embedding)
            text_map[doc_id] = text
    else:
        if texts:
            # Reaching here with non-empty texts means the lengths disagree.
            # Keep the best-effort fallback, but do not discard the caller's
            # texts silently.
            warnings.warn(
                f"create_vector_store received {len(texts)} texts for "
                f"{len(embeddings)} embeddings; ignoring texts and storing "
                "placeholder content instead",
                UserWarning,
                stacklevel=2,
            )
        for i, embedding in enumerate(embeddings):
            doc_id = f"text_{i}"
            vector_db.insert(doc_id, embedding)
            text_map[doc_id] = f"Content for document {doc_id} not available"

    def as_retriever(search_kwargs=None):
        """Build a ``SimpleRetriever`` bound to this database and text map."""
        k = search_kwargs.get("k", 5) if search_kwargs else 5
        return SimpleRetriever(vector_db=vector_db, text_map=text_map, k=k)

    # Attached on the instance so callers can use ``vector_db.as_retriever``.
    vector_db.as_retriever = as_retriever
    return vector_db