Spaces:
Sleeping
Sleeping
File size: 2,739 Bytes
from aimakerspace.vectordatabase import VectorDatabase
import asyncio
import numpy as np
from langchain_core.documents import Document
from typing import List, Dict, Any, Callable
class SimpleRetriever:
    """Lightweight retriever over a vector database, with no Pydantic model.

    The database is searched by text and yields ``(id, score)`` pairs; each
    ID is then swapped for its stored text and wrapped in a ``Document``.
    """

    def __init__(self, vector_db, text_map, k=5):
        """Keep references to everything needed at query time.

        Args:
            vector_db: Database object; its ``search_by_text`` method is
                called for every query.
            text_map: Mapping from document ID to that document's full text.
            k: Number of results requested from the database per query.
        """
        self.k = k
        self.text_map = text_map
        self.vector_db = vector_db

    def _resolve_text(self, doc_id):
        """Return the stored text for ``doc_id``, or a placeholder if unknown."""
        if doc_id not in self.text_map:
            # Unknown IDs still produce a document so no hit is dropped.
            return f"Document {doc_id} content not available"
        return self.text_map[doc_id]

    def invoke(self, query: str) -> List[Document]:
        """Search the database for ``query`` and return the hits as documents.

        Args:
            query: Text passed to ``vector_db.search_by_text``.

        Returns:
            One ``Document`` per search hit, in the order the database
            returned them. ``page_content`` holds the mapped text (or a
            placeholder message when the ID has no entry in ``text_map``);
            ``metadata`` carries the hit's ``"score"`` and ``"id"``.
        """
        # return_as_text=False keeps the (id, score) pairs rather than bare keys.
        hits = self.vector_db.search_by_text(
            query, k=self.k, return_as_text=False
        )
        return [
            Document(
                page_content=self._resolve_text(hit_id),
                metadata={"score": hit_score, "id": hit_id},
            )
            for hit_id, hit_score in hits
        ]

    def __call__(self, query):
        """Allow the retriever to be used like a function; same as ``invoke``."""
        return self.invoke(query)
def create_vector_store(embeddings, texts=None):
    """Build a ``VectorDatabase`` from embeddings and attach a retriever factory.

    Every embedding is inserted under the ID ``text_<index>``. The returned
    database also gets an ``as_retriever`` attribute that produces a
    ``SimpleRetriever`` bound to this database and its ID-to-text mapping.

    Args:
        embeddings: Embeddings to insert, one per document.
        texts: Optional text documents matching ``embeddings`` one-to-one.
            They are used only when non-empty and exactly as long as
            ``embeddings``; otherwise every document gets a placeholder
            message as its content.

    Returns:
        The populated ``VectorDatabase`` instance.
    """
    store = VectorDatabase()
    contents_by_id = {}

    # Texts are trusted only when they pair up one-to-one with the embeddings.
    texts_usable = bool(texts) and len(texts) == len(embeddings)
    if texts_usable:
        pairs = zip(texts, embeddings)
    else:
        pairs = ((None, vector) for vector in embeddings)

    for index, (content, vector) in enumerate(pairs):
        key = f"text_{index}"
        store.insert(key, vector)
        if texts_usable:
            contents_by_id[key] = content
        else:
            contents_by_id[key] = f"Content for document {key} not available"

    def as_retriever(search_kwargs=None):
        """Return a ``SimpleRetriever``; ``search_kwargs["k"]`` overrides k=5."""
        top_k = search_kwargs.get("k", 5) if search_kwargs else 5
        return SimpleRetriever(
            vector_db=store,
            text_map=contents_by_id,
            k=top_k,
        )

    # Attached on the instance so callers can write store.as_retriever(...).
    store.as_retriever = as_retriever
    return store