Spaces:
Sleeping
Sleeping
| """Vector store service using Pinecone.""" | |
| from typing import List, Optional | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_pinecone import PineconeVectorStore | |
| from langchain.schema import Document | |
| from pinecone import Pinecone, ServerlessSpec | |
| from app.config import settings | |
class VectorStoreService:
    """Manage a Pinecone-backed LangChain vector store.

    The service is lazy: constructing it performs no I/O. The embedding
    model, the Pinecone client and the vector store are created on the
    first call to :meth:`initialize` (which the async helpers invoke
    automatically).

    All blocking work (model loading, Pinecone HTTP calls, embedding and
    querying) is executed in the event loop's default thread-pool executor
    so that awaiting these methods never stalls other coroutines.
    """

    def __init__(self):
        self.embeddings: Optional[HuggingFaceEmbeddings] = None
        self.vector_store: Optional[PineconeVectorStore] = None
        self.pc: Optional[Pinecone] = None
        self._initialized = False
        # Created lazily in initialize() so the lock is built while an event
        # loop is running (instances may be constructed at import time).
        self._init_lock = None

    @staticmethod
    async def _run_blocking(func, *args, **kwargs):
        """Run the blocking callable *func* in the default executor and return its result."""
        # Function-scope imports keep this block self-contained.
        import asyncio
        import functools

        loop = asyncio.get_running_loop()
        call = functools.partial(func, *args, **kwargs)
        return await loop.run_in_executor(None, call)

    def _initialize_sync(self):
        """Build embeddings, client, index and store; publish them only on success."""
        embeddings = HuggingFaceEmbeddings(
            model_name=settings.EMBEDDING_MODEL,
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
        client = Pinecone(api_key=settings.PINECONE_API_KEY)
        index_name = settings.PINECONE_INDEX_NAME

        # Create the index if it doesn't exist yet.
        existing_names = {idx.name for idx in client.list_indexes()}
        if index_name not in existing_names:
            # Derive the dimension from the configured model instead of
            # hard-coding 384, which is only right for all-MiniLM-L6-v2.
            dimension = len(embeddings.embed_query("dimension probe"))
            client.create_index(
                name=index_name,
                dimension=dimension,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )

        store = PineconeVectorStore(
            index_name=index_name,
            embedding=embeddings,
            pinecone_api_key=settings.PINECONE_API_KEY,
        )

        # Assign everything at the end so a failure above never leaves the
        # instance half-initialized.
        self.embeddings = embeddings
        self.pc = client
        self.vector_store = store
        self._initialized = True

    async def initialize(self):
        """Initialize Pinecone and embeddings.

        Safe to call repeatedly and concurrently: the work runs at most once
        per successful initialization. If it raises, the instance stays
        uninitialized and a later call retries.
        """
        if self._initialized:
            return

        import asyncio

        if self._init_lock is None:
            self._init_lock = asyncio.Lock()
        async with self._init_lock:
            # Re-check: another coroutine may have finished while we waited.
            if self._initialized:
                return
            await self._run_blocking(self._initialize_sync)

    async def add_documents(self, documents: List[Document]) -> int:
        """Add documents to the vector store.

        Args:
            documents: Documents to embed and upsert.

        Returns:
            The number of documents submitted (0 for an empty list, in which
            case nothing is initialized or sent).
        """
        if not documents:
            return 0
        if not self._initialized:
            await self.initialize()
        await self._run_blocking(self.vector_store.add_documents, documents)
        return len(documents)

    async def similarity_search(
        self, query: str, k: int = 4
    ) -> List[Document]:
        """Search for similar documents.

        Args:
            query: Text to embed and search with.
            k: Maximum number of documents to return.

        Returns:
            Whatever the underlying store's ``similarity_search`` returns.
        """
        if not self._initialized:
            await self.initialize()
        return await self._run_blocking(
            self.vector_store.similarity_search, query, k=k
        )

    async def similarity_search_with_score(
        self, query: str, k: int = 4
    ) -> List[tuple]:
        """Search for similar documents with relevance scores.

        Args:
            query: Text to embed and search with.
            k: Maximum number of results to return.

        Returns:
            Whatever the underlying store's ``similarity_search_with_score``
            returns.
        """
        if not self._initialized:
            await self.initialize()
        return await self._run_blocking(
            self.vector_store.similarity_search_with_score, query, k=k
        )

    def get_retriever(self, k: int = 4):
        """Get a retriever for use in chains.

        Args:
            k: Number of documents the retriever should fetch per query.

        Raises:
            RuntimeError: If :meth:`initialize` has not completed. This
                method is synchronous and cannot initialize lazily.
        """
        if not self._initialized:
            raise RuntimeError("Vector store not initialized")
        return self.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": k},
        )
# Shared module-level instance. Constructing it is cheap: __init__ only sets
# placeholder attributes, and no model or client is created until
# initialize() is called.
vector_store_service = VectorStoreService()