"""Vector store service using Pinecone.""" from typing import List, Optional from langchain_huggingface import HuggingFaceEmbeddings from langchain_pinecone import PineconeVectorStore from langchain.schema import Document from pinecone import Pinecone, ServerlessSpec from app.config import settings class VectorStoreService: """Manages Pinecone vector store operations.""" def __init__(self): self.embeddings: Optional[HuggingFaceEmbeddings] = None self.vector_store: Optional[PineconeVectorStore] = None self.pc: Optional[Pinecone] = None self._initialized = False async def initialize(self): """Initialize Pinecone and embeddings.""" if self._initialized: return # Initialize HuggingFace embeddings self.embeddings = HuggingFaceEmbeddings( model_name=settings.EMBEDDING_MODEL, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True} ) # Initialize Pinecone self.pc = Pinecone(api_key=settings.PINECONE_API_KEY) # Create index if it doesn't exist existing_indexes = [idx.name for idx in self.pc.list_indexes()] if settings.PINECONE_INDEX_NAME not in existing_indexes: self.pc.create_index( name=settings.PINECONE_INDEX_NAME, dimension=384, # all-MiniLM-L6-v2 dimension metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1"), ) # Initialize vector store self.vector_store = PineconeVectorStore( index_name=settings.PINECONE_INDEX_NAME, embedding=self.embeddings, pinecone_api_key=settings.PINECONE_API_KEY, ) self._initialized = True async def add_documents(self, documents: List[Document]) -> int: """Add documents to the vector store.""" if not self._initialized: await self.initialize() self.vector_store.add_documents(documents) return len(documents) async def similarity_search( self, query: str, k: int = 4 ) -> List[Document]: """Search for similar documents.""" if not self._initialized: await self.initialize() results = self.vector_store.similarity_search(query, k=k) return results async def similarity_search_with_score( self, query: str, k: int = 4 ) -> List[tuple]: """Search for similar documents with relevance scores.""" if not self._initialized: await self.initialize() results = self.vector_store.similarity_search_with_score(query, k=k) return results def get_retriever(self, k: int = 4): """Get a retriever for use in chains.""" if not self._initialized: raise RuntimeError("Vector store not initialized") return self.vector_store.as_retriever( search_type="similarity", search_kwargs={"k": k}, ) vector_store_service = VectorStoreService()