""" Contain Wrapper Class for ChormaDB client, that can process and store documents and retrive document chunks. """ # for chromaDB __import__("pysqlite3") import sys sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") from typing import List, Optional, Tuple import chromadb class AdvancedClient: def __init__(self, vector_database_path: str = "vectorDB") -> None: self.client = chromadb.PersistentClient(path=vector_database_path) def create_collection( self, collection_id: str, file_datas: List[Tuple[str, int]], ): chunks = [] ids = [] for chunk, _id in file_datas: chunks.append(chunk) ids.append(str(_id)) #make sure IDs are string dtpye from .ModelCallingFunctions import generate_embedding embeddings = generate_embedding(texts=chunks) collection = self.client.create_collection(collection_id) collection.add( ids=ids, embeddings=embeddings, # type: ignore documents=chunks, ) def retrieve_chunks( self, collection_id: str, query: str = "NONE", query_embedding: Optional[List[float]] = None, number_of_chunks: int = 3, ): collection = self.client.get_collection(name=collection_id) if query_embedding == None: from .ModelCallingFunctions import generate_embedding query_emb = generate_embedding([query])[0] else: query_emb = query_embedding results = collection.query( query_embeddings=query_emb, n_results=number_of_chunks, ) return results["documents"][0] # pyright: ignore