Investor-API / utils /VectorDatabase.py
ashishbangwal's picture
id error resolved
c926830
"""
Contain Wrapper Class for ChormaDB client, that can process and store documents and retrive document chunks.
"""
# for chromaDB
__import__("pysqlite3")
import sys
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
from typing import List, Optional, Tuple
import chromadb
class AdvancedClient:
def __init__(self, vector_database_path: str = "vectorDB") -> None:
self.client = chromadb.PersistentClient(path=vector_database_path)
def create_collection(
self,
collection_id: str,
file_datas: List[Tuple[str, int]],
):
chunks = []
ids = []
for chunk, _id in file_datas:
chunks.append(chunk)
ids.append(str(_id)) #make sure IDs are string dtpye
from .ModelCallingFunctions import generate_embedding
embeddings = generate_embedding(texts=chunks)
collection = self.client.create_collection(collection_id)
collection.add(
ids=ids,
embeddings=embeddings, # type: ignore
documents=chunks,
)
def retrieve_chunks(
self,
collection_id: str,
query: str = "NONE",
query_embedding: Optional[List[float]] = None,
number_of_chunks: int = 3,
):
collection = self.client.get_collection(name=collection_id)
if query_embedding == None:
from .ModelCallingFunctions import generate_embedding
query_emb = generate_embedding([query])[0]
else:
query_emb = query_embedding
results = collection.query(
query_embeddings=query_emb,
n_results=number_of_chunks,
)
return results["documents"][0] # pyright: ignore