Spaces:

ashishbangwal
/

Investor-API

Build error

Investor-API / utils /VectorDatabase.py

id error resolved

c926830 over 1 year ago

1.73 kB

	"""
	Contains a wrapper class for the ChromaDB client that can process and store documents and retrieve document chunks.
	"""

	# Swap the standard-library sqlite3 module for pysqlite3: import pysqlite3,
	# then re-register it in sys.modules under the name "sqlite3". This runs
	# before `import chromadb` below, so any later `import sqlite3` resolves to
	# pysqlite3.
	# NOTE(review): presumably ChromaDB needs a newer SQLite than the host's
	# built-in sqlite3 module provides - confirm.
	__import__("pysqlite3")
	import sys

	sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

	from typing import List, Optional, Tuple
	import chromadb


	class AdvancedClient:
	    """Wrapper around a persistent ChromaDB client.

	    Stores text chunks together with their embeddings in named collections
	    and retrieves the chunks matched by a query embedding.
	    """

	    def __init__(self, vector_database_path: str = "vectorDB") -> None:
	        """Open a ``chromadb.PersistentClient`` at ``vector_database_path``."""
	        self.client = chromadb.PersistentClient(path=vector_database_path)

	    def create_collection(
	        self,
	        collection_id: str,
	        file_datas: List[Tuple[str, int]],
	    ) -> None:
	        """Create a collection and fill it with embedded text chunks.

	        Args:
	            collection_id: Name passed to the client's ``create_collection``.
	            file_datas: ``(chunk_text, chunk_id)`` pairs to store.
	        """
	        chunks = [chunk for chunk, _ in file_datas]
	        # IDs are converted to strings before being handed to the collection.
	        ids = [str(chunk_id) for _, chunk_id in file_datas]

	        # Imported at call time rather than at module import.
	        # NOTE(review): presumably to avoid a circular import or a costly
	        # model load at import time - confirm.
	        from .ModelCallingFunctions import generate_embedding

	        # Embeddings are computed first; the collection is only created once
	        # they are available.
	        embeddings = generate_embedding(texts=chunks)

	        collection = self.client.create_collection(collection_id)
	        collection.add(
	            ids=ids,
	            embeddings=embeddings,  # type: ignore
	            documents=chunks,
	        )

	    def retrieve_chunks(
	        self,
	        collection_id: str,
	        query: str = "NONE",
	        query_embedding: Optional[List[float]] = None,
	        number_of_chunks: int = 3,
	    ) -> List[str]:
	        """Return the document chunks matched for a query.

	        Args:
	            collection_id: Name of the collection to search.
	            query: Text to embed; only used when ``query_embedding`` is None.
	            query_embedding: Precomputed embedding. When given, ``query`` is
	                ignored and no embedding is generated.
	            number_of_chunks: Passed to the collection query as ``n_results``.

	        Returns:
	            The first entry of the query result's ``"documents"`` field.
	        """
	        collection = self.client.get_collection(name=collection_id)

	        # Identity check, not ``== None``: equality on an array-like embedding
	        # (e.g. a NumPy vector) compares element-wise and cannot be used as a
	        # single truth value.
	        if query_embedding is None:
	            from .ModelCallingFunctions import generate_embedding

	            query_emb = generate_embedding([query])[0]
	        else:
	            query_emb = query_embedding

	        results = collection.query(
	            query_embeddings=query_emb,
	            n_results=number_of_chunks,
	        )

	        return results["documents"][0]  # pyright: ignore