Spaces:

harshalmore31
/

Swarms

Sleeping

App Files Files Community

Swarms / new_features_examples /spike /memory.py

harshalmore31

Synced repo using 'sync_with_huggingface' Github Action

d8d14f1 verified about 1 year ago

raw

history blame contribute delete

4.21 kB

	from typing import Optional
	from pathlib import Path
	from loguru import logger
	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader


	class LlamaIndexDB:
	"""A class to manage document indexing and querying using LlamaIndex.

	This class provides functionality to add documents from a directory and query the indexed documents.

	Args:
	data_dir (str): Directory containing documents to index. Defaults to "docs".
	**kwargs: Additional arguments passed to SimpleDirectoryReader and VectorStoreIndex.
	SimpleDirectoryReader kwargs:
	- filename_as_id (bool): Use filenames as document IDs
	- recursive (bool): Recursively read subdirectories
	- required_exts (List[str]): Only read files with these extensions
	- exclude_hidden (bool): Skip hidden files

	VectorStoreIndex kwargs:
	- service_context: Custom service context
	- embed_model: Custom embedding model
	- similarity_top_k (int): Number of similar docs to retrieve
	- store_nodes_override (bool): Override node storage
	"""

	def __init__(self, data_dir: str = "docs", **kwargs) -> None:
	"""Initialize the LlamaIndexDB with an empty index.

	Args:
	data_dir (str): Directory containing documents to index
	**kwargs: Additional arguments for SimpleDirectoryReader and VectorStoreIndex
	"""
	self.data_dir = data_dir
	self.index: Optional[VectorStoreIndex] = None
	self.reader_kwargs = {
	k: v
	for k, v in kwargs.items()
	if k
	in SimpleDirectoryReader.__init__.__code__.co_varnames
	}
	self.index_kwargs = {
	k: v
	for k, v in kwargs.items()
	if k not in self.reader_kwargs
	}

	logger.info("Initialized LlamaIndexDB")
	data_path = Path(self.data_dir)
	if not data_path.exists():
	logger.error(f"Directory not found: {self.data_dir}")
	raise FileNotFoundError(
	f"Directory {self.data_dir} does not exist"
	)

	try:
	documents = SimpleDirectoryReader(
	self.data_dir, **self.reader_kwargs
	).load_data()
	self.index = VectorStoreIndex.from_documents(
	documents, **self.index_kwargs
	)
	logger.success(
	f"Successfully indexed documents from {self.data_dir}"
	)
	except Exception as e:
	logger.error(f"Error indexing documents: {str(e)}")
	raise

	def query(self, query: str, **kwargs) -> str:
	"""Query the indexed documents.

	Args:
	query (str): The query string to search for
	**kwargs: Additional arguments passed to the query engine
	- similarity_top_k (int): Number of similar documents to retrieve
	- streaming (bool): Enable streaming response
	- response_mode (str): Response synthesis mode
	- max_tokens (int): Maximum tokens in response

	Returns:
	str: The response from the query engine

	Raises:
	ValueError: If no documents have been indexed yet
	"""
	if self.index is None:
	logger.error("No documents have been indexed yet")
	raise ValueError("Must add documents before querying")

	try:
	query_engine = self.index.as_query_engine(**kwargs)
	response = query_engine.query(query)
	print(response)
	logger.info(f"Successfully queried: {query}")
	return str(response)
	except Exception as e:
	logger.error(f"Error during query: {str(e)}")
	raise


	# # Example usage
	# llama_index_db = LlamaIndexDB(
	# data_dir="docs",
	# filename_as_id=True,
	# recursive=True,
	# required_exts=[".txt", ".pdf", ".docx"],
	# similarity_top_k=3
	# )
	# response = llama_index_db.query(
	# "What is the medical history of patient 1?",
	# streaming=True,
	# response_mode="compact"
	# )
	# print(response)