Spaces:

Harshdhsvguyt
/

policy_rag_assistant

Sleeping

App Files Files Community

policy_rag_assistant / main.py

Harshdhsvguyt

Upload 19 files

754d8d3 verified 2 months ago

raw

history blame contribute delete

3.21 kB

	import sys
	import os
	from dotenv import load_dotenv

	from src.loader import load_documents
	from src.chunking import chunk_documents
	from src.vectorstore import VectorStore
	from src.rag_pipeline import RAGPipeline
	from src.utils import ensure_directories

	# Load environment variables
	load_dotenv()


	def setup_vector_store():
	"""Initialize and populate vector store."""
	print("Loading documents...")
	docs = load_documents()

	if not docs:
	print("No documents found in data/policies/")
	sys.exit(1)

	print(f"Loaded {len(docs)} documents")

	print("Chunking documents...")
	chunked = chunk_documents(docs, chunk_size=500, overlap=100)
	print(f"Created {len(chunked)} chunks")

	print("Initializing vector store...")
	vector_store = VectorStore()
	vector_store.reset()
	vector_store.add_documents(chunked)

	print("Setup complete!")
	return vector_store


	def main():
	"""CLI interface for RAG pipeline."""
	ensure_directories()

	# ------------------------------------------------
	# Check API key
	# ------------------------------------------------
	if not os.getenv("GROQ_API_KEY"):
	print("Error: GROQ_API_KEY environment variable not set")
	sys.exit(1)

	# ------------------------------------------------
	# Get question from command line
	# ------------------------------------------------
	if len(sys.argv) < 2:
	print("Usage: python main.py 'Your question here'")
	sys.exit(1)

	question = " ".join(sys.argv[1:])

	# ------------------------------------------------
	# Setup RAG pipeline
	# ------------------------------------------------
	vector_store = setup_vector_store()
	rag_pipeline = RAGPipeline(vector_store)

	# ------------------------------------------------
	# Query
	# ------------------------------------------------
	print(f"\nQuestion: {question}\n")

	response = rag_pipeline.query(question, prompt_type="improved")

	# ------------------------------------------------
	# Display Results
	# ------------------------------------------------
	print("=" * 80)
	print("ANSWER:")
	print(response["answer"])

	print("\n" + "=" * 80)
	print(f"Confidence: {response.get('confidence', 'N/A')}")
	print(f"Sources Retrieved: {len(response['retrieved_chunks'])}")

	# Show retrieved chunk preview ( looks professional)
	if response.get("retrieved_chunks"):
	print("\nRETRIEVED CONTEXT PREVIEW:")
	for i, chunk in enumerate(response["retrieved_chunks"], 1):
	preview = chunk["text"][:120].replace("\n", " ")
	print(f"{i}. {preview}...")

	if response.get("evidence"):
	print("\nEVIDENCE:")
	for i, ev in enumerate(response["evidence"], 1):
	print(f"{i}. {ev}")

	# NEW: Evaluation Metrics
	if response.get("evaluation"):
	print("\n" + "=" * 80)
	print("EVALUATION:")
	for k, v in response["evaluation"].items():
	print(f"{k}: {v}")

	print("\n" + "=" * 80)


	if __name__ == "__main__":
	main()