"""Main script for testing the RAG system.

Running this file lists the supported documents found in ``DOCUMENTS_DIR``,
optionally loads them into the JSON store, and then starts an interactive
question/answer loop on the terminal.
"""

from pathlib import Path
from typing import List, Optional

from config import DOCUMENTS_DIR
from vectordb.document_processor import DocumentProcessor
from vectordb.json_store import get_json_store  # Changed
from rag.retriever import get_retriever
from rag.generator import get_generator

# File extensions (lower-case) that main() offers for loading.
_SUPPORTED_SUFFIXES = ('.pdf', '.txt', '.docx')


def _prompt(message: str) -> Optional[str]:
    """Read one stripped line from stdin.

    Returns:
        The user's input with surrounding whitespace removed, or ``None``
        when stdin is closed (EOF) or the user presses Ctrl-C, so callers can
        shut down cleanly instead of crashing with a traceback.
    """
    try:
        return input(message).strip()
    except (EOFError, KeyboardInterrupt):
        print()  # finish the prompt line before any farewell message
        return None


def load_documents(file_paths: List[str]) -> None:
    """Load documents into JSON store.

    Each file is chunked with ``DocumentProcessor.process_document`` and the
    chunk texts, metadata and generated IDs are added to the store. After all
    files are handled, store statistics are printed and
    ``export_chunks_only()`` is called on the store.

    Args:
        file_paths: Paths of the documents to process.
    """
    print("\n" + "="*60)
    print("LOADING DOCUMENTS")
    print("="*60)

    processor = DocumentProcessor()
    vector_store = get_json_store()

    for file_path in file_paths:
        print(f"\nProcessing: {file_path}")

        chunks = processor.process_document(file_path)
        print(f"✓ Created {len(chunks)} chunks")

        if not chunks:
            # Nothing to add; avoid handing the store an empty batch.
            print("  Nothing to store for this document, skipping.")
            continue

        # NOTE(review): IDs use only the file stem, so "report.pdf" and
        # "report.txt" produce identical IDs. Left unchanged to stay
        # compatible with IDs already written to existing stores.
        stem = Path(file_path).stem
        texts = []
        metadatas = []
        ids = []
        for i, chunk in enumerate(chunks):
            texts.append(chunk.text)
            metadatas.append(chunk.metadata)
            ids.append(f"{stem}_{i}")

        vector_store.add_documents(texts, metadatas, ids)

    stats = vector_store.get_stats()
    print(f"\n✓ Total chunks in store: {stats['total_documents']}")
    print(f"✓ JSON file size: {stats['file_size_mb']:.2f} MB")

    # Export chunks only (without embeddings)
    vector_store.export_chunks_only()


def query_system(query: str) -> None:
    """Query the RAG system and print the retrieved chunks and the answer.

    Args:
        query: The user's question.
    """
    print("\n" + "="*60)
    print(f"QUERY: {query}")
    print("="*60)

    retriever = get_retriever()
    generator = get_generator()

    print("\n🔍 Retrieving relevant documents...")
    retrieved_docs = retriever.retrieve(query)
    print(f"✓ Found {len(retrieved_docs)} relevant chunks")

    for i, doc in enumerate(retrieved_docs, 1):
        print(
            f"\n[{i}] {doc['source']} "
            f"(Chunk {doc['chunk_index']}, Similarity: {doc['similarity']:.3f})"
        )
        # Only the first 150 characters are shown as a preview.
        print(f"Preview: {doc['text'][:150]}...")

    print("\n💬 Generating response...")
    context = retriever.format_context(retrieved_docs)
    answer = generator.generate_response(query, context)

    print("\n" + "-"*60)
    print("ANSWER:")
    print("-"*60)
    print(answer)
    print("-"*60)


def interactive_mode() -> None:
    """Run the interactive query loop until the user quits.

    Recognized commands are ``stats`` (print store statistics) and
    ``quit`` / ``exit`` / ``q`` (leave the loop). Empty input is ignored;
    anything else is passed to ``query_system``. EOF or Ctrl-C at the prompt
    also ends the loop.
    """
    print("\n" + "="*60)
    print("INTERACTIVE MODE")
    print("="*60)
    print("Commands:")
    print(" - Type your question to query")
    print(" - Type 'stats' to see store statistics")
    print(" - Type 'quit' or 'exit' to stop")
    print("="*60 + "\n")

    vector_store = get_json_store()

    while True:
        query = _prompt("\n💬 Your question: ")

        # None means stdin closed or Ctrl-C: treat it like an explicit quit.
        if query is None or query.lower() in ('quit', 'exit', 'q'):
            print("Goodbye!")
            break

        if query.lower() == 'stats':
            stats = vector_store.get_stats()
            print("\n📊 Store Statistics:")
            for key, value in stats.items():
                print(f" {key}: {value}")
            continue

        if not query:
            continue

        query_system(query)


def main() -> None:
    """Main function: discover documents, optionally load them, then chat."""
    print("\n🚀 Cortexa RAG System (JSON Storage)")
    print("="*60)

    # Match suffixes case-insensitively so files such as "REPORT.PDF" are
    # found, and ignore directories that merely carry a matching suffix.
    docs = [
        d for d in DOCUMENTS_DIR.glob("*")
        if d.is_file() and d.suffix.lower() in _SUPPORTED_SUFFIXES
    ]

    if not docs:
        print(f"\n⚠️ No documents found in {DOCUMENTS_DIR}")
        print("Please add PDF, TXT, or DOCX files to the documents folder.")
        return

    print(f"\n📄 Found {len(docs)} documents:")
    for doc in docs:
        print(f" - {doc.name}")

    load_choice = _prompt("\nLoad documents into store? (y/n): ")
    if load_choice is None:
        # Input stream closed or interrupted: nothing more can be asked.
        print("Goodbye!")
        return

    if load_choice.lower() in ('y', 'yes'):
        load_documents([str(d) for d in docs])

    print("\nStarting interactive query mode...")
    interactive_mode()


if __name__ == "__main__":
    main()