#!/usr/bin/env python3
"""
Test script for Smart RAG API

Runs a sequence of smoke tests against the project's document processor,
vector store, and LLM handler, printing a status line per test.
"""
import os
import tempfile
import sys
from pathlib import Path

# Add src to path so the project-local packages below resolve when this
# script is run from the repository root.
sys.path.append('src')

from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config


def test_system_setup():
    """Test system setup and dependencies: directories exist and sample files are created."""
    print("🔧 Testing system setup...")
    config = Config()
    setup_directories(config)

    # Create sample files
    sample_dir = Path("sample_files")
    create_sample_files(sample_dir)
    print("✅ System setup test passed")


def test_document_processing():
    """Test document processing functionality: a .txt file round-trips into chunks."""
    print("📄 Testing document processing...")
    processor = DocumentProcessor()

    # Test text processing via a real temp file (delete=False so it can be
    # reopened by the processor on all platforms; removed in `finally`).
    sample_text = "This is a test document for the Smart RAG API."
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(sample_text)
        temp_path = f.name

    try:
        chunks = processor.process_document(temp_path, '.txt')
        assert len(chunks) > 0, "No chunks created"
        assert sample_text in chunks[0], "Content not preserved"
        print("✅ Text processing test passed")
    finally:
        # Clean up even if an assertion above fails.
        os.unlink(temp_path)


def test_vector_store():
    """Test vector store functionality: indexing chunks and similarity search."""
    print("🔍 Testing vector store...")
    processor = DocumentProcessor()
    vector_store = VectorStore(processor.embedding_model)

    # Add test documents
    test_chunks = [
        "The Smart RAG API processes multiple document formats.",
        "It uses FAISS for vector similarity search.",
        "The system supports PDF, Word, and image files."
    ]
    vector_store.add_documents(test_chunks, "test_file", "test.txt")

    # Test search
    results = vector_store.search("What does the API process?", k=2)
    assert len(results) > 0, "No search results found"
    assert results[0]['score'] > 0, "Invalid similarity score"
    print("✅ Vector store test passed")


def test_llm_handler():
    """Test LLM handler functionality: answer generation from a fixed context.

    Failures are reported but not raised, since model loading may be
    environment-dependent.
    """
    print("🤖 Testing LLM handler...")
    try:
        llm = LLMHandler()

        # Test answer generation
        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]
        answer = llm.generate_answer(question, context)

        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original used `or`, which only failed when BOTH markers
        # appeared in the answer. The intent is that NEITHER "error" nor
        # "apologize" appears (De Morgan), hence `and`.
        assert "error" not in answer.lower() and "apologize" not in answer.lower(), f"Error in answer: {answer}"
        print("✅ LLM handler test passed")
    except Exception as e:
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")


def test_full_pipeline():
    """Test the complete RAG pipeline: process -> index -> search -> answer."""
    print("🔄 Testing complete pipeline...")

    # Initialize components
    processor = DocumentProcessor()
    vector_store = VectorStore(processor.embedding_model)

    try:
        llm = LLMHandler()

        # Create test document
        test_content = """
        Smart RAG API Documentation

        The Smart RAG API is an intelligent document processing system that can:
        1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
        2. Extract text using OCR for image-based documents
        3. Create searchable embeddings using sentence transformers
        4. Answer questions using advanced language models
        5. Provide context-aware responses with source attribution

        Key features include:
        - Free and open-source implementation
        - No API keys required
        - Runs entirely on Hugging Face infrastructure
        - Supports both text and image-based queries

        The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
        AI models from Hugging Face for all processing tasks.
        """

        # Create temporary file (delete=False; removed in `finally` below)
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
            f.write(test_content)
            temp_path = f.name

        try:
            # Process document
            chunks = processor.process_document(temp_path, '.txt')
            vector_store.add_documents(chunks, "pipeline_test", "test_doc.txt")

            # Test queries
            test_questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?"
            ]

            for question in test_questions:
                print(f" Testing: {question}")

                # Search
                results = vector_store.search(question, k=3)
                contexts = [r['text'] for r in results]

                # Generate answer
                answer = llm.generate_answer(question, contexts)
                print(f" Answer: {answer[:100]}...")
                print()
        finally:
            os.unlink(temp_path)

        print("✅ Full pipeline test passed")
    except Exception as e:
        print(f"❌ Full pipeline test failed: {e}")


def run_all_tests():
    """Run all tests in order, printing a traceback on the first hard failure."""
    print("🚀 Starting Smart RAG API Tests\n")

    try:
        test_system_setup()
        print()
        test_document_processing()
        print()
        test_vector_store()
        print()
        test_llm_handler()
        print()
        test_full_pipeline()
        print()
        print("🎉 All tests completed!")
    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    run_all_tests()