# NOTE(review): the lines "Spaces:" / "Runtime error" here were platform status
# output captured alongside the file during extraction — they are not source code.
#!/usr/bin/env python3
"""Test script for Smart RAG API."""
# --- standard library ---
import os
import tempfile
import sys
from pathlib import Path

# Make the project sources importable when running from the repo root.
sys.path.append('src')

# --- project modules (order preserved; imports may run side effects) ---
from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config
def test_system_setup():
    """Verify that directories and sample fixture files can be created."""
    print("π§ Testing system setup...")

    cfg = Config()
    setup_directories(cfg)

    # Sample fixtures are written under ./sample_files
    create_sample_files(Path("sample_files"))

    print("β System setup test passed")
def test_document_processing():
    """Round-trip a small text file through DocumentProcessor and check chunks."""
    print("π Testing document processing...")
    doc_proc = DocumentProcessor()

    payload = "This is a test document for the Smart RAG API."
    # Write the payload to a real temp file so the processor exercises disk I/O.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
        tmp.write(payload)
        tmp_path = tmp.name

    try:
        pieces = doc_proc.process_document(tmp_path, '.txt')
        assert len(pieces) > 0, "No chunks created"
        assert payload in pieces[0], "Content not preserved"
        print("β Text processing test passed")
    finally:
        # Always remove the temp file, even on assertion failure.
        os.unlink(tmp_path)
def test_vector_store():
    """Index a few chunks and confirm similarity search returns scored hits."""
    print("π Testing vector store...")
    doc_proc = DocumentProcessor()
    store = VectorStore(doc_proc.embedding_model)

    # Seed the store with a handful of known sentences.
    store.add_documents(
        [
            "The Smart RAG API processes multiple document formats.",
            "It uses FAISS for vector similarity search.",
            "The system supports PDF, Word, and image files.",
        ],
        "test_file",
        "test.txt",
    )

    hits = store.search("What does the API process?", k=2)
    assert len(hits) > 0, "No search results found"
    assert hits[0]['score'] > 0, "Invalid similarity score"
    print("β Vector store test passed")
def test_llm_handler():
    """Generate an answer via LLMHandler and sanity-check its content.

    Model loading can fail in constrained environments, so any exception
    (including assertion failures) is reported as a warning rather than
    propagated — this mirrors the original best-effort behavior.
    """
    print("π€ Testing LLM handler...")
    try:
        llm = LLMHandler()

        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]
        answer = llm.generate_answer(question, context)

        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original condition used `or`
        # (`"error" not in a or "apologize" not in a`), which only fails when
        # BOTH marker words appear. The intent is that a healthy answer
        # contains NEITHER marker, which requires `and`.
        lowered = answer.lower()
        assert "error" not in lowered and "apologize" not in lowered, f"Error in answer: {answer}"
        print("β LLM handler test passed")
    except Exception as e:
        print(f"β οΈ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")
def test_full_pipeline():
    """End-to-end check: process a document, index it, search, and answer."""
    print("π Testing complete pipeline...")

    doc_proc = DocumentProcessor()
    store = VectorStore(doc_proc.embedding_model)
    try:
        llm = LLMHandler()

        # NOTE(review): the original indentation inside this literal was lost
        # during extraction; the text content below is preserved verbatim.
        test_content = """
Smart RAG API Documentation
The Smart RAG API is an intelligent document processing system that can:
1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
2. Extract text using OCR for image-based documents
3. Create searchable embeddings using sentence transformers
4. Answer questions using advanced language models
5. Provide context-aware responses with source attribution
Key features include:
- Free and open-source implementation
- No API keys required
- Runs entirely on Hugging Face infrastructure
- Supports both text and image-based queries
The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
AI models from Hugging Face for all processing tasks.
"""

        # Persist the document to a temp file for the processor.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
            tmp.write(test_content)
            tmp_path = tmp.name

        try:
            chunks = doc_proc.process_document(tmp_path, '.txt')
            store.add_documents(chunks, "pipeline_test", "test_doc.txt")

            # Run a handful of representative queries through search + LLM.
            for question in (
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?",
            ):
                print(f" Testing: {question}")
                hits = store.search(question, k=3)
                contexts = [h['text'] for h in hits]
                answer = llm.generate_answer(question, contexts)
                print(f" Answer: {answer[:100]}...")
                print()
        finally:
            os.unlink(tmp_path)

        print("β Full pipeline test passed")
    except Exception as e:
        print(f"β Full pipeline test failed: {e}")
def run_all_tests():
    """Execute every test in sequence; report the first unexpected failure."""
    print("π Starting Smart RAG API Tests\n")
    suite = (
        test_system_setup,
        test_document_processing,
        test_vector_store,
        test_llm_handler,
        test_full_pipeline,
    )
    try:
        # Run each test followed by a blank line, matching the original output.
        for case in suite:
            case()
            print()
        print("π All tests completed!")
    except Exception as e:
        print(f"β Test failed: {e}")
        import traceback
        traceback.print_exc()
| if __name__ == "__main__": | |
| run_all_tests() |