File size: 6,141 Bytes
ee566a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python3
"""
Test script for Smart RAG API
"""
import os
import tempfile
import sys
from pathlib import Path

# Add src to path
sys.path.append('src')

from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config

def test_system_setup():
    """Verify that config-driven directories and sample files can be created."""
    print("πŸ”§ Testing system setup...")

    cfg = Config()
    setup_directories(cfg)

    # Generate the bundled sample documents used by the other tests.
    create_sample_files(Path("sample_files"))

    print("βœ… System setup test passed")

def test_document_processing():
    """Verify that a plain-text file is split into chunks with content intact."""
    print("πŸ“„ Testing document processing...")

    doc_processor = DocumentProcessor()

    sample_text = "This is a test document for the Smart RAG API."
    # delete=False so the processor can reopen the file by path.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
        tmp.write(sample_text)
        tmp_path = tmp.name

    try:
        produced_chunks = doc_processor.process_document(tmp_path, '.txt')
        assert len(produced_chunks) > 0, "No chunks created"
        assert sample_text in produced_chunks[0], "Content not preserved"
        print("βœ… Text processing test passed")
    finally:
        # Always remove the temp file, even if an assertion fails.
        os.unlink(tmp_path)

def test_vector_store():
    """Verify indexing and similarity search over a handful of sample chunks."""
    print("πŸ” Testing vector store...")

    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)

    # Index a few known sentences under a synthetic file id.
    sample_chunks = [
        "The Smart RAG API processes multiple document formats.",
        "It uses FAISS for vector similarity search.",
        "The system supports PDF, Word, and image files."
    ]
    store.add_documents(sample_chunks, "test_file", "test.txt")

    # A related query should return at least one positively-scored hit.
    hits = store.search("What does the API process?", k=2)
    assert len(hits) > 0, "No search results found"
    assert hits[0]['score'] > 0, "Invalid similarity score"

    print("βœ… Vector store test passed")

def test_llm_handler():
    """Verify the LLM handler produces a usable answer from question + context.

    Failures are printed rather than raised, because model loading can
    legitimately fail in constrained environments while the rest of the
    system still works.
    """
    print("πŸ€– Testing LLM handler...")

    try:
        llm = LLMHandler()

        # Test answer generation
        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]

        answer = llm.generate_answer(question, context)
        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original used `or`, which is true unless the answer
        # contains BOTH "error" AND "apologize" — i.e. the check was almost
        # always vacuous. The intent "neither word appears" requires `and`
        # between the two negated membership tests (De Morgan).
        assert "error" not in answer.lower() and "apologize" not in answer.lower(), f"Error in answer: {answer}"

        print("βœ… LLM handler test passed")

    except Exception as e:
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")

def test_full_pipeline():
    """End-to-end check: ingest a document, search it, and answer questions."""
    print("πŸ”„ Testing complete pipeline...")

    # Wire up the same components the application uses.
    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)

    try:
        llm = LLMHandler()

        # Create test document
        test_content = """
        Smart RAG API Documentation
        
        The Smart RAG API is an intelligent document processing system that can:
        1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
        2. Extract text using OCR for image-based documents
        3. Create searchable embeddings using sentence transformers
        4. Answer questions using advanced language models
        5. Provide context-aware responses with source attribution
        
        Key features include:
        - Free and open-source implementation
        - No API keys required
        - Runs entirely on Hugging Face infrastructure
        - Supports both text and image-based queries
        
        The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
        AI models from Hugging Face for all processing tasks.
        """

        # Write the document to a temp file the processor can open by path.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
            tmp.write(test_content)
            tmp_path = tmp.name

        try:
            # Ingest: chunk the document and index the chunks.
            produced_chunks = doc_processor.process_document(tmp_path, '.txt')
            store.add_documents(produced_chunks, "pipeline_test", "test_doc.txt")

            questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?"
            ]

            for q in questions:
                print(f"   Testing: {q}")

                # Retrieve the top-3 chunks, then answer from that context.
                hits = store.search(q, k=3)
                retrieved = [hit['text'] for hit in hits]
                response = llm.generate_answer(q, retrieved)

                print(f"   Answer: {response[:100]}...")
                print()

        finally:
            os.unlink(tmp_path)

        print("βœ… Full pipeline test passed")

    except Exception as e:
        print(f"❌ Full pipeline test failed: {e}")

def run_all_tests():
    """Run every test in order, printing a blank line between them."""
    print("πŸš€ Starting Smart RAG API Tests\n")

    try:
        # Table-driven: same sequence as before, one blank line after each.
        for test_fn in (
            test_system_setup,
            test_document_processing,
            test_vector_store,
            test_llm_handler,
            test_full_pipeline,
        ):
            test_fn()
            print()

        print("πŸŽ‰ All tests completed!")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    run_all_tests()