# Rag-based-api-task / tests/test_api.py
# Author: sairika
# Commit: ee566a1 (verified) — "Create tests/test_api.py"
#!/usr/bin/env python3
"""
Test script for Smart RAG API
"""
import os
import tempfile
import sys
from pathlib import Path
# Add src to path
sys.path.append('src')
from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config
def test_system_setup():
    """Verify directories and sample files can be created without error."""
    print("🔧 Testing system setup...")
    cfg = Config()
    setup_directories(cfg)
    # Generate the bundled example documents used by later tests.
    create_sample_files(Path("sample_files"))
    print("✅ System setup test passed")
def test_document_processing():
    """Verify text extraction and chunking on a temporary .txt file."""
    print("📄 Testing document processing...")
    doc_processor = DocumentProcessor()
    content = "This is a test document for the Smart RAG API."
    # Write the sample text to a real file so the processor can read it back.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
        tmp.write(content)
        tmp_path = tmp.name
    try:
        pieces = doc_processor.process_document(tmp_path, '.txt')
        assert len(pieces) > 0, "No chunks created"
        assert content in pieces[0], "Content not preserved"
        print("✅ Text processing test passed")
    finally:
        # Always remove the temp file, even on assertion failure.
        os.unlink(tmp_path)
def test_vector_store():
    """Verify indexing and similarity search over a few text chunks."""
    print("🔍 Testing vector store...")
    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)
    # Index three small chunks under a single synthetic source file.
    store.add_documents(
        [
            "The Smart RAG API processes multiple document formats.",
            "It uses FAISS for vector similarity search.",
            "The system supports PDF, Word, and image files.",
        ],
        "test_file",
        "test.txt",
    )
    hits = store.search("What does the API process?", k=2)
    assert len(hits) > 0, "No search results found"
    assert hits[0]['score'] > 0, "Invalid similarity score"
    print("✅ Vector store test passed")
def test_llm_handler():
    """Verify the LLM handler can generate a plausible answer.

    Failures are reported but not re-raised, since model loading may be
    unavailable in some environments (the rest of the system still works).
    """
    print("🤖 Testing LLM handler...")
    try:
        llm = LLMHandler()
        # Test answer generation against a single-chunk context.
        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]
        answer = llm.generate_answer(question, context)
        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original used `or`, which only failed when BOTH
        # "error" AND "apologize" appeared in the answer. The intent is to
        # reject an answer containing EITHER marker, so combine with `and`
        # (De Morgan: not(A or B) == not A and not B).
        lowered = answer.lower()
        assert "error" not in lowered and "apologize" not in lowered, f"Error in answer: {answer}"
        print("✅ LLM handler test passed")
    except Exception as e:
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")
def test_full_pipeline():
    """Exercise the end-to-end RAG flow: ingest a document, search it, answer queries."""
    print("🔄 Testing complete pipeline...")
    # Wire up the three pipeline components.
    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)
    try:
        llm = LLMHandler()
        # Synthetic documentation used as the corpus for this run.
        test_content = """
Smart RAG API Documentation
The Smart RAG API is an intelligent document processing system that can:
1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
2. Extract text using OCR for image-based documents
3. Create searchable embeddings using sentence transformers
4. Answer questions using advanced language models
5. Provide context-aware responses with source attribution
Key features include:
- Free and open-source implementation
- No API keys required
- Runs entirely on Hugging Face infrastructure
- Supports both text and image-based queries
The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
AI models from Hugging Face for all processing tasks.
"""
        # Persist the corpus to disk so the processor can ingest it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
            tmp.write(test_content)
            tmp_path = tmp.name
        try:
            pieces = doc_processor.process_document(tmp_path, '.txt')
            store.add_documents(pieces, "pipeline_test", "test_doc.txt")
            # Run a handful of representative queries through search + generation.
            questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?",
            ]
            for question in questions:
                print(f" Testing: {question}")
                hits = store.search(question, k=3)
                contexts = [hit['text'] for hit in hits]
                answer = llm.generate_answer(question, contexts)
                print(f" Answer: {answer[:100]}...")
                print()
        finally:
            # Clean up the temp corpus regardless of outcome.
            os.unlink(tmp_path)
        print("✅ Full pipeline test passed")
    except Exception as e:
        print(f"❌ Full pipeline test failed: {e}")
def run_all_tests():
    """Run every test in order; print a traceback if any raises."""
    print("🚀 Starting Smart RAG API Tests\n")
    suite = (
        test_system_setup,
        test_document_processing,
        test_vector_store,
        test_llm_handler,
        test_full_pipeline,
    )
    try:
        # Each test prints its own status; a blank line separates them.
        for check in suite:
            check()
            print()
        print("🎉 All tests completed!")
    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    run_all_tests()