Spaces:
Runtime error
Runtime error
File size: 6,141 Bytes
ee566a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
#!/usr/bin/env python3
"""
Test script for Smart RAG API
"""
import os
import tempfile
import sys
from pathlib import Path
# Add src to path
sys.path.append('src')
from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config
def test_system_setup():
    """Verify that directories can be created and sample files generated.

    Exercises Config, setup_directories, and create_sample_files; raises
    whatever those helpers raise on failure (caught by run_all_tests).
    """
    print("🔧 Testing system setup...")

    config = Config()
    setup_directories(config)

    # Sample files are reused by later tests, so create them up front.
    sample_dir = Path("sample_files")
    create_sample_files(sample_dir)

    # NOTE(review): emoji restored from mojibake ("β…") — original bytes
    # were a UTF-8 check mark mangled by a lossy re-encode.
    print("✅ System setup test passed")
def test_document_processing():
    """Process a temporary .txt file and assert chunks preserve its text."""
    print("📄 Testing document processing...")

    processor = DocumentProcessor()

    # Write a known sentence to a temp file; delete=False so the path
    # stays valid after the `with` block closes the handle.
    sample_text = "This is a test document for the Smart RAG API."
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(sample_text)
        temp_path = f.name

    try:
        chunks = processor.process_document(temp_path, '.txt')
        assert len(chunks) > 0, "No chunks created"
        assert sample_text in chunks[0], "Content not preserved"
        print("✅ Text processing test passed")
    finally:
        # Always clean up the temp file, even if an assertion fails.
        os.unlink(temp_path)
def test_vector_store():
    """Index a few chunks in the vector store and verify search results."""
    print("🔍 Testing vector store...")

    processor = DocumentProcessor()
    # VectorStore shares the processor's embedding model — presumably a
    # sentence-transformers instance; confirm against src/vector_store.py.
    vector_store = VectorStore(processor.embedding_model)

    # Seed the store with a handful of known sentences.
    test_chunks = [
        "The Smart RAG API processes multiple document formats.",
        "It uses FAISS for vector similarity search.",
        "The system supports PDF, Word, and image files.",
    ]
    vector_store.add_documents(test_chunks, "test_file", "test.txt")

    # A semantically related query should return at least one hit with a
    # positive similarity score.
    results = vector_store.search("What does the API process?", k=2)
    assert len(results) > 0, "No search results found"
    assert results[0]['score'] > 0, "Invalid similarity score"

    print("✅ Vector store test passed")
def test_llm_handler():
    """Generate an answer with the LLM handler and sanity-check it.

    Failures are downgraded to a warning because model loading can fail
    in constrained environments without invalidating the rest of the app.
    """
    print("🤖 Testing LLM handler...")
    try:
        llm = LLMHandler()

        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]
        answer = llm.generate_answer(question, context)

        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original used `or`, which only failed when BOTH
        # "error" AND "apologize" appeared in the answer (De Morgan).
        # The intent is to fail if EITHER error marker appears.
        assert "error" not in answer.lower() and "apologize" not in answer.lower(), f"Error in answer: {answer}"

        print("✅ LLM handler test passed")
    except Exception as e:
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")
def test_full_pipeline():
    """End-to-end check: ingest a document, search it, and answer queries.

    Any exception (model load, processing, search) is caught and reported
    rather than propagated, mirroring test_llm_handler's best-effort style.
    """
    print("🚀 Testing complete pipeline...")

    # Initialize all three pipeline components.
    processor = DocumentProcessor()
    vector_store = VectorStore(processor.embedding_model)

    try:
        llm = LLMHandler()

        # A small self-describing document to index and query against.
        test_content = """
        Smart RAG API Documentation

        The Smart RAG API is an intelligent document processing system that can:
        1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
        2. Extract text using OCR for image-based documents
        3. Create searchable embeddings using sentence transformers
        4. Answer questions using advanced language models
        5. Provide context-aware responses with source attribution

        Key features include:
        - Free and open-source implementation
        - No API keys required
        - Runs entirely on Hugging Face infrastructure
        - Supports both text and image-based queries

        The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
        AI models from Hugging Face for all processing tasks.
        """

        # Persist the content to a temp file so the processor can read it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
            f.write(test_content)
            temp_path = f.name

        try:
            # Ingest: chunk the document and add it to the vector store.
            chunks = processor.process_document(temp_path, '.txt')
            vector_store.add_documents(chunks, "pipeline_test", "test_doc.txt")

            # Query: retrieve context and generate an answer per question.
            test_questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?",
            ]
            for question in test_questions:
                print(f" Testing: {question}")
                results = vector_store.search(question, k=3)
                contexts = [r['text'] for r in results]
                answer = llm.generate_answer(question, contexts)
                # Truncate long answers for readable console output.
                print(f" Answer: {answer[:100]}...")
                print()
        finally:
            os.unlink(temp_path)

        print("✅ Full pipeline test passed")
    except Exception as e:
        print(f"❌ Full pipeline test failed: {e}")
def run_all_tests():
    """Run every test in sequence, printing a traceback on first failure.

    Individual tests that catch their own exceptions (LLM handler, full
    pipeline) report but do not abort the run; anything else stops here.
    """
    print("🧪 Starting Smart RAG API Tests\n")
    try:
        test_system_setup()
        print()
        test_document_processing()
        print()
        test_vector_store()
        print()
        test_llm_handler()
        print()
        test_full_pipeline()
        print()
        print("🎉 All tests completed!")
    except Exception as e:
        print(f"❌ Test failed: {e}")
        # Local import keeps the happy path free of the dependency.
        import traceback
        traceback.print_exc()
# Script entry point. BUG FIX: the original line ended with a stray " |"
# (page-extraction residue) which is a syntax error at runtime.
if __name__ == "__main__":
    run_all_tests()