File size: 6,141 Bytes
ee566a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python3
"""
Test script for Smart RAG API
"""
import os
import tempfile
import sys
from pathlib import Path

# Add src to path
sys.path.append('src')

from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config

def test_system_setup():
    """Verify that config-driven directories and sample files can be created."""
    print("πŸ”§ Testing system setup...")

    cfg = Config()
    setup_directories(cfg)

    # Generate the bundled sample documents used by the other tests.
    create_sample_files(Path("sample_files"))

    print("βœ… System setup test passed")

def test_document_processing():
    """Verify that a plain-text file is split into chunks with content intact."""
    print("πŸ“„ Testing document processing...")

    doc_processor = DocumentProcessor()

    sample_text = "This is a test document for the Smart RAG API."
    # delete=False so the processor can reopen the file by path.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
        tmp.write(sample_text)
        tmp_path = tmp.name

    try:
        produced_chunks = doc_processor.process_document(tmp_path, '.txt')
        assert len(produced_chunks) > 0, "No chunks created"
        assert sample_text in produced_chunks[0], "Content not preserved"
        print("βœ… Text processing test passed")
    finally:
        # Always remove the temp file, even if an assertion fails.
        os.unlink(tmp_path)

def test_vector_store():
    """Verify indexing and similarity search over a handful of sample chunks."""
    print("πŸ” Testing vector store...")

    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)

    # Index a few known sentences under a synthetic file id.
    sample_chunks = [
        "The Smart RAG API processes multiple document formats.",
        "It uses FAISS for vector similarity search.",
        "The system supports PDF, Word, and image files."
    ]
    store.add_documents(sample_chunks, "test_file", "test.txt")

    # A related query should return at least one positively-scored hit.
    hits = store.search("What does the API process?", k=2)
    assert len(hits) > 0, "No search results found"
    assert hits[0]['score'] > 0, "Invalid similarity score"

    print("βœ… Vector store test passed")

def test_llm_handler():
    """Verify the LLM handler produces a usable answer from question + context.

    Failures are printed rather than raised, because model loading can
    legitimately fail in constrained environments while the rest of the
    system still works.
    """
    print("πŸ€– Testing LLM handler...")

    try:
        llm = LLMHandler()

        # Test answer generation
        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]

        answer = llm.generate_answer(question, context)
        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original used `or`, which is true unless the answer
        # contains BOTH "error" AND "apologize" — i.e. the check was almost
        # always vacuous. The intent "neither word appears" requires `and`
        # between the two negated membership tests (De Morgan).
        assert "error" not in answer.lower() and "apologize" not in answer.lower(), f"Error in answer: {answer}"

        print("βœ… LLM handler test passed")

    except Exception as e:
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")

def test_full_pipeline():
    """End-to-end check: ingest a document, search it, and answer questions."""
    print("πŸ”„ Testing complete pipeline...")

    # Wire up the same components the application uses.
    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)

    try:
        llm = LLMHandler()

        # Create test document
        test_content = """
        Smart RAG API Documentation
        
        The Smart RAG API is an intelligent document processing system that can:
        1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
        2. Extract text using OCR for image-based documents
        3. Create searchable embeddings using sentence transformers
        4. Answer questions using advanced language models
        5. Provide context-aware responses with source attribution
        
        Key features include:
        - Free and open-source implementation
        - No API keys required
        - Runs entirely on Hugging Face infrastructure
        - Supports both text and image-based queries
        
        The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
        AI models from Hugging Face for all processing tasks.
        """

        # Write the document to a temp file the processor can open by path.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
            tmp.write(test_content)
            tmp_path = tmp.name

        try:
            # Ingest: chunk the document and index the chunks.
            produced_chunks = doc_processor.process_document(tmp_path, '.txt')
            store.add_documents(produced_chunks, "pipeline_test", "test_doc.txt")

            questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?"
            ]

            for q in questions:
                print(f"   Testing: {q}")

                # Retrieve the top-3 chunks, then answer from that context.
                hits = store.search(q, k=3)
                retrieved = [hit['text'] for hit in hits]
                response = llm.generate_answer(q, retrieved)

                print(f"   Answer: {response[:100]}...")
                print()

        finally:
            os.unlink(tmp_path)

        print("βœ… Full pipeline test passed")

    except Exception as e:
        print(f"❌ Full pipeline test failed: {e}")

def run_all_tests():
    """Run every test in order, printing a blank line between them."""
    print("πŸš€ Starting Smart RAG API Tests\n")

    try:
        # Table-driven: same sequence as before, one blank line after each.
        for test_fn in (
            test_system_setup,
            test_document_processing,
            test_vector_store,
            test_llm_handler,
            test_full_pipeline,
        ):
            test_fn()
            print()

        print("πŸŽ‰ All tests completed!")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    run_all_tests()