sairika committed on
Commit
ee566a1
·
verified ·
1 Parent(s): 6a70d5b

Create tests/test_api.py

Browse files
Files changed (1) hide show
  1. tests/test_api.py +195 -0
tests/test_api.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for Smart RAG API
4
+ """
5
+ import os
6
+ import tempfile
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ # Add src to path
11
+ sys.path.append('src')
12
+
13
+ from src.document_processor import DocumentProcessor
14
+ from src.vector_store import VectorStore
15
+ from src.llm_handler import LLMHandler
16
+ from src.utils import setup_directories, create_sample_files
17
+ from config import Config
18
+
19
def test_system_setup():
    """Test system setup and dependencies.

    Builds a ``Config``, passes it to ``setup_directories``, and asks
    ``create_sample_files`` to populate the ``sample_files`` directory
    (relative to the current working directory). The test makes no
    assertions of its own; it passes as long as none of those calls raise.
    """
    print("🔧 Testing system setup...")

    config = Config()
    setup_directories(config)

    # Sample files are written relative to the current working directory.
    sample_dir = Path("sample_files")
    create_sample_files(sample_dir)

    print("✅ System setup test passed")
31
+
32
def test_document_processing():
    """Test document processing functionality.

    Writes a one-sentence ``.txt`` file to a temporary location, runs it
    through ``DocumentProcessor.process_document``, and checks that at least
    one chunk is produced and that the first chunk contains the sentence
    unchanged. The temporary file is always removed afterwards.

    Raises:
        AssertionError: If no chunks are returned or the text is not found
            in the first chunk.
    """
    print("📄 Testing document processing...")

    processor = DocumentProcessor()

    # delete=False so the file survives the ``with`` block and can be
    # reopened by path; it is removed explicitly in the ``finally`` below.
    sample_text = "This is a test document for the Smart RAG API."
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', delete=False, encoding='utf-8'
    ) as f:
        f.write(sample_text)
        temp_path = f.name

    try:
        chunks = processor.process_document(temp_path, '.txt')
        assert len(chunks) > 0, "No chunks created"
        assert sample_text in chunks[0], "Content not preserved"
        print("✅ Text processing test passed")
    finally:
        os.unlink(temp_path)
51
+
52
def test_vector_store():
    """Test vector store functionality.

    Builds a ``VectorStore`` from the processor's ``embedding_model``, adds
    three short passages, then runs a single query with ``k=2`` and checks
    that at least one result comes back and that the first result's
    ``'score'`` is positive.

    Raises:
        AssertionError: If the search returns nothing or the first score is
            not greater than zero.
    """
    print("🔍 Testing vector store...")

    processor = DocumentProcessor()
    vector_store = VectorStore(processor.embedding_model)

    # Add test documents
    test_chunks = [
        "The Smart RAG API processes multiple document formats.",
        "It uses FAISS for vector similarity search.",
        "The system supports PDF, Word, and image files.",
    ]

    vector_store.add_documents(test_chunks, "test_file", "test.txt")

    # Test search
    results = vector_store.search("What does the API process?", k=2)
    assert len(results) > 0, "No search results found"
    assert results[0]['score'] > 0, "Invalid similarity score"

    print("✅ Vector store test passed")
74
+
75
def test_llm_handler():
    """Test LLM handler functionality.

    Creates an ``LLMHandler``, asks it one question against a single context
    passage, and checks that the answer is longer than 10 characters and is
    not flagged as an error message.

    This test is best-effort: every exception, including a failed assertion,
    is caught and printed as a warning instead of being propagated, so a
    model that cannot be loaded does not abort the remaining tests.
    """
    print("🤖 Testing LLM handler...")

    try:
        llm = LLMHandler()

        # Test answer generation
        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]

        answer = llm.generate_answer(question, context)
        assert len(answer) > 10, "Answer too short"

        # The answer is rejected only when it contains BOTH "error" and
        # "apologize"; either word on its own is accepted.
        # NOTE(review): if an answer containing either word should fail,
        # this condition needs to become `"error" not in ... and
        # "apologize" not in ...` - confirm against LLMHandler's fallback
        # message.
        lowered = answer.lower()
        assert not ("error" in lowered and "apologize" in lowered), f"Error in answer: {answer}"

        print("✅ LLM handler test passed")

    except Exception as e:
        # Deliberately broad: report and carry on rather than fail the run.
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")
95
+
96
def test_full_pipeline():
    """Test the complete RAG pipeline.

    Writes a short documentation passage to a temporary ``.txt`` file,
    chunks it with ``DocumentProcessor``, indexes the chunks in a
    ``VectorStore``, and then, for each of four questions, retrieves the top
    three chunks and asks ``LLMHandler`` to generate an answer. The first
    100 characters of each answer are printed; the answers themselves are
    not asserted on.

    Failures while constructing the processor or vector store propagate to
    the caller. Any exception raised after that point is caught and printed
    instead of being re-raised. The temporary file is removed whether or not
    the queries succeed.
    """
    print("🔄 Testing complete pipeline...")

    # Initialize components
    processor = DocumentProcessor()
    vector_store = VectorStore(processor.embedding_model)

    try:
        llm = LLMHandler()

        # Create test document
        test_content = """
        Smart RAG API Documentation

        The Smart RAG API is an intelligent document processing system that can:
        1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
        2. Extract text using OCR for image-based documents
        3. Create searchable embeddings using sentence transformers
        4. Answer questions using advanced language models
        5. Provide context-aware responses with source attribution

        Key features include:
        - Free and open-source implementation
        - No API keys required
        - Runs entirely on Hugging Face infrastructure
        - Supports both text and image-based queries

        The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
        AI models from Hugging Face for all processing tasks.
        """

        # delete=False so the file can be reopened by path after the
        # ``with`` block; it is removed in the ``finally`` below.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as f:
            f.write(test_content)
            temp_path = f.name

        try:
            # Process document
            chunks = processor.process_document(temp_path, '.txt')
            vector_store.add_documents(chunks, "pipeline_test", "test_doc.txt")

            # Test queries
            test_questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?",
            ]

            for question in test_questions:
                print(f" Testing: {question}")

                # Search
                results = vector_store.search(question, k=3)
                contexts = [r['text'] for r in results]

                # Generate answer
                answer = llm.generate_answer(question, contexts)

                print(f" Answer: {answer[:100]}...")
                print()

        finally:
            os.unlink(temp_path)

        print("✅ Full pipeline test passed")

    except Exception as e:
        # Deliberately broad: the failure is reported but not re-raised.
        print(f"❌ Full pipeline test failed: {e}")
166
+
167
def run_all_tests():
    """Run all tests in order, printing a blank line after each one.

    The tests run in a fixed sequence: system setup, document processing,
    vector store, LLM handler, full pipeline. The first exception that
    escapes a test stops the run; it is printed together with its traceback
    rather than propagated, so this function always returns normally.
    """
    print("🚀 Starting Smart RAG API Tests\n")

    try:
        tests = (
            test_system_setup,
            test_document_processing,
            test_vector_store,
            test_llm_handler,
            test_full_pipeline,
        )

        for test in tests:
            test()
            print()

        print("🎉 All tests completed!")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        # Imported lazily: only needed on the failure path.
        import traceback
        traceback.print_exc()
193
+
194
# Script entry point: run the whole suite only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_all_tests()