# Rag-based-api-task / tests/test_api.py
# Author: sairika
# Commit: ee566a1 (verified) — "Create tests/test_api.py"
#!/usr/bin/env python3
"""
Test script for Smart RAG API
"""
import os
import tempfile
import sys
from pathlib import Path
# Add src to path
sys.path.append('src')
from src.document_processor import DocumentProcessor
from src.vector_store import VectorStore
from src.llm_handler import LLMHandler
from src.utils import setup_directories, create_sample_files
from config import Config
def test_system_setup():
    """Verify directories and sample files can be created without error."""
    print("🔧 Testing system setup...")
    cfg = Config()
    setup_directories(cfg)
    # Generate the bundled example documents used by later tests.
    create_sample_files(Path("sample_files"))
    print("✅ System setup test passed")
def test_document_processing():
    """Verify text extraction and chunking on a temporary .txt file."""
    print("📄 Testing document processing...")
    doc_processor = DocumentProcessor()
    content = "This is a test document for the Smart RAG API."
    # Write the sample text to a real file so the processor can read it back.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
        tmp.write(content)
        tmp_path = tmp.name
    try:
        pieces = doc_processor.process_document(tmp_path, '.txt')
        assert len(pieces) > 0, "No chunks created"
        assert content in pieces[0], "Content not preserved"
        print("✅ Text processing test passed")
    finally:
        # Always remove the temp file, even on assertion failure.
        os.unlink(tmp_path)
def test_vector_store():
    """Verify indexing and similarity search over a few text chunks."""
    print("🔍 Testing vector store...")
    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)
    # Index three small chunks under a single synthetic source file.
    store.add_documents(
        [
            "The Smart RAG API processes multiple document formats.",
            "It uses FAISS for vector similarity search.",
            "The system supports PDF, Word, and image files.",
        ],
        "test_file",
        "test.txt",
    )
    hits = store.search("What does the API process?", k=2)
    assert len(hits) > 0, "No search results found"
    assert hits[0]['score'] > 0, "Invalid similarity score"
    print("✅ Vector store test passed")
def test_llm_handler():
    """Verify the LLM handler can generate a plausible answer.

    Failures are reported but not re-raised, since model loading may be
    unavailable in some environments (the rest of the system still works).
    """
    print("🤖 Testing LLM handler...")
    try:
        llm = LLMHandler()
        # Test answer generation against a single-chunk context.
        question = "What is this system about?"
        context = ["The Smart RAG API is a document processing system that uses AI to answer questions."]
        answer = llm.generate_answer(question, context)
        assert len(answer) > 10, "Answer too short"
        # BUG FIX: the original used `or`, which only failed when BOTH
        # "error" AND "apologize" appeared in the answer. The intent is to
        # reject an answer containing EITHER marker, so combine with `and`
        # (De Morgan: not(A or B) == not A and not B).
        lowered = answer.lower()
        assert "error" not in lowered and "apologize" not in lowered, f"Error in answer: {answer}"
        print("✅ LLM handler test passed")
    except Exception as e:
        print(f"⚠️ LLM handler test failed: {e}")
        print("This might be due to model loading issues, but the system can still work")
def test_full_pipeline():
    """Exercise the end-to-end RAG flow: ingest a document, search it, answer queries."""
    print("🔄 Testing complete pipeline...")
    # Wire up the three pipeline components.
    doc_processor = DocumentProcessor()
    store = VectorStore(doc_processor.embedding_model)
    try:
        llm = LLMHandler()
        # Synthetic documentation used as the corpus for this run.
        test_content = """
Smart RAG API Documentation
The Smart RAG API is an intelligent document processing system that can:
1. Process multiple file formats (PDF, Word, Text, Images, CSV, Databases)
2. Extract text using OCR for image-based documents
3. Create searchable embeddings using sentence transformers
4. Answer questions using advanced language models
5. Provide context-aware responses with source attribution
Key features include:
- Free and open-source implementation
- No API keys required
- Runs entirely on Hugging Face infrastructure
- Supports both text and image-based queries
The system is built with Python, FastAPI/Gradio, and uses state-of-the-art
AI models from Hugging Face for all processing tasks.
"""
        # Persist the corpus to disk so the processor can ingest it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as tmp:
            tmp.write(test_content)
            tmp_path = tmp.name
        try:
            pieces = doc_processor.process_document(tmp_path, '.txt')
            store.add_documents(pieces, "pipeline_test", "test_doc.txt")
            # Run a handful of representative queries through search + generation.
            questions = [
                "What file formats does the API support?",
                "What are the key features?",
                "Is this system free to use?",
                "What models does it use?",
            ]
            for question in questions:
                print(f" Testing: {question}")
                hits = store.search(question, k=3)
                contexts = [hit['text'] for hit in hits]
                answer = llm.generate_answer(question, contexts)
                print(f" Answer: {answer[:100]}...")
                print()
        finally:
            # Clean up the temp corpus regardless of outcome.
            os.unlink(tmp_path)
        print("✅ Full pipeline test passed")
    except Exception as e:
        print(f"❌ Full pipeline test failed: {e}")
def run_all_tests():
    """Run every test in order; print a traceback if any raises."""
    print("🚀 Starting Smart RAG API Tests\n")
    suite = (
        test_system_setup,
        test_document_processing,
        test_vector_store,
        test_llm_handler,
        test_full_pipeline,
    )
    try:
        # Each test prints its own status; a blank line separates them.
        for check in suite:
            check()
            print()
        print("🎉 All tests completed!")
    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    run_all_tests()