# convAI / test_deployment.py
# sinhapiyush86's picture
# Upload 15 files
# afad319 verified
#!/usr/bin/env python3
"""
# Test Script for Hugging Face Deployment
This script provides comprehensive testing for the RAG system deployment on Hugging Face Spaces.
## Overview
The test script validates all components required for successful deployment:
- Package imports and dependencies
- Model loading capabilities
- RAG system functionality
- PDF processing components
- Streamlit application integration
## Test Categories
1. **Import Tests**: Verify all required packages can be imported
2. **Model Tests**: Check if AI models can be loaded successfully
3. **Component Tests**: Validate RAG system and PDF processor functionality
4. **Integration Tests**: Ensure Streamlit app can be imported
5. **File Structure Tests**: Confirm all required files are present
6. **Requirements Tests**: Validate dependencies are properly specified
## Usage
Run the script to check deployment readiness:
```bash
python test_deployment.py
```
## Expected Output
The script provides detailed feedback on each test:
- ✅ PASS: Component is ready for deployment
- ❌ FAIL: Component needs attention before deployment
- ⚠️ WARNING: Optional component missing but not critical
"""
import os
import sys
import tempfile
from pathlib import Path
def test_imports():
    """
    Test if all required packages can be imported successfully

    Every package in the table below is attempted, so a single run reports
    all missing or broken dependencies instead of stopping at the first one.

    Packages checked:
    - Streamlit for the web interface
    - PyTorch for deep learning models
    - Transformers for language models
    - Sentence Transformers for embeddings
    - FAISS for vector search
    - Rank BM25 for sparse retrieval
    - PyPDF for document processing

    Returns:
        bool: True if all imports succeed, False otherwise
    """
    import importlib

    print("πŸ” Testing imports...")

    # (display label, importable module name, print the module's __version__?)
    packages = [
        ("Streamlit", "streamlit", True),
        ("PyTorch", "torch", True),
        ("Transformers", "transformers", True),
        ("Sentence Transformers", "sentence_transformers", True),
        ("FAISS", "faiss", True),
        ("Rank BM25", "rank_bm25", False),
        ("PyPDF", "pypdf", True),
    ]

    all_ok = True
    for label, module_name, show_version in packages:
        try:
            module = importlib.import_module(module_name)
        except Exception as e:
            # Not only ImportError: a package can also fail while it is being
            # initialised (e.g. a native library that cannot be loaded).
            print(f"❌ {label} import failed: {e}")
            all_ok = False
            continue

        if show_version:
            # A missing __version__ attribute must not turn a successful
            # import into a crash of the whole check.
            version = getattr(module, "__version__", "unknown")
            print(f"βœ… {label}: {version}")
        else:
            print(f"βœ… {label}")

    return all_ok
def test_rag_system():
    """
    Check that the RAG system can be constructed and queried for statistics

    Steps performed:
    - Import SimpleRAGSystem from the rag_system module
    - Instantiate it with no arguments
    - Call get_stats() and print whatever it returns

    Any exception raised along the way is reported and counted as a failure.

    Returns:
        bool: True if every step succeeds, False otherwise
    """
    print("\nπŸ” Testing RAG system...")
    try:
        from rag_system import SimpleRAGSystem

        # Construction with default arguments
        system = SimpleRAGSystem()
        print("βœ… RAG system initialized")

        # Statistics are only printed, not validated
        print(f"βœ… Stats retrieved: {system.get_stats()}")
    except Exception as error:
        print(f"❌ RAG system test failed: {error}")
        return False
    return True
def test_pdf_processor():
    """
    Check that the PDF processor can be constructed and can preprocess a query

    Steps performed:
    - Import SimplePDFProcessor from the pdf_processor module
    - Instantiate it with no arguments
    - Run preprocess_query() on a fixed sample question and print the result

    Any exception raised along the way is reported and counted as a failure.

    Returns:
        bool: True if every step succeeds, False otherwise
    """
    print("\nπŸ” Testing PDF processor...")
    sample_query = "What is the revenue?"
    try:
        from pdf_processor import SimplePDFProcessor

        # Construction with default arguments
        pdf_tool = SimplePDFProcessor()
        print("βœ… PDF processor initialized")

        # The preprocessed text is only printed, not validated
        print(f"βœ… Query preprocessing: '{pdf_tool.preprocess_query(sample_query)}'")
    except Exception as error:
        print(f"❌ PDF processor test failed: {error}")
        return False
    return True
def test_model_loading():
    """
    Check that the embedding model and the generative model can be loaded

    Steps performed:
    - Load the "all-MiniLM-L6-v2" sentence-transformer
    - Load the tokenizer for "Qwen/Qwen2.5-1.5B-Instruct"
    - Load the causal language model for the same checkpoint with
      device_map="cpu" and torch_dtype="auto"

    The loaded objects are not exercised; the check only confirms that each
    load call returns without raising.

    Returns:
        bool: True if every load succeeds, False otherwise
    """
    print("\nπŸ” Testing model loading...")
    llm_name = "Qwen/Qwen2.5-1.5B-Instruct"
    try:
        from sentence_transformers import SentenceTransformer
        from transformers import AutoTokenizer, AutoModelForCausalLM

        # Embedding model
        embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
        print("βœ… Embedding model loaded")

        # Tokenizer of the generative model
        llm_tokenizer = AutoTokenizer.from_pretrained(llm_name, trust_remote_code=True)
        print("βœ… Tokenizer loaded")

        # Generative model, placed on the CPU
        load_options = {
            "trust_remote_code": True,
            "torch_dtype": "auto",
            "device_map": "cpu",
        }
        llm = AutoModelForCausalLM.from_pretrained(llm_name, **load_options)
        print("βœ… Generative model loaded")
    except Exception as error:
        print(f"❌ Model loading failed: {error}")
        return False
    return True
def test_streamlit_app():
    """
    Check that the main application module can be imported

    Importing `app` executes its module-level code, so any exception raised
    there (including a failed import inside the app) is reported and counted
    as a failure.

    Returns:
        bool: True if `import app` succeeds, False otherwise
    """
    print("\nπŸ” Testing Streamlit app...")
    imported_ok = True
    try:
        # The module is imported only for its side effect of loading cleanly
        import app

        print("βœ… Streamlit app imported successfully")
    except Exception as error:
        print(f"❌ Streamlit app test failed: {error}")
        imported_ok = False
    return imported_ok
def test_file_structure():
    """
    Test if all required files exist in the project

    Paths are resolved against the current working directory, so the script
    has to be started from the project root. Each entry must be a regular
    file; a directory that happens to carry the same name does not count.

    Files checked:
    - Main application files
    - Configuration files
    - Documentation files

    Returns:
        bool: True if all required files exist, False otherwise
    """
    print("\nπŸ” Testing file structure...")

    # List of required files for deployment
    required_files = [
        "app.py",  # Main Streamlit application
        "rag_system.py",  # Core RAG system
        "pdf_processor.py",  # PDF processing utilities
        "requirements.txt",  # Python dependencies
        "README.md",  # Project documentation
    ]

    missing_files = []
    for file_name in required_files:
        # isfile() rather than exists(): a directory named like a required
        # file cannot be imported or read.
        if os.path.isfile(file_name):
            print(f"βœ… {file_name}")
        else:
            print(f"❌ {file_name} (missing)")
            missing_files.append(file_name)

    if missing_files:
        print(f"❌ Missing files: {missing_files}")
        return False
    return True
def _requirement_names(requirements_text):
    """Return the set of normalized project names declared in requirements text."""
    import re

    # A project name, followed by end of line or the start of extras, a
    # version specifier, an environment marker or a direct URL reference.
    name_pattern = re.compile(
        r"([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)\s*(?=$|[\[<>=!~;@(])"
    )

    names = set()
    for raw_line in requirements_text.splitlines():
        # Drop comments so a commented-out dependency is not counted.
        line = raw_line.split("#", 1)[0].strip()
        found = name_pattern.match(line)
        if found:
            # PEP 503 normalization: case-insensitive, and runs of "-", "_"
            # and "." are equivalent.
            names.add(re.sub(r"[-_.]+", "-", found.group(1)).lower())
    return names


def test_requirements():
    """
    Test if requirements.txt contains all essential packages

    The file is read from the current working directory and parsed line by
    line. Package names are compared after normalization, so
    "sentence_transformers" satisfies "sentence-transformers", while
    look-alikes such as "torchvision" or "pypdf2" do not satisfy "torch" or
    "pypdf". Comment lines and option lines (e.g. "-r other.txt") are ignored.
    Version specifiers are accepted but not required.

    Returns:
        bool: True if every essential package is declared, False if any is
        missing or the file cannot be read
    """
    print("\nπŸ” Testing requirements.txt...")
    try:
        # utf-8-sig tolerates a BOM; undecodable bytes can only occur in
        # comments, so they are replaced rather than treated as fatal.
        with open("requirements.txt", "r", encoding="utf-8-sig", errors="replace") as f:
            requirements = f.read()
    except OSError as e:
        print(f"❌ Requirements test failed: {e}")
        return False

    declared = _requirement_names(requirements)

    # List of essential packages that must be present
    essential_packages = [
        "streamlit",  # Web framework
        "torch",  # Deep learning
        "transformers",  # Language models
        "sentence-transformers",  # Embeddings
        "faiss-cpu",  # Vector search
        "rank-bm25",  # Sparse retrieval
        "pypdf",  # PDF processing
    ]

    missing_packages = []
    for package in essential_packages:
        if package in declared:
            print(f"βœ… {package}")
        else:
            print(f"❌ {package} (missing)")
            missing_packages.append(package)

    if missing_packages:
        print(f"❌ Missing packages: {missing_packages}")
        return False
    return True
def main():
    """
    Run all deployment tests and provide comprehensive feedback

    This function:
    1. Executes all test categories in a fixed order
    2. Records each result (an exception inside a test counts as a failure)
    3. Prints a per-test summary and the overall pass count
    4. Prints next steps or troubleshooting hints

    Returns:
        int: 0 if every test passed, 1 otherwise, suitable as a process
        exit status
    """
    print("πŸš€ Hugging Face Deployment Test\n")

    # Define all test functions with descriptive names
    tests = [
        ("File Structure", test_file_structure),
        ("Requirements", test_requirements),
        ("Imports", test_imports),
        ("Model Loading", test_model_loading),
        ("PDF Processor", test_pdf_processor),
        ("RAG System", test_rag_system),
        ("Streamlit App", test_streamlit_app),
    ]

    # Execute all tests and collect results; one crashing test must not
    # prevent the remaining ones from running.
    results = []
    for test_name, test_func in tests:
        try:
            result = bool(test_func())
        except Exception as e:
            print(f"❌ {test_name} test failed with exception: {e}")
            result = False
        results.append((test_name, result))

    # =========================================================================
    # RESULTS SUMMARY
    # =========================================================================
    print("\n" + "=" * 50)
    print("πŸ“Š Test Results Summary")
    print("=" * 50)

    # Show individual test results
    for test_name, result in results:
        status = "βœ… PASS" if result else "❌ FAIL"
        print(f"{test_name:20} {status}")

    # Display overall statistics
    total = len(results)
    passed = sum(result for _, result in results)
    print(f"\nOverall: {passed}/{total} tests passed")

    # =========================================================================
    # DEPLOYMENT RECOMMENDATIONS
    # =========================================================================
    if passed == total:
        print("πŸŽ‰ All tests passed! Ready for Hugging Face deployment.")
        print("\nNext steps:")
        print("1. Create a new Hugging Face Space")
        print("2. Upload all files from this directory")
        print("3. Set the SDK to 'Docker'")
        print("4. Deploy and test your RAG system!")
        return 0

    print("⚠️ Some tests failed. Please fix the issues before deployment.")
    print("\nTroubleshooting:")
    print("1. Install missing dependencies: pip install -r requirements.txt")
    print("2. Check file permissions and paths")
    print("3. Verify model download permissions")
    print("4. Test locally first: streamlit run app.py")
    return 1


# =============================================================================
# SCRIPT ENTRY POINT
# =============================================================================
if __name__ == "__main__":
    # Propagate the outcome as the exit status so CI and deploy scripts can
    # detect a failed readiness check.
    sys.exit(main())