Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| # Test Script for Hugging Face Deployment | |
| This script provides comprehensive testing for the RAG system deployment on Hugging Face Spaces. | |
| ## Overview | |
| The test script validates all components required for successful deployment: | |
| - Package imports and dependencies | |
| - Model loading capabilities | |
| - RAG system functionality | |
| - PDF processing components | |
| - Streamlit application integration | |
| ## Test Categories | |
| 1. **Import Tests**: Verify all required packages can be imported | |
| 2. **Model Tests**: Check if AI models can be loaded successfully | |
| 3. **Component Tests**: Validate RAG system and PDF processor functionality | |
| 4. **Integration Tests**: Ensure Streamlit app can be imported | |
| 5. **File Structure Tests**: Confirm all required files are present | |
| 6. **Requirements Tests**: Validate dependencies are properly specified | |
| ## Usage | |
| Run the script to check deployment readiness: | |
| ```bash | |
| python test_deployment.py | |
| ``` | |
| ## Expected Output | |
| The script provides detailed feedback on each test: | |
| - ✅ PASS: Component is ready for deployment | |
| - ❌ FAIL: Component needs attention before deployment | |
| - ⚠️ WARNING: Optional component missing but not critical | |
| """ | |
| import os | |
| import sys | |
| import tempfile | |
| from pathlib import Path | |
def test_imports():
    """
    Test if all required packages can be imported successfully

    Each dependency is imported in a fixed order and a status line is printed
    for it. The check stops at the first package that cannot be imported.

    Packages checked:
    - Streamlit for the web interface
    - PyTorch for deep learning models
    - Transformers for language models
    - Sentence Transformers for embeddings
    - FAISS for vector search
    - Rank BM25 for sparse retrieval
    - PyPDF for document processing

    Returns:
        bool: True if all imports succeed, False otherwise
    """
    import importlib

    print("π Testing imports...")

    # (display label, importable module name, print the version?)
    packages = [
        ("Streamlit", "streamlit", True),
        ("PyTorch", "torch", True),
        ("Transformers", "transformers", True),
        ("Sentence Transformers", "sentence_transformers", True),
        ("FAISS", "faiss", True),
        ("Rank BM25", "rank_bm25", False),
        ("PyPDF", "pypdf", True),
    ]

    for label, module_name, show_version in packages:
        try:
            module = importlib.import_module(module_name)
        except ImportError as e:
            print(f"β {label} import failed: {e}")
            return False

        if show_version:
            # Not every build exposes __version__; a missing attribute must
            # not turn a successful import into a crash of the whole check.
            version = getattr(module, "__version__", "unknown")
            print(f"β {label}: {version}")
        else:
            print(f"β {label}")

    return True
def test_rag_system():
    """
    Smoke-test the RAG system

    Imports SimpleRAGSystem from the rag_system module, builds an instance
    with no arguments and asks it for its statistics via get_stats().
    Any exception raised along the way is reported and counts as a failure.

    Returns:
        bool: True if every step succeeded, False otherwise
    """
    print("\nπ Testing RAG system...")
    try:
        from rag_system import SimpleRAGSystem

        # Step 1: construction with default arguments
        system = SimpleRAGSystem()
        print("β RAG system initialized")

        # Step 2: statistics lookup on the fresh instance
        print(f"β Stats retrieved: {system.get_stats()}")
    except Exception as err:
        print(f"β RAG system test failed: {err}")
        return False
    return True
def test_pdf_processor():
    """
    Smoke-test the PDF processor

    Imports SimplePDFProcessor from the pdf_processor module, builds an
    instance with no arguments and runs preprocess_query() on a fixed sample
    question. Any exception raised along the way is reported and counts as a
    failure.

    Returns:
        bool: True if every step succeeded, False otherwise
    """
    print("\nπ Testing PDF processor...")
    ok = False
    try:
        from pdf_processor import SimplePDFProcessor

        # Construction with default arguments
        pdf_tool = SimplePDFProcessor()
        print("β PDF processor initialized")

        # Run the query preprocessing step on a sample question
        cleaned = pdf_tool.preprocess_query("What is the revenue?")
        print(f"β Query preprocessing: '{cleaned}'")
        ok = True
    except Exception as err:
        print(f"β PDF processor test failed: {err}")
    return ok
def test_model_loading():
    """
    Test if AI models can be loaded successfully

    This function validates, in order:
    - Sentence transformer (embedding) model loading
    - Language model tokenizer loading
    - Language model loading with CPU configuration

    Only loading is checked; no inference is run. Any exception raised while
    importing the libraries or loading a model is reported and counts as a
    failure.

    Returns:
        bool: True if model loading tests pass, False otherwise
    """
    print("\nπ Testing model loading...")

    embedding_model_name = "all-MiniLM-L6-v2"
    generative_model_name = "Qwen/Qwen2.5-1.5B-Instruct"

    try:
        from sentence_transformers import SentenceTransformer
        from transformers import AutoTokenizer, AutoModelForCausalLM

        # The loaded objects are intentionally not bound to names: the check
        # only needs to know that loading succeeds, and keeping references
        # would hold every model in memory until the function returns.

        # Test embedding model loading
        SentenceTransformer(embedding_model_name)
        print("β Embedding model loaded")

        # Test tokenizer loading
        # NOTE(review): trust_remote_code=True allows code shipped with the
        # model repository to run locally; confirm it is still required.
        AutoTokenizer.from_pretrained(
            generative_model_name, trust_remote_code=True
        )
        print("β Tokenizer loaded")

        # Test model loading with CPU configuration
        AutoModelForCausalLM.from_pretrained(
            generative_model_name,
            trust_remote_code=True,
            torch_dtype="auto",
            device_map="cpu",
        )
        print("β Generative model loaded")
        return True
    except Exception as e:
        print(f"β Model loading failed: {e}")
        return False
def test_streamlit_app():
    """
    Check that the Streamlit application module can be imported

    The only thing exercised is `import app`; whatever that module does at
    import time runs as a side effect. Any exception raised by the import is
    reported and counts as a failure.

    Returns:
        bool: True if the import succeeded, False otherwise
    """
    print("\nπ Testing Streamlit app...")
    imported = False
    try:
        # Importing is the whole test: a broken app.py fails right here
        import app

        print("β Streamlit app imported successfully")
        imported = True
    except Exception as err:
        print(f"β Streamlit app test failed: {err}")
    return imported
def test_file_structure():
    """
    Check that the files needed for deployment are present

    Each required path is looked up relative to the current working
    directory and a status line is printed for it. All paths are checked
    even when an earlier one is missing, so the report is complete.

    Returns:
        bool: True if every required file exists, False otherwise
    """
    print("\nπ Testing file structure...")

    # Paths that must exist for the deployment to work
    required_files = (
        "app.py",  # Main Streamlit application
        "rag_system.py",  # Core RAG system
        "pdf_processor.py",  # PDF processing utilities
        "requirements.txt",  # Python dependencies
        "README.md",  # Project documentation
    )

    absent = []
    for path in required_files:
        if not os.path.exists(path):
            print(f"β {path} (missing)")
            absent.append(path)
            continue
        print(f"β {path}")

    if not absent:
        return True

    print(f"β Missing files: {absent}")
    return False
def test_requirements():
    """
    Test if requirements.txt contains all essential packages

    The file is read from the current working directory and parsed line by
    line. Comments, blank lines and pip option lines (starting with "-") are
    ignored. The requirement name at the start of each remaining line is
    normalized (lower-cased, runs of "-", "_" and "." collapsed to "-") and
    compared against the list of essential packages.

    Names are compared exactly, so "torchvision" does not count as "torch"
    and "sentence-transformers" does not count as "transformers". Version
    specifiers are not validated.

    Returns:
        bool: True if every essential package is listed, False if any is
        missing or the file cannot be read
    """
    import re

    print("\nπ Testing requirements.txt...")
    try:
        # utf-8-sig also accepts a file saved with a leading byte-order mark
        with open("requirements.txt", "r", encoding="utf-8-sig") as f:
            lines = f.read().splitlines()

        declared = set()
        for line in lines:
            # Drop trailing comments and surrounding whitespace
            entry = line.split("#", 1)[0].strip()
            if not entry or entry.startswith("-"):
                continue  # blank line or pip option such as -r / --index-url
            # The distribution name is the leading run of name characters;
            # extras, version specifiers and markers follow it.
            match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", entry)
            if match:
                name = re.sub(r"[-_.]+", "-", match.group(0)).lower()
                declared.add(name)

        # List of essential packages that must be present
        essential_packages = [
            "streamlit",  # Web framework
            "torch",  # Deep learning
            "transformers",  # Language models
            "sentence-transformers",  # Embeddings
            "faiss-cpu",  # Vector search
            "rank-bm25",  # Sparse retrieval
            "pypdf",  # PDF processing
        ]

        missing_packages = []
        for package in essential_packages:
            if package in declared:
                print(f"β {package}")
            else:
                print(f"β {package} (missing)")
                missing_packages.append(package)

        if missing_packages:
            print(f"β Missing packages: {missing_packages}")
            return False
        return True
    except Exception as e:
        print(f"β Requirements test failed: {e}")
        return False
def main(tests=None):
    """
    Run all deployment tests and provide comprehensive feedback

    This function:
    1. Executes all test categories in order
    2. Tracks test results (an exception inside a test counts as a failure)
    3. Prints a per-test summary and overall statistics
    4. Prints deployment recommendations or troubleshooting hints

    Args:
        tests: Optional list of (name, callable) pairs to run instead of the
            built-in suite. Each callable takes no arguments and returns a
            truthy value on success. Defaults to the full deployment suite.

    Returns:
        int: 0 if every test passed, 1 otherwise, so the value can be used
        directly as the process exit status.
    """
    print("π Hugging Face Deployment Test\n")

    # Define all test functions with descriptive names
    if tests is None:
        tests = [
            ("File Structure", test_file_structure),
            ("Requirements", test_requirements),
            ("Imports", test_imports),
            ("Model Loading", test_model_loading),
            ("PDF Processor", test_pdf_processor),
            ("RAG System", test_rag_system),
            ("Streamlit App", test_streamlit_app),
        ]

    # Execute all tests and collect results; one crashing test must not
    # prevent the remaining ones from running.
    results = []
    for test_name, test_func in tests:
        try:
            result = test_func()
            results.append((test_name, result))
        except Exception as e:
            print(f"β {test_name} test failed with exception: {e}")
            results.append((test_name, False))

    # =========================================================================
    # RESULTS SUMMARY
    # =========================================================================
    print("\n" + "=" * 50)
    print("π Test Results Summary")
    print("=" * 50)

    # Show individual test results
    for test_name, result in results:
        status = "β PASS" if result else "β FAIL"
        print(f"{test_name:20} {status}")

    # Display overall statistics
    total = len(results)
    passed = sum(1 for _, result in results if result)
    print(f"\nOverall: {passed}/{total} tests passed")

    # =========================================================================
    # DEPLOYMENT RECOMMENDATIONS
    # =========================================================================
    if passed == total:
        print("π All tests passed! Ready for Hugging Face deployment.")
        print("\nNext steps:")
        print("1. Create a new Hugging Face Space")
        print("2. Upload all files from this directory")
        print("3. Set the SDK to 'Docker'")
        print("4. Deploy and test your RAG system!")
        return 0

    print("β οΈ Some tests failed. Please fix the issues before deployment.")
    print("\nTroubleshooting:")
    print("1. Install missing dependencies: pip install -r requirements.txt")
    print("2. Check file permissions and paths")
    print("3. Verify model download permissions")
    print("4. Test locally first: streamlit run app.py")
    return 1


# =============================================================================
# SCRIPT ENTRY POINT
# =============================================================================
if __name__ == "__main__":
    # Propagate the outcome as the exit status so CI and deploy scripts can
    # detect a failed check.
    sys.exit(main())