#!/usr/bin/env python3
"""
# Test Script for Hugging Face Deployment

This script provides comprehensive testing for the RAG system deployment on
Hugging Face Spaces.

## Overview

The test script validates all components required for successful deployment:
- Package imports and dependencies
- Model loading capabilities
- RAG system functionality
- PDF processing components
- Streamlit application integration

## Test Categories

1. **Import Tests**: Verify all required packages can be imported
2. **Model Tests**: Check if AI models can be loaded successfully
3. **Component Tests**: Validate RAG system and PDF processor functionality
4. **Integration Tests**: Ensure Streamlit app can be imported
5. **File Structure Tests**: Confirm all required files are present
6. **Requirements Tests**: Validate dependencies are properly specified

## Usage

Run the script to check deployment readiness:

```bash
python test_deployment.py
```

The process exits with status 0 when every test passes and 1 otherwise, so
the script can gate a CI job or a deployment step.

## Expected Output

The script provides detailed feedback on each test:
- ✅ PASS: Component is ready for deployment
- ❌ FAIL: Component needs attention before deployment
- ⚠️ WARNING: Optional component missing but not critical
"""

import importlib
import os
import re
import sys
import tempfile
from pathlib import Path

# (import name, display label, whether to print the module's __version__)
_REQUIRED_MODULES = (
    ("streamlit", "Streamlit", True),  # Web interface
    ("torch", "PyTorch", True),  # Deep learning framework
    ("transformers", "Transformers", True),  # Hugging Face models
    ("sentence_transformers", "Sentence Transformers", True),  # Embeddings
    ("faiss", "FAISS", True),  # Vector search
    ("rank_bm25", "Rank BM25", False),  # Sparse retrieval
    ("pypdf", "PyPDF", True),  # PDF processing
)

# Files that must be present in the working directory for deployment.
_REQUIRED_FILES = (
    "app.py",  # Main Streamlit application
    "rag_system.py",  # Core RAG system
    "pdf_processor.py",  # PDF processing utilities
    "requirements.txt",  # Python dependencies
    "README.md",  # Project documentation
)

# Distribution names that requirements.txt must declare.
_ESSENTIAL_PACKAGES = (
    "streamlit",  # Web framework
    "torch",  # Deep learning
    "transformers",  # Language models
    "sentence-transformers",  # Embeddings
    "faiss-cpu",  # Vector search
    "rank-bm25",  # Sparse retrieval
    "pypdf",  # PDF processing
)

_EMBEDDING_MODEL = "all-MiniLM-L6-v2"
_GENERATIVE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"

# Leading project name of a requirement line. Comment lines, blank lines and
# pip options ("-r", "--extra-index-url", ...) do not start with an
# alphanumeric character and therefore never match.
_REQUIREMENT_NAME = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")


def _normalize_name(name):
    """Return *name* lower-cased with runs of ``-``, ``_``, ``.`` folded to ``-``."""
    return re.sub(r"[-_.]+", "-", name).lower()


def _declared_requirements(text):
    """Return the set of normalized project names declared in requirements text.

    Only the leading name of each line is considered; version specifiers,
    extras, environment markers and trailing comments are ignored.
    """
    names = set()
    for line in text.splitlines():
        match = _REQUIREMENT_NAME.match(line)
        if match:
            names.add(_normalize_name(match.group(1)))
    return names


def test_imports():
    """
    Test if all required packages can be imported successfully

    Imports each module listed in ``_REQUIRED_MODULES`` in order and prints
    its version where one is reported. Checking stops at the first module
    that fails to import.

    Returns:
        bool: True if all imports succeed, False otherwise
    """
    print("🔍 Testing imports...")

    for module_name, label, show_version in _REQUIRED_MODULES:
        try:
            module = importlib.import_module(module_name)
        except ImportError as e:
            print(f"❌ {label} import failed: {e}")
            return False

        if show_version:
            # A package without __version__ still imported fine; do not let a
            # missing attribute turn a successful import into a failure.
            version = getattr(module, "__version__", "unknown")
            print(f"✅ {label}: {version}")
        else:
            print(f"✅ {label}")

    return True


def test_rag_system():
    """
    Test the RAG system initialization and basic functionality

    Instantiates ``SimpleRAGSystem`` from ``rag_system`` with no arguments and
    calls its ``get_stats()`` method, printing the result.

    Returns:
        bool: True if RAG system tests pass, False otherwise
    """
    print("\n🔍 Testing RAG system...")

    try:
        from rag_system import SimpleRAGSystem

        # Test RAG system initialization
        rag = SimpleRAGSystem()
        print("✅ RAG system initialized")

        # Test statistics retrieval
        stats = rag.get_stats()
        print(f"✅ Stats retrieved: {stats}")

        return True
    except Exception as e:
        # Any failure (import, construction, stats) marks the component as
        # not ready; the message is reported instead of aborting the run.
        print(f"❌ RAG system test failed: {e}")
        return False


def test_pdf_processor():
    """
    Test the PDF processor functionality

    Instantiates ``SimplePDFProcessor`` from ``pdf_processor`` with no
    arguments and runs ``preprocess_query`` on a sample question, printing
    the result.

    Returns:
        bool: True if PDF processor tests pass, False otherwise
    """
    print("\n🔍 Testing PDF processor...")

    try:
        from pdf_processor import SimplePDFProcessor

        # Test PDF processor initialization
        processor = SimplePDFProcessor()
        print("✅ PDF processor initialized")

        # Test query preprocessing functionality
        processed_query = processor.preprocess_query("What is the revenue?")
        print(f"✅ Query preprocessing: '{processed_query}'")

        return True
    except Exception as e:
        print(f"❌ PDF processor test failed: {e}")
        return False


def test_model_loading():
    """
    Test if AI models can be loaded successfully

    Loads, in order:
    - the sentence-transformer embedding model
    - the language model tokenizer
    - the language model itself, with a CPU device map

    The loaded objects are discarded immediately; only the ability to load
    them is checked.

    Returns:
        bool: True if model loading tests pass, False otherwise
    """
    print("\n🔍 Testing model loading...")

    try:
        from sentence_transformers import SentenceTransformer
        from transformers import AutoModelForCausalLM, AutoTokenizer

        # Test embedding model loading
        SentenceTransformer(_EMBEDDING_MODEL)
        print("✅ Embedding model loaded")

        # NOTE(review): trust_remote_code=True lets the model repository run
        # arbitrary Python on this machine. It is kept to preserve existing
        # behavior; confirm it is actually required for this model.
        AutoTokenizer.from_pretrained(_GENERATIVE_MODEL, trust_remote_code=True)
        print("✅ Tokenizer loaded")

        # Test model loading with CPU configuration
        AutoModelForCausalLM.from_pretrained(
            _GENERATIVE_MODEL,
            trust_remote_code=True,
            torch_dtype="auto",
            device_map="cpu",
        )
        print("✅ Generative model loaded")

        return True
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        return False


def test_streamlit_app():
    """
    Test if the Streamlit app module can be imported

    Importing ``app`` executes its module-level code, so any exception raised
    there is reported as a failure.

    Returns:
        bool: True if Streamlit app tests pass, False otherwise
    """
    print("\n🔍 Testing Streamlit app...")

    try:
        # Test if app.py can be imported
        import app  # noqa: F401

        print("✅ Streamlit app imported successfully")
        return True
    except Exception as e:
        print(f"❌ Streamlit app test failed: {e}")
        return False


def test_file_structure():
    """
    Test if all required files exist in the project

    Each name in ``_REQUIRED_FILES`` is looked up relative to the current
    working directory and must be a regular file (a directory with the same
    name does not count).

    Returns:
        bool: True if all required files exist, False otherwise
    """
    print("\n🔍 Testing file structure...")

    missing_files = []
    for filename in _REQUIRED_FILES:
        if Path(filename).is_file():
            print(f"✅ {filename}")
        else:
            print(f"❌ {filename} (missing)")
            missing_files.append(filename)

    if missing_files:
        print(f"❌ Missing files: {missing_files}")
        return False

    return True


def test_requirements():
    """
    Test if requirements.txt contains all essential packages

    Reads ``requirements.txt`` from the current working directory and checks
    that every name in ``_ESSENTIAL_PACKAGES`` is declared. Names are compared
    exactly after normalization (case-insensitive, ``-``/``_``/``.``
    equivalent), so ``sentence-transformers`` does not satisfy
    ``transformers`` and ``torchvision`` does not satisfy ``torch``.
    Version specifiers are not validated.

    Returns:
        bool: True if requirements are valid, False otherwise
    """
    print("\n🔍 Testing requirements.txt...")

    try:
        with open("requirements.txt", "r", encoding="utf-8") as f:
            requirements = f.read()
    except (OSError, UnicodeError) as e:
        print(f"❌ Requirements test failed: {e}")
        return False

    declared = _declared_requirements(requirements)

    missing_packages = []
    for package in _ESSENTIAL_PACKAGES:
        if _normalize_name(package) in declared:
            print(f"✅ {package}")
        else:
            print(f"❌ {package} (missing)")
            missing_packages.append(package)

    if missing_packages:
        print(f"❌ Missing packages: {missing_packages}")
        return False

    return True


def main():
    """
    Run all deployment tests and provide comprehensive feedback

    This function:
    1. Executes all test categories
    2. Tracks test results
    3. Provides summary statistics
    4. Gives deployment recommendations

    A test that raises is recorded as failed and the remaining tests still
    run.

    Returns:
        bool: True if every test passed, False otherwise
    """
    print("🚀 Hugging Face Deployment Test\n")

    # Cheap, local checks run first; model loading and app import run later.
    tests = [
        ("File Structure", test_file_structure),
        ("Requirements", test_requirements),
        ("Imports", test_imports),
        ("Model Loading", test_model_loading),
        ("PDF Processor", test_pdf_processor),
        ("RAG System", test_rag_system),
        ("Streamlit App", test_streamlit_app),
    ]

    # Execute all tests and collect results
    results = []
    for test_name, test_func in tests:
        try:
            result = test_func()
        except Exception as e:
            # Top-level boundary: report and keep going so the summary is
            # always complete.
            print(f"❌ {test_name} test failed with exception: {e}")
            result = False
        results.append((test_name, result))

    # -------------------------------------------------------------------------
    # Results summary
    # -------------------------------------------------------------------------
    print("\n" + "=" * 50)
    print("📊 Test Results Summary")
    print("=" * 50)

    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name:20} {status}")

    total = len(results)
    passed = sum(1 for _, result in results if result)
    print(f"\nOverall: {passed}/{total} tests passed")

    # -------------------------------------------------------------------------
    # Deployment recommendations
    # -------------------------------------------------------------------------
    all_passed = passed == total
    if all_passed:
        print("🎉 All tests passed! Ready for Hugging Face deployment.")
        print("\nNext steps:")
        print("1. Create a new Hugging Face Space")
        print("2. Upload all files from this directory")
        print("3. Set the SDK to 'Docker'")
        print("4. Deploy and test your RAG system!")
    else:
        print("⚠️ Some tests failed. Please fix the issues before deployment.")
        print("\nTroubleshooting:")
        print("1. Install missing dependencies: pip install -r requirements.txt")
        print("2. Check file permissions and paths")
        print("3. Verify model download permissions")
        print("4. Test locally first: streamlit run app.py")

    return all_passed


# =============================================================================
# SCRIPT ENTRY POINT
# =============================================================================
if __name__ == "__main__":
    # Non-zero exit status lets CI and deploy hooks detect a failed check.
    sys.exit(0 if main() else 1)