Spaces:

sinhapiyush86
/

convAI

Sleeping

File size: 12,368 Bytes

#!/usr/bin/env python3
"""
# Test Script for Hugging Face Deployment

This script provides comprehensive testing for the RAG system deployment on Hugging Face Spaces.

## Overview

The test script validates all components required for successful deployment:
- Package imports and dependencies
- Model loading capabilities
- RAG system functionality
- PDF processing components
- Streamlit application integration

## Test Categories

1. **Import Tests**: Verify all required packages can be imported
2. **Model Tests**: Check if AI models can be loaded successfully
3. **Component Tests**: Validate RAG system and PDF processor functionality
4. **Integration Tests**: Ensure Streamlit app can be imported
5. **File Structure Tests**: Confirm all required files are present
6. **Requirements Tests**: Validate dependencies are properly specified

## Usage

Run the script to check deployment readiness:
```bash
python test_deployment.py
```

## Expected Output

The script provides detailed feedback on each test:
- ✅ PASS: Component is ready for deployment
- ❌ FAIL: Component needs attention before deployment
- ⚠️ Shown once in the final summary when at least one test failed
"""

import os
import sys
import tempfile
from pathlib import Path


def test_imports():
    """
    Test if all required packages can be imported successfully

    Packages are imported in a fixed order and the check stops at the
    first one that cannot be imported:
    - Streamlit for the web interface
    - PyTorch for deep learning models
    - Transformers for language models
    - Sentence Transformers for embeddings
    - FAISS for vector search
    - Rank BM25 for sparse retrieval
    - PyPDF for document processing

    For each package the version is printed when the module exposes a
    ``__version__`` attribute; otherwise only the package label is printed.

    Returns:
        bool: True if all imports succeed, False otherwise
    """
    import importlib

    print("🔍 Testing imports...")

    # (display label, importable module name), in the order they are checked
    required_modules = (
        ("Streamlit", "streamlit"),  # core web framework
        ("PyTorch", "torch"),  # deep learning framework
        ("Transformers", "transformers"),  # Hugging Face models
        ("Sentence Transformers", "sentence_transformers"),  # embeddings
        ("FAISS", "faiss"),  # vector search
        ("Rank BM25", "rank_bm25"),  # sparse retrieval
        ("PyPDF", "pypdf"),  # PDF processing
    )

    for label, module_name in required_modules:
        try:
            module = importlib.import_module(module_name)
        except ImportError as e:
            print(f"❌ {label} import failed: {e}")
            return False

        # Not every package defines __version__; a successful import must
        # not be turned into a failure by an AttributeError on this lookup.
        version = getattr(module, "__version__", None)
        if version is None:
            print(f"✅ {label}")
        else:
            print(f"✅ {label}: {version}")

    return True


def test_rag_system():
    """
    Smoke-test the RAG system by constructing it and reading its stats

    Steps performed:
    - Import ``SimpleRAGSystem`` from the ``rag_system`` module
    - Instantiate it with no arguments
    - Call ``get_stats()`` and print the returned value

    Any exception raised by these steps is printed and counted as a failure.

    Returns:
        bool: True if every step completed, False otherwise
    """
    print("\n🔍 Testing RAG system...")

    succeeded = False
    try:
        from rag_system import SimpleRAGSystem

        # Construction with default arguments must work
        system = SimpleRAGSystem()
        print("✅ RAG system initialized")

        # get_stats() is the only method exercised by this check
        print(f"✅ Stats retrieved: {system.get_stats()}")

        succeeded = True
    except Exception as error:
        print(f"❌ RAG system test failed: {error}")

    return succeeded


def test_pdf_processor():
    """
    Smoke-test the PDF processor by constructing it and preprocessing a query

    Steps performed:
    - Import ``SimplePDFProcessor`` from the ``pdf_processor`` module
    - Instantiate it with no arguments
    - Run ``preprocess_query`` on a fixed sample question and print the result

    Any exception raised by these steps is printed and counted as a failure.

    Returns:
        bool: True if every step completed, False otherwise
    """
    print("\n🔍 Testing PDF processor...")

    try:
        from pdf_processor import SimplePDFProcessor

        # Construction with default arguments must work
        pdf_tool = SimplePDFProcessor()
        print("✅ PDF processor initialized")

        # Only the query-preprocessing path is exercised; no PDF is read here
        sample_question = "What is the revenue?"
        cleaned = pdf_tool.preprocess_query(sample_question)
        print(f"✅ Query preprocessing: '{cleaned}'")
    except Exception as problem:
        print(f"❌ PDF processor test failed: {problem}")
        return False
    else:
        return True


def test_model_loading():
    """
    Test if AI models can be loaded successfully

    This function validates, in order:
    - Sentence transformer embedding model loading
    - Language model tokenizer loading
    - Language model loading with CPU configuration

    Each loaded object is discarded as soon as its load succeeds, so the
    embedding model and tokenizer are not kept referenced while the larger
    generative model loads. No fallback model is exercised by this check.

    Returns:
        bool: True if model loading tests pass, False otherwise
    """
    print("\n🔍 Testing model loading...")

    # Model identifiers used by this check (single source for both the
    # tokenizer and the generative model load)
    embedding_model_name = "all-MiniLM-L6-v2"
    generative_model_name = "Qwen/Qwen2.5-1.5B-Instruct"

    try:
        from sentence_transformers import SentenceTransformer
        from transformers import AutoTokenizer, AutoModelForCausalLM

        # Test embedding model loading (result intentionally not retained)
        SentenceTransformer(embedding_model_name)
        print("✅ Embedding model loaded")

        # Test tokenizer loading
        # NOTE(review): trust_remote_code=True allows the hub repository to
        # run its own Python code — confirm it is still required here.
        AutoTokenizer.from_pretrained(generative_model_name, trust_remote_code=True)
        print("✅ Tokenizer loaded")

        # Test model loading with CPU configuration
        AutoModelForCausalLM.from_pretrained(
            generative_model_name,
            trust_remote_code=True,
            torch_dtype="auto",
            device_map="cpu",
        )
        print("✅ Generative model loaded")

        return True

    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        return False


def test_streamlit_app():
    """
    Check that the ``app`` module can be imported without raising

    The import statement runs the module's top-level code, so any exception
    raised there (not only import errors) is printed and counted as a failure.

    Returns:
        bool: True if the import completed, False otherwise
    """
    print("\n🔍 Testing Streamlit app...")

    imported = False
    try:
        # The imported module object itself is not used; only the fact
        # that importing it raised nothing matters.
        import app

        print("✅ Streamlit app imported successfully")
        imported = True
    except Exception as error:
        print(f"❌ Streamlit app test failed: {error}")

    return imported


def test_file_structure():
    """
    Verify that every file needed for deployment is present

    Each name is checked relative to the current working directory and a
    ✅/❌ line is printed per file. When anything is absent, a final line
    lists all missing names.

    Returns:
        bool: True if all required files exist, False otherwise
    """
    print("\n🔍 Testing file structure...")

    # Names that must exist for the deployment to work
    expected_files = (
        "app.py",  # Main Streamlit application
        "rag_system.py",  # Core RAG system
        "pdf_processor.py",  # PDF processing utilities
        "requirements.txt",  # Python dependencies
        "README.md",  # Project documentation
    )

    absent = []
    for name in expected_files:
        if not os.path.exists(name):
            print(f"❌ {name} (missing)")
            absent.append(name)
            continue
        print(f"✅ {name}")

    # Nothing missing: the structure check passes
    if not absent:
        return True

    print(f"❌ Missing files: {absent}")
    return False


def _parse_requirement_names(requirements_text):
    """Return the set of normalized project names declared in requirements text."""
    import re

    names = set()
    for raw_line in requirements_text.splitlines():
        # A "#" starts a comment at line start or after whitespace
        line = re.sub(r"(^|\s)#.*$", "", raw_line).strip()

        # Skip blanks and pip option lines such as "-r other.txt" or
        # "--extra-index-url ..."
        if not line or line.startswith("-"):
            continue

        # The project name is the leading run of name characters; extras,
        # version specifiers and environment markers follow it.
        match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", line)
        if match:
            # Lower-case and collapse runs of "-", "_", "." so that e.g.
            # "Rank_BM25" and "rank-bm25" compare equal
            names.add(re.sub(r"[-_.]+", "-", match.group(0)).lower())
    return names


def test_requirements():
    """
    Test if requirements.txt contains all essential packages

    ``requirements.txt`` is read from the current working directory and
    each non-comment line is reduced to its normalized project name. An
    essential package counts as present only when its exact name is
    declared, so ``torchvision`` does not satisfy ``torch`` and a
    commented-out line does not count. Version specifiers are accepted but
    not required or validated.

    Returns:
        bool: True if every essential package is declared, False if any is
        missing or the file cannot be read
    """
    print("\n🔍 Testing requirements.txt...")

    try:
        # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise stick to the first package name
        with open("requirements.txt", "r", encoding="utf-8-sig") as f:
            requirements = f.read()

        declared_packages = _parse_requirement_names(requirements)

        # List of essential packages that must be present
        essential_packages = [
            "streamlit",  # Web framework
            "torch",  # Deep learning
            "transformers",  # Language models
            "sentence-transformers",  # Embeddings
            "faiss-cpu",  # Vector search
            "rank-bm25",  # Sparse retrieval
            "pypdf",  # PDF processing
        ]

        missing_packages = []
        for package in essential_packages:
            if package in declared_packages:
                print(f"✅ {package}")
            else:
                print(f"❌ {package} (missing)")
                missing_packages.append(package)

        if missing_packages:
            print(f"❌ Missing packages: {missing_packages}")
            return False

        return True

    except Exception as e:
        print(f"❌ Requirements test failed: {e}")
        return False


def main():
    """
    Run all deployment tests and provide comprehensive feedback

    This function:
    1. Executes all test categories in a fixed order
    2. Tracks test results (an exception from a test counts as a failure)
    3. Prints a per-test PASS/FAIL table and overall statistics
    4. Prints deployment next steps or troubleshooting hints

    Returns:
        int: 0 when every test passed, 1 otherwise, so the value can be
        handed to ``sys.exit`` and failures are visible to shells and CI
    """
    print("🚀 Hugging Face Deployment Test\n")

    # Define all test functions with descriptive names
    tests = [
        ("File Structure", test_file_structure),
        ("Requirements", test_requirements),
        ("Imports", test_imports),
        ("Model Loading", test_model_loading),
        ("PDF Processor", test_pdf_processor),
        ("RAG System", test_rag_system),
        ("Streamlit App", test_streamlit_app),
    ]

    # Execute all tests and collect results
    results = []
    for test_name, test_func in tests:
        try:
            result = test_func()
            results.append((test_name, result))
        except Exception as e:
            # One crashing test must not stop the remaining ones
            print(f"❌ {test_name} test failed with exception: {e}")
            results.append((test_name, False))

    # =============================================================================
    # RESULTS SUMMARY
    # =============================================================================

    # Display comprehensive test results
    print("\n" + "=" * 50)
    print("📊 Test Results Summary")
    print("=" * 50)

    # Show individual test results
    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name:20} {status}")

    passed = sum(1 for _, result in results if result)
    total = len(results)

    # Display overall statistics
    print(f"\nOverall: {passed}/{total} tests passed")

    # =============================================================================
    # DEPLOYMENT RECOMMENDATIONS
    # =============================================================================

    all_passed = passed == total
    if all_passed:
        print("🎉 All tests passed! Ready for Hugging Face deployment.")
        print("\nNext steps:")
        print("1. Create a new Hugging Face Space")
        print("2. Upload all files from this directory")
        print("3. Set the SDK to 'Docker'")
        print("4. Deploy and test your RAG system!")
    else:
        print("⚠️  Some tests failed. Please fix the issues before deployment.")
        print("\nTroubleshooting:")
        print("1. Install missing dependencies: pip install -r requirements.txt")
        print("2. Check file permissions and paths")
        print("3. Verify model download permissions")
        print("4. Test locally first: streamlit run app.py")

    return 0 if all_passed else 1


# =============================================================================
# SCRIPT ENTRY POINT
# =============================================================================

if __name__ == "__main__":
    # Propagate the outcome as the process exit status (non-zero on failure)
    sys.exit(main())