Spaces:

sinhapiyush86
/

convAI

Sleeping

App Files Files Community

convAI / test_deployment.py

sinhapiyush86

Upload 15 files

afad319 verified 6 months ago

raw

history blame contribute delete

12.4 kB

	#!/usr/bin/env python3
	"""
	# Test Script for Hugging Face Deployment

	This script provides comprehensive testing for the RAG system deployment on Hugging Face Spaces.

	## Overview

	The test script validates all components required for successful deployment:
	- Package imports and dependencies
	- Model loading capabilities
	- RAG system functionality
	- PDF processing components
	- Streamlit application integration

	## Test Categories

	1. Import Tests: Verify all required packages can be imported
	2. Model Tests: Check if AI models can be loaded successfully
	3. Component Tests: Validate RAG system and PDF processor functionality
	4. Integration Tests: Ensure Streamlit app can be imported
	5. File Structure Tests: Confirm all required files are present
	6. Requirements Tests: Validate dependencies are properly specified

	## Usage

	Run the script to check deployment readiness:
	```bash
	python test_deployment.py
	```

	## Expected Output

	The script provides detailed feedback on each test:
	- ✅ PASS: Component is ready for deployment
	- ❌ FAIL: Component needs attention before deployment
	- ⚠️ WARNING: Shown once in the final summary when one or more tests failed
	"""

	import os
	import sys
	import tempfile
	from pathlib import Path


	def test_imports(packages=None):
	    """
	    Test if all required packages can be imported successfully

	    Every package is attempted even after an earlier one has failed, so a
	    single run reports the complete list of missing dependencies instead of
	    only the first one.

	    Args:
	        packages: Optional iterable of ``(label, module_name)`` pairs to
	            check. When omitted, the deployment dependencies are checked:
	            Streamlit, PyTorch, Transformers, Sentence Transformers, FAISS,
	            Rank BM25 and PyPDF.

	    Returns:
	        bool: True if all imports succeed, False otherwise
	    """
	    import importlib

	    print("🔍 Testing imports...")

	    if packages is None:
	        packages = [
	            ("Streamlit", "streamlit"),  # core web framework
	            ("PyTorch", "torch"),  # deep learning framework
	            ("Transformers", "transformers"),  # Hugging Face models
	            ("Sentence Transformers", "sentence_transformers"),  # embeddings
	            ("FAISS", "faiss"),  # vector search
	            ("Rank BM25", "rank_bm25"),  # sparse retrieval
	            ("PyPDF", "pypdf"),  # PDF processing
	        ]

	    all_ok = True
	    for label, module_name in packages:
	        try:
	            module = importlib.import_module(module_name)
	        except ImportError as e:
	            print(f"❌ {label} import failed: {e}")
	            all_ok = False
	            # Keep going: the remaining packages still need to be reported.
	            continue

	        # Not every package is guaranteed to expose __version__; a missing
	        # attribute must not be mistaken for a failed import.
	        version = getattr(module, "__version__", None)
	        if version is None:
	            print(f"✅ {label}")
	        else:
	            print(f"✅ {label}: {version}")

	    return all_ok


	def test_rag_system():
	    """
	    Smoke-test the RAG system: build it and fetch its statistics

	    Steps performed:
	    - Import ``SimpleRAGSystem`` from ``rag_system``
	    - Instantiate it with default arguments
	    - Call ``get_stats()`` and print whatever it returns

	    Returns:
	        bool: True if every step completes, False if any step raises
	    """
	    print("\n🔍 Testing RAG system...")

	    passed = False
	    try:
	        from rag_system import SimpleRAGSystem

	        # Construction with default arguments must not raise
	        rag_instance = SimpleRAGSystem()
	        print("✅ RAG system initialized")

	        # Statistics must be retrievable from a freshly built system
	        print(f"✅ Stats retrieved: {rag_instance.get_stats()}")

	        passed = True
	    except Exception as error:
	        print(f"❌ RAG system test failed: {error}")

	    return passed


	def test_pdf_processor():
	    """
	    Smoke-test the PDF processor: build it and preprocess one query

	    Steps performed:
	    - Import ``SimplePDFProcessor`` from ``pdf_processor``
	    - Instantiate it with default arguments
	    - Run ``preprocess_query`` on a sample question and print the result

	    Returns:
	        bool: True if every step completes, False if any step raises
	    """
	    print("\n🔍 Testing PDF processor...")

	    passed = False
	    try:
	        from pdf_processor import SimplePDFProcessor

	        # Construction with default arguments must not raise
	        pdf_tool = SimplePDFProcessor()
	        print("✅ PDF processor initialized")

	        # A representative question is pushed through query preprocessing
	        sample_question = "What is the revenue?"
	        print(f"✅ Query preprocessing: '{pdf_tool.preprocess_query(sample_question)}'")

	        passed = True
	    except Exception as error:
	        print(f"❌ PDF processor test failed: {error}")

	    return passed


	def test_model_loading():
	    """
	    Test if the AI models used by the deployment can be loaded

	    Three loads are attempted, in order:
	    - The ``all-MiniLM-L6-v2`` sentence-transformer embedding model
	    - The tokenizer for ``Qwen/Qwen2.5-1.5B-Instruct``
	    - The ``Qwen/Qwen2.5-1.5B-Instruct`` causal language model, placed on CPU

	    Returns:
	        bool: True if all three loads succeed, False if any of them raises
	    """
	    print("\n🔍 Testing model loading...")

	    loaded = False
	    try:
	        from sentence_transformers import SentenceTransformer
	        from transformers import AutoTokenizer, AutoModelForCausalLM

	        embedding_name = "all-MiniLM-L6-v2"
	        generator_name = "Qwen/Qwen2.5-1.5B-Instruct"

	        # Embedding model
	        embedding_model = SentenceTransformer(embedding_name)
	        print("✅ Embedding model loaded")

	        # Tokenizer for the generative model
	        llm_tokenizer = AutoTokenizer.from_pretrained(
	            generator_name,
	            trust_remote_code=True,
	        )
	        print("✅ Tokenizer loaded")

	        # Generative model, explicitly mapped to CPU
	        cpu_load_options = {
	            "trust_remote_code": True,
	            "torch_dtype": "auto",
	            "device_map": "cpu",
	        }
	        llm = AutoModelForCausalLM.from_pretrained(generator_name, **cpu_load_options)
	        print("✅ Generative model loaded")

	        loaded = True
	    except Exception as error:
	        print(f"❌ Model loading failed: {error}")

	    return loaded


	def test_streamlit_app():
	    """
	    Test if the Streamlit application module can be imported

	    The check simply executes ``import app``; any exception raised while the
	    module is being imported is reported and counts as a failure.

	    Returns:
	        bool: True if ``app`` imports without raising, False otherwise
	    """
	    print("\n🔍 Testing Streamlit app...")

	    imported = False
	    try:
	        # Importing runs the module's top-level code, which is the whole test
	        import app

	        print("✅ Streamlit app imported successfully")
	        imported = True
	    except Exception as error:
	        print(f"❌ Streamlit app test failed: {error}")

	    return imported


	def test_file_structure(base_dir="."):
	    """
	    Test if all required files exist in the project

	    This function checks for essential files:
	    - Main application files
	    - Configuration files
	    - Documentation files

	    Each entry must be a regular file; a directory that merely carries one of
	    the required names does not satisfy the check.

	    Args:
	        base_dir: Directory in which to look for the files. Defaults to the
	            current working directory.

	    Returns:
	        bool: True if all required files exist, False otherwise
	    """
	    print("\n🔍 Testing file structure...")

	    # List of required files for deployment
	    required_files = [
	        "app.py",  # Main Streamlit application
	        "rag_system.py",  # Core RAG system
	        "pdf_processor.py",  # PDF processing utilities
	        "requirements.txt",  # Python dependencies
	        "README.md",  # Project documentation
	    ]

	    root = Path(base_dir)
	    missing_files = []
	    for name in required_files:
	        # is_file() rather than exists(): a directory named like a required
	        # file would otherwise be reported as present.
	        if (root / name).is_file():
	            print(f"✅ {name}")
	        else:
	            print(f"❌ {name} (missing)")
	            missing_files.append(name)

	    if missing_files:
	        print(f"❌ Missing files: {missing_files}")
	        return False

	    return True


	def test_requirements(requirements_path="requirements.txt"):
	    """
	    Test if requirements.txt contains all essential packages

	    The file is parsed line by line. Comments, blank lines and pip option
	    lines (those starting with ``-``) are ignored, and the distribution name
	    at the start of each remaining line is compared exactly against the
	    essential list. Names are compared case-insensitively with runs of
	    ``-``, ``_`` and ``.`` treated as equivalent, so ``sentence_transformers``
	    satisfies ``sentence-transformers`` but ``torchvision`` does not satisfy
	    ``torch``. Version specifiers are not inspected.

	    Args:
	        requirements_path: Path of the requirements file to inspect.
	            Defaults to ``requirements.txt`` in the current directory.

	    Returns:
	        bool: True if every essential package is listed, False if any is
	        missing or the file cannot be read
	    """
	    import re

	    print("\n🔍 Testing requirements.txt...")

	    try:
	        # utf-8-sig also accepts a file saved with a byte-order mark
	        with open(requirements_path, "r", encoding="utf-8-sig") as f:
	            lines = f.read().splitlines()
	    except (OSError, UnicodeError) as e:
	        print(f"❌ Requirements test failed: {e}")
	        return False

	    def canonical(name):
	        # Fold case and separator variants so equivalent spellings compare equal
	        return re.sub(r"[-_.]+", "-", name.rstrip("-_.")).lower()

	    name_pattern = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")
	    declared = set()
	    for line in lines:
	        # Drop full-line and trailing comments before looking for a name
	        spec = re.sub(r"(^|\s)#.*$", "", line).strip()
	        if not spec or spec.startswith("-"):
	            continue
	        match = name_pattern.match(spec)
	        if match:
	            declared.add(canonical(match.group(0)))

	    # List of essential packages that must be present
	    essential_packages = [
	        "streamlit",  # Web framework
	        "torch",  # Deep learning
	        "transformers",  # Language models
	        "sentence-transformers",  # Embeddings
	        "faiss-cpu",  # Vector search
	        "rank-bm25",  # Sparse retrieval
	        "pypdf",  # PDF processing
	    ]

	    missing_packages = []
	    for package in essential_packages:
	        if canonical(package) in declared:
	            print(f"✅ {package}")
	        else:
	            print(f"❌ {package} (missing)")
	            missing_packages.append(package)

	    if missing_packages:
	        print(f"❌ Missing packages: {missing_packages}")
	        return False

	    return True


	def main(tests=None):
	    """
	    Run all deployment tests and provide comprehensive feedback

	    This function:
	    1. Executes all test categories
	    2. Tracks test results
	    3. Provides summary statistics
	    4. Gives deployment recommendations

	    A test that raises, or that returns a falsy value, is recorded as failed;
	    the remaining tests still run.

	    Args:
	        tests: Optional iterable of ``(name, callable)`` pairs to run instead
	            of the built-in deployment checks. Each callable takes no
	            arguments and returns a truthy value on success.

	    Returns:
	        int: 0 if every test passed, 1 otherwise (usable as an exit status)
	    """
	    print("🚀 Hugging Face Deployment Test\n")

	    if tests is None:
	        # Built-in checks, ordered from cheapest to most expensive
	        tests = [
	            ("File Structure", test_file_structure),
	            ("Requirements", test_requirements),
	            ("Imports", test_imports),
	            ("Model Loading", test_model_loading),
	            ("PDF Processor", test_pdf_processor),
	            ("RAG System", test_rag_system),
	            ("Streamlit App", test_streamlit_app),
	        ]

	    # Execute all tests and collect results
	    results = []
	    for test_name, test_func in tests:
	        try:
	            # bool(): a check that returns None counts as a failure
	            result = bool(test_func())
	        except Exception as e:
	            print(f"❌ {test_name} test failed with exception: {e}")
	            result = False
	        results.append((test_name, result))

	    # =============================================================================
	    # RESULTS SUMMARY
	    # =============================================================================

	    print("\n" + "=" * 50)
	    print("📊 Test Results Summary")
	    print("=" * 50)

	    # Show individual test results
	    for test_name, result in results:
	        status = "✅ PASS" if result else "❌ FAIL"
	        print(f"{test_name:20} {status}")

	    passed = sum(1 for _, result in results if result)
	    total = len(results)

	    # Display overall statistics
	    print(f"\nOverall: {passed}/{total} tests passed")

	    # =============================================================================
	    # DEPLOYMENT RECOMMENDATIONS
	    # =============================================================================

	    if passed == total:
	        print("🎉 All tests passed! Ready for Hugging Face deployment.")
	        print("\nNext steps:")
	        print("1. Create a new Hugging Face Space")
	        print("2. Upload all files from this directory")
	        print("3. Set the SDK to 'Docker'")
	        print("4. Deploy and test your RAG system!")
	        return 0

	    print("⚠️ Some tests failed. Please fix the issues before deployment.")
	    print("\nTroubleshooting:")
	    print("1. Install missing dependencies: pip install -r requirements.txt")
	    print("2. Check file permissions and paths")
	    print("3. Verify model download permissions")
	    print("4. Test locally first: streamlit run app.py")
	    return 1


	# =============================================================================
	# SCRIPT ENTRY POINT
	# =============================================================================

	if __name__ == "__main__":
	    # Pass main()'s return value to the interpreter as the process exit
	    # status; sys.exit treats None as success (status 0).
	    sys.exit(main())