Spaces:

Betimes-Solution
/

PDFtoDocx-OCR

Sleeping

App Files Files Community

PDFtoDocx-OCR / test_setup.py

Chirapath

Upload 7 files

6063ce4 verified 4 months ago

raw

history blame contribute delete

5.45 kB

	#!/usr/bin/env python3
	"""
	Simple test script to verify the PDF OCR Service setup
	Run this to check if everything is working properly
	"""

	import sys
	import os
	from pathlib import Path

	def test_imports():
	    """Check that every third-party module the service relies on can be imported.

	    Required modules that fail to import make the check fail; optional
	    modules only produce a warning. Each result is printed together with
	    the pip package name to install.

	    Returns:
	        bool: True when every required module imported successfully.
	    """
	    import importlib

	    print("🧪 Testing imports...")

	    # (importable module name, pip package name)
	    required_imports = [
	        ('dotenv', 'python-dotenv'),
	        ('gradio', 'gradio'),
	        ('azure.ai.documentintelligence', 'azure-ai-documentintelligence'),
	        ('azure.core', 'azure-core'),
	        ('fitz', 'PyMuPDF'),
	        ('PIL', 'Pillow'),
	        ('cv2', 'opencv-python'),
	        ('numpy', 'numpy'),
	    ]

	    optional_imports = [
	        ('pytesseract', 'pytesseract'),
	        ('docx', 'python-docx'),
	    ]

	    all_good = True

	    # Required imports: any failure fails the whole check.
	    for module, package in required_imports:
	        try:
	            importlib.import_module(module)
	        except ImportError:
	            print(f"❌ {package} - Run: pip install {package}")
	            all_good = False
	        except Exception as exc:
	            # A package can be installed yet blow up while being imported
	            # with something other than ImportError; report it instead of
	            # letting it abort the remaining checks.
	            print(f"❌ {package} - import failed: {exc}")
	            all_good = False
	        else:
	            print(f"✅ {package}")

	    # Optional imports: failures are reported but never fail the check.
	    for module, package in optional_imports:
	        try:
	            importlib.import_module(module)
	        except ImportError:
	            print(f"⚠️ {package} (optional) - Run: pip install {package}")
	        except Exception as exc:
	            print(f"⚠️ {package} (optional) - import failed: {exc}")
	        else:
	            print(f"✅ {package} (optional)")

	    return all_good

	def test_files():
	    """Check that the files the service needs are present.

	    Paths are looked up relative to the current working directory. Besides
	    the fixed list of required files, one UI entry point must exist:
	    ``ui.py`` is preferred, ``app.py`` is accepted as a fallback.

	    Returns:
	        bool: True when every required file and a UI entry point exist.
	    """
	    print("\n📁 Testing files...")

	    required_files = ['ocr_service.py', 'backend.py', 'requirements.txt', '.env']
	    ui_candidates = ('ui.py', 'app.py')

	    all_good = True
	    for name in required_files:
	        # is_file() rather than exists(): a directory that happens to carry
	        # the expected name must not count as the file being present.
	        if Path(name).is_file():
	            print(f"✅ {name}")
	        else:
	            print(f"❌ {name} missing")
	            all_good = False

	    # First candidate that exists wins, so ui.py takes precedence over app.py.
	    ui_file = next((name for name in ui_candidates if Path(name).is_file()), None)
	    if ui_file:
	        print(f"✅ {ui_file} (UI file)")
	    else:
	        print("❌ UI file missing (need either ui.py or app.py)")
	        all_good = False

	    return all_good

	def test_env_config():
	    """Check that the Azure Document Intelligence credentials are configured.

	    Loads the environment through python-dotenv, then reads
	    ``AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT`` and
	    ``AZURE_DOCUMENT_INTELLIGENCE_KEY``. The endpoint format and the key
	    length are sanity-checked, but those checks only print warnings.

	    Returns:
	        bool: True when both variables are set to non-blank values;
	        False when either is missing or python-dotenv is not installed.
	    """
	    print("\n🔧 Testing environment...")

	    # Only the import itself can raise ImportError, so keep the try minimal.
	    try:
	        from dotenv import load_dotenv
	    except ImportError:
	        print("❌ python-dotenv not available")
	        return False

	    load_dotenv()

	    # Strip so that a value consisting only of whitespace counts as unset.
	    endpoint = (os.getenv('AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT') or '').strip()
	    key = (os.getenv('AZURE_DOCUMENT_INTELLIGENCE_KEY') or '').strip()

	    if not (endpoint and key):
	        print("⚠️ Azure credentials not configured")
	        print(" Update your .env file with valid credentials")
	        return False

	    if endpoint.startswith('https://') and endpoint.endswith('/'):
	        print("✅ Azure endpoint configured properly")
	    else:
	        print("⚠️ Azure endpoint format may be incorrect")

	    # Heuristic only: a very short value is unlikely to be a real key.
	    if len(key) > 20:
	        print("✅ Azure key configured")
	    else:
	        print("⚠️ Azure key may be incorrect")

	    return True

	def test_service():
	    """Check that the backend can be imported and instantiated.

	    Imports ``BackendManager`` from the ``backend`` module, creates an
	    instance and prints what ``get_available_methods()`` returns. A missing
	    ``'azure'`` entry only produces a warning.

	    Returns:
	        bool: True when the manager was created and queried without error;
	        False when any exception was raised along the way.
	    """
	    print("\n🚀 Testing service initialization...")

	    # Deliberately broad: this is a diagnostic boundary, and any failure
	    # while importing or constructing the backend should be reported to
	    # the user rather than raised.
	    try:
	        from backend import BackendManager
	        manager = BackendManager()

	        methods = manager.get_available_methods()
	        print("✅ Service initialized successfully")
	        print(f" Available methods: {methods}")

	        if 'azure' in methods:
	            print("✅ Azure OCR ready")
	        else:
	            print("⚠️ Azure OCR not available (check credentials)")

	        return True

	    except Exception as e:
	        print(f"❌ Service initialization failed: {e}")
	        return False

	def main():
	    """Run every setup check and print a pass/fail summary.

	    The checks run in a fixed order (imports, files, environment, service),
	    each under its own banner. Afterwards a summary lists the outcome of
	    each check, followed either by the command to start the service or by
	    a short list of quick fixes.

	    Returns:
	        bool: True when every check returned a truthy result.
	    """
	    banner = '=' * 40

	    print("🧪 PDF OCR Service Setup Test")
	    print(banner)

	    tests = [
	        ("Import test", test_imports),
	        ("File test", test_files),
	        ("Environment test", test_env_config),
	        ("Service test", test_service),
	    ]

	    results = {}
	    for test_name, test_func in tests:
	        print(f"\n{banner}")
	        print(test_name.upper())
	        print(banner)
	        results[test_name] = test_func()

	    # Summary
	    print(f"\n{banner}")
	    print("TEST SUMMARY")
	    print(banner)

	    for test_name, passed in results.items():
	        status = "✅ PASS" if passed else "❌ FAIL"
	        print(f"{status} {test_name}")

	    all_passed = all(results.values())

	    print(banner)
	    if all_passed:
	        # The file check accepts either ui.py or app.py, so point the user
	        # at the entry point that is actually present (ui.py preferred).
	        ui_file = 'app.py' if not Path('ui.py').exists() and Path('app.py').exists() else 'ui.py'
	        print("🎉 All tests passed! You can run the service with:")
	        print(f" python {ui_file}")
	    else:
	        print("⚠️ Some tests failed. Please fix the issues above.")
	        print("\nQuick fixes:")
	        print("1. Install missing packages: pip install -r requirements.txt")
	        print("2. Configure your .env file with Azure credentials")
	        print("3. Ensure all files are present")

	    return all_passed

	# Script entry point: exit status 0 when every check passed, 1 otherwise,
	# so the result can be consumed by shells and CI jobs.
	if __name__ == "__main__":
	    success = main()
	    sys.exit(0 if success else 1)