PDFtoDocx-OCR / test_setup.py
Chirapath's picture
Upload 7 files
6063ce4 verified
#!/usr/bin/env python3
"""
Simple test script to verify the PDF OCR Service setup
Run this to check if everything is working properly
"""
import sys
import os
from pathlib import Path
def test_imports():
    """Check that required and optional dependencies can be imported.

    Prints one status line per package, including the ``pip install``
    command for any package whose module fails to import.

    Returns:
        bool: True when every required module imports. A missing optional
        module only produces a warning and does not affect the result.
    """
    from importlib import import_module

    print("🧪 Testing imports...")

    # (importable module name, pip package name)
    required_imports = [
        ('dotenv', 'python-dotenv'),
        ('gradio', 'gradio'),
        ('azure.ai.documentintelligence', 'azure-ai-documentintelligence'),
        ('azure.core', 'azure-core'),
        ('fitz', 'PyMuPDF'),
        ('PIL', 'Pillow'),
        ('cv2', 'opencv-python'),
        ('numpy', 'numpy'),
    ]
    optional_imports = [
        ('pytesseract', 'pytesseract'),
        ('docx', 'python-docx'),
    ]

    all_good = True

    # Required imports: any failure fails the whole check.
    for module, package in required_imports:
        try:
            import_module(module)
        except ImportError:
            print(f"❌ {package} - Run: pip install {package}")
            all_good = False
        else:
            print(f"✅ {package}")

    # Optional imports: report, but never fail the check.
    for module, package in optional_imports:
        try:
            import_module(module)
        except ImportError:
            print(f"⚠️ {package} (optional) - Run: pip install {package}")
        else:
            print(f"✅ {package} (optional)")

    return all_good
def test_files():
    """Check that the project files exist in the current working directory.

    The required files are ``ocr_service.py``, ``backend.py``,
    ``requirements.txt`` and ``.env``, plus one UI entry point: ``ui.py`` is
    preferred, ``app.py`` is accepted as a fallback.

    Returns:
        bool: True when every required file and a UI file are present.
    """
    print("\n📁 Testing files...")

    required_files = ['ocr_service.py', 'backend.py', 'requirements.txt', '.env']
    all_good = True

    for file in required_files:
        if Path(file).exists():
            print(f"✅ {file}")
        else:
            print(f"❌ {file} missing")
            all_good = False

    # Either ui.py or app.py satisfies the UI requirement; ui.py wins if both exist.
    ui_file = next(
        (candidate for candidate in ('ui.py', 'app.py') if Path(candidate).exists()),
        None,
    )
    if ui_file:
        print(f"✅ {ui_file} (UI file)")
    else:
        print("❌ UI file missing (need either ui.py or app.py)")
        all_good = False

    return all_good
def test_env_config():
    """Check that the Azure Document Intelligence credentials are configured.

    Loads variables via ``python-dotenv`` and reads
    ``AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT`` and
    ``AZURE_DOCUMENT_INTELLIGENCE_KEY`` from the environment. The endpoint
    and key format checks are advisory: they print a warning but do not fail
    the test.

    Returns:
        bool: True when both variables are set to non-empty values; False
        when either is missing or ``python-dotenv`` cannot be imported.
    """
    print("\n🔧 Testing environment...")

    # Only the import can raise ImportError, so keep the try body to that line.
    try:
        from dotenv import load_dotenv
    except ImportError:
        print("❌ python-dotenv not available")
        return False

    load_dotenv()
    endpoint = os.getenv('AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT')
    key = os.getenv('AZURE_DOCUMENT_INTELLIGENCE_KEY')

    if not (endpoint and key):
        print("⚠️ Azure credentials not configured")
        print(" Update your .env file with valid credentials")
        return False

    # Heuristic format checks: warn only, never fail.
    if endpoint.startswith('https://') and endpoint.endswith('/'):
        print("✅ Azure endpoint configured properly")
    else:
        print("⚠️ Azure endpoint format may be incorrect")

    if len(key) > 20:
        print("✅ Azure key configured")
    else:
        print("⚠️ Azure key may be incorrect")

    return True
def test_service():
    """Check that the backend service can be imported and instantiated.

    Imports ``BackendManager`` from ``backend``, constructs it, and prints
    the value returned by ``get_available_methods()``. A missing ``'azure'``
    entry in that value only produces a warning.

    Returns:
        bool: True when the manager was created and queried without raising;
        False when any exception (including a failed import) occurred.
    """
    print("\n🚀 Testing service initialization...")
    # Diagnostic boundary: any failure is reported to the user, not propagated.
    try:
        from backend import BackendManager

        manager = BackendManager()
        methods = manager.get_available_methods()
        print("✅ Service initialized successfully")
        print(f" Available methods: {methods}")

        if 'azure' in methods:
            print("✅ Azure OCR ready")
        else:
            print("⚠️ Azure OCR not available (check credentials)")
        return True
    except Exception as e:
        print(f"❌ Service initialization failed: {e}")
        return False
def main():
    """Run every setup check, print a summary, and report overall success.

    The checks run in a fixed order (imports, files, environment, service)
    and all of them run even when an earlier one fails, so the summary lists
    every problem at once.

    Returns:
        bool: True when every check passed, False otherwise.
    """
    separator = "=" * 40

    print("🧪 PDF OCR Service Setup Test")
    print(separator)

    tests = [
        ("Import test", test_imports),
        ("File test", test_files),
        ("Environment test", test_env_config),
        ("Service test", test_service),
    ]

    results = {}
    for test_name, test_func in tests:
        print(f"\n{separator}")
        print(test_name.upper())
        print(separator)
        results[test_name] = test_func()

    # Summary
    print(f"\n{separator}")
    print("TEST SUMMARY")
    print(separator)
    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{status} {test_name}")
    print(separator)

    all_passed = all(results.values())
    if all_passed:
        # The file check accepts ui.py or app.py (ui.py preferred), so point
        # the user at the entry point that actually exists.
        if not Path("ui.py").exists() and Path("app.py").exists():
            ui_file = "app.py"
        else:
            ui_file = "ui.py"
        print("🎉 All tests passed! You can run the service with:")
        print(f" python {ui_file}")
    else:
        print("⚠️ Some tests failed. Please fix the issues above.")
        print("\nQuick fixes:")
        print("1. Install missing packages: pip install -r requirements.txt")
        print("2. Configure your .env file with Azure credentials")
        print("3. Ensure all files are present")

    return all_passed
if __name__ == "__main__":
    # Report the overall result as the process exit status: 0 on success, 1 on failure.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)