Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
Simple test script to verify the PDF OCR Service setup.

Run this to check if everything is working properly.
"""
import importlib
import os
import sys
from pathlib import Path
def test_imports():
    """Check that the third-party modules the service uses can be imported.

    Each required module that fails to import is reported together with the
    ``pip install`` command for its package and makes the check fail.
    Optional modules are only reported; a missing one never fails the check.

    Returns:
        bool: True when every required module imported, False otherwise.
    """
    print("🧪 Testing imports...")

    # Pairs of (importable module name, package name to pip-install).
    required_imports = [
        ('dotenv', 'python-dotenv'),
        ('gradio', 'gradio'),
        ('azure.ai.documentintelligence', 'azure-ai-documentintelligence'),
        ('azure.core', 'azure-core'),
        ('fitz', 'PyMuPDF'),
        ('PIL', 'Pillow'),
        ('cv2', 'opencv-python'),
        ('numpy', 'numpy'),
    ]
    optional_imports = [
        ('pytesseract', 'pytesseract'),
        ('docx', 'python-docx'),
    ]

    all_good = True

    # Required imports: any failure fails the whole check.
    for module, package in required_imports:
        try:
            importlib.import_module(module)
        except ImportError:
            print(f"❌ {package} - Run: pip install {package}")
            all_good = False
        else:
            print(f"✅ {package}")

    # Optional imports: report only, never change the result.
    for module, package in optional_imports:
        try:
            importlib.import_module(module)
        except ImportError:
            print(f"⚠️ {package} (optional) - Run: pip install {package}")
        else:
            print(f"✅ {package} (optional)")

    return all_good


# The test_ prefix makes pytest collect this function, but it returns a status
# flag and depends on the local machine, so opt it out of collection.
test_imports.__test__ = False
def test_files():
    """Check that the project files the service needs exist.

    All paths are relative to the current working directory. Besides the
    fixed list of required files, one UI entry point must be present, named
    either ``ui.py`` or ``app.py``.

    Returns:
        bool: True when every required file and a UI file exist,
        False otherwise.
    """
    print("\n📁 Testing files...")

    required_files = ['ocr_service.py', 'backend.py', 'requirements.txt', '.env']

    # Either name is accepted for the UI file; ui.py wins when both exist.
    ui_file = next(
        (name for name in ('ui.py', 'app.py') if Path(name).exists()),
        None,
    )

    all_good = True
    for file_name in required_files:
        if Path(file_name).exists():
            print(f"✅ {file_name}")
        else:
            print(f"❌ {file_name} missing")
            all_good = False

    if ui_file:
        print(f"✅ {ui_file} (UI file)")
    else:
        print("❌ UI file missing (need either ui.py or app.py)")
        all_good = False

    return all_good


# The test_ prefix makes pytest collect this function, but it returns a status
# flag and depends on the working directory, so opt it out of collection.
test_files.__test__ = False
def test_env_config():
    """Check that the Azure Document Intelligence credentials are configured.

    Loads variables through ``dotenv.load_dotenv()`` and then reads
    ``AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT`` and
    ``AZURE_DOCUMENT_INTELLIGENCE_KEY`` from the environment. Suspicious
    values (endpoint not of the form ``https://.../``, key of 20 characters
    or fewer) only produce warnings.

    Returns:
        bool: True when both variables are set to non-empty values; False
        when either is missing or python-dotenv cannot be imported.
    """
    print("\n🔧 Testing environment...")

    # Guard only the import, so the ImportError handler cannot hide an
    # unrelated failure further down.
    try:
        from dotenv import load_dotenv
    except ImportError:
        print("❌ python-dotenv not available")
        return False

    load_dotenv()
    endpoint = os.getenv('AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT')
    key = os.getenv('AZURE_DOCUMENT_INTELLIGENCE_KEY')

    if not (endpoint and key):
        print("⚠️ Azure credentials not configured")
        print(" Update your .env file with valid credentials")
        return False

    # Format checks are advisory: they warn but do not fail the check.
    if endpoint.startswith('https://') and endpoint.endswith('/'):
        print("✅ Azure endpoint configured properly")
    else:
        print("⚠️ Azure endpoint format may be incorrect")

    if len(key) > 20:
        print("✅ Azure key configured")
    else:
        print("⚠️ Azure key may be incorrect")

    return True


# The test_ prefix makes pytest collect this function, but it returns a status
# flag and depends on the local environment, so opt it out of collection.
test_env_config.__test__ = False
def test_service():
    """Check that the backend can be imported and initialized.

    Imports ``BackendManager`` from the project's ``backend`` module,
    instantiates it with no arguments and asks it for its available methods.
    The absence of ``'azure'`` among them is reported as a warning only.

    Returns:
        bool: True when the manager was created and queried without an
        exception, False when any step raised.
    """
    print("\n🚀 Testing service initialization...")

    # Deliberately broad: this is a diagnostic boundary, so any failure in the
    # backend is reported as a failed check instead of a traceback.
    try:
        from backend import BackendManager

        manager = BackendManager()
        methods = manager.get_available_methods()

        print("✅ Service initialized successfully")
        print(f" Available methods: {methods}")

        if 'azure' in methods:
            print("✅ Azure OCR ready")
        else:
            print("⚠️ Azure OCR not available (check credentials)")

        return True
    except Exception as e:
        print(f"❌ Service initialization failed: {e}")
        return False


# The test_ prefix makes pytest collect this function, but it returns a status
# flag and depends on the local project, so opt it out of collection.
test_service.__test__ = False
def main():
    """Run every setup check and print a pass/fail summary.

    The checks run in a fixed order (imports, files, environment, service),
    each under its own banner. All of them run even if an earlier one fails,
    so a single invocation lists every problem.

    Returns:
        bool: True when all checks passed, False otherwise.
    """
    separator = '=' * 40

    print("🧪 PDF OCR Service Setup Test")
    print(separator)

    tests = [
        ("Import test", test_imports),
        ("File test", test_files),
        ("Environment test", test_env_config),
        ("Service test", test_service),
    ]

    results = {}
    for test_name, test_func in tests:
        print(f"\n{separator}")
        print(test_name.upper())
        print(separator)
        results[test_name] = test_func()

    # Summary
    print(f"\n{separator}")
    print("TEST SUMMARY")
    print(separator)
    for test_name, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{status} {test_name}")
    print(separator)

    all_passed = all(results.values())
    if all_passed:
        # test_files accepts either ui.py or app.py as the UI entry point, so
        # name the one that is actually present (ui.py wins when both exist).
        ui_file = 'ui.py' if Path('ui.py').exists() else 'app.py'
        print("🎉 All tests passed! You can run the service with:")
        print(f" python {ui_file}")
    else:
        print("⚠️ Some tests failed. Please fix the issues above.")
        print("\nQuick fixes:")
        print("1. Install missing packages: pip install -r requirements.txt")
        print("2. Configure your .env file with Azure credentials")
        print("3. Ensure all files are present")

    return all_passed
if __name__ == "__main__":
    # Exit status 0 when every check passed and 1 otherwise, so a shell or CI
    # step can react to the result.
    sys.exit(0 if main() else 1)