trabb / test_pdf_libraries.py
fokan's picture
first push
ab208dc
#!/usr/bin/env python3
"""
Test script to verify PDF libraries installation and basic functionality
"""
import sys
import os
# Make the directory one level above this script's folder importable, so the
# script can be run directly without installing the project as a package.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)),
)
def _library_version(module, distribution):
    """Return the best available version string for an imported package.

    Prefers the module's own ``__version__``; if the package does not expose
    one, falls back to the installed distribution metadata, and finally to
    the literal ``"Unknown"``.
    """
    version = getattr(module, "__version__", None)
    if version is not None:
        return version
    try:
        from importlib import metadata  # available on Python 3.8+
    except ImportError:
        return "Unknown"
    try:
        return metadata.version(distribution)
    except metadata.PackageNotFoundError:
        return "Unknown"


def test_pdf_libraries():
    """Check that every PDF library used by the project can be imported.

    All libraries are checked even if an earlier one fails, so a single run
    reports every missing package instead of only the first one.

    Returns:
        bool: True if pypdfium2, pdfplumber and reportlab (including the
        ``pdfgen.canvas`` and ``lib.pagesizes`` submodules) all import
        cleanly, False otherwise.
    """
    import importlib

    # (package / distribution name, extra submodules that must also import)
    required = (
        ("pypdfium2", ()),
        ("pdfplumber", ()),
        ("reportlab", ("reportlab.pdfgen.canvas", "reportlab.lib.pagesizes")),
    )

    print("🔍 Testing PDF libraries installation...")

    failed = []
    for package, submodules in required:
        try:
            module = importlib.import_module(package)
            for submodule in submodules:
                importlib.import_module(submodule)
        except ImportError as e:
            print(f"❌ {package} import failed: {e}")
            failed.append(package)
        except Exception as e:
            # An import can also fail with something other than ImportError
            # (for example when a bundled native library cannot be loaded);
            # report it as a failed check rather than crashing the script.
            print(f"❌ {package} test failed: {e}")
            failed.append(package)
        else:
            print(f"✅ {package} imported successfully")
            print(f" Version: {_library_version(module, package)}")

    if failed:
        return False

    print("\n🎉 All PDF libraries are working correctly!")
    return True
def test_coordinate_extraction():
    """Round-trip a one-word PDF and verify the extracted word's position.

    A marker word is drawn at a known point with reportlab into a temporary
    PDF, the file is re-opened with pdfplumber, and the bounding box reported
    for the marker is compared with the drawing position.

    Returns:
        bool: True if the marker is extracted at the position where it was
        drawn (within a small tolerance). False if a library is missing, the
        PDF cannot be written or read, or the coordinates do not match. The
        function never raises; failures are printed and reported as False.
    """
    print("\n📍 Testing coordinate-based text extraction...")

    marker = "COORDTEST"
    # reportlab coordinates are in PDF points with the origin at the
    # bottom-left corner of the page; (72, 720) is 1 inch in from the left
    # and 1 inch down from the top of a US-letter page.
    draw_x, draw_y = 72.0, 720.0
    tolerance = 2.0  # points

    try:
        import tempfile
        from pathlib import Path

        import pdfplumber
        from reportlab.lib.pagesizes import letter
        from reportlab.pdfgen import canvas

        # Work in a temporary directory so nothing is left behind in the
        # current working directory.
        with tempfile.TemporaryDirectory() as workdir:
            test_pdf_path = Path(workdir) / "test_document.pdf"

            pdf_canvas = canvas.Canvas(str(test_pdf_path), pagesize=letter)
            pdf_canvas.drawString(draw_x, draw_y, marker)
            pdf_canvas.save()

            # Close the PDF before the temporary directory is removed.
            with pdfplumber.open(str(test_pdf_path)) as pdf:
                page = pdf.pages[0]
                page_height = float(page.height)
                words = page.extract_words()

        found = next((w for w in words if w["text"] == marker), None)
        if found is None:
            print("❌ Coordinate extraction test failed: "
                  f"marker text {marker!r} was not extracted")
            return False

        # pdfplumber measures "top"/"bottom" downward from the top edge of
        # the page, so convert the drawing baseline to that convention.
        baseline_from_top = page_height - draw_y
        x_matches = abs(float(found["x0"]) - draw_x) <= tolerance
        y_matches = (
            float(found["top"]) - tolerance
            <= baseline_from_top
            <= float(found["bottom"]) + tolerance
        )
        if not (x_matches and y_matches):
            print("❌ Coordinate extraction test failed: "
                  f"expected x0≈{draw_x}, baseline≈{baseline_from_top}; "
                  f"got x0={found['x0']}, top={found['top']}, "
                  f"bottom={found['bottom']}")
            return False
    except Exception as e:
        # Diagnostic boundary: any library or I/O error counts as a failed
        # check instead of aborting the whole script.
        print(f"❌ Coordinate extraction test failed: {e}")
        return False

    print("✅ Coordinate extraction functionality ready")
    return True
def main():
    """Run the library import check, then the coordinate extraction check.

    The extraction check is only attempted when every library imported
    successfully, since it depends on those libraries.

    Returns:
        int: 0 when both checks pass, 1 otherwise. The value is intended to
        be used as the process exit status.
    """
    print("🧪 PDF Library Test Suite\n")

    if not test_pdf_libraries():
        print("\n❌ Library import test failed. Please check your installation.")
        return 1

    if not test_coordinate_extraction():
        print("\n⚠️ Coordinate extraction test failed. Check the logs for details.")
        return 1

    print("\n🎉 All tests passed! The coordinate-based PDF translation should work correctly.")
    return 0


if __name__ == "__main__":
    # Propagate the result as the exit status so shells and CI can detect
    # a failed check.
    sys.exit(main())