|
|
|
|
|
"""
|
|
|
Test script for the enhanced DOCX to PDF conversion system
|
|
|
Tests all the new advanced features and quality verification
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
import sys
|
|
|
import tempfile
|
|
|
import shutil
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
|
|
from app import (
|
|
|
validate_docx_structure,
|
|
|
preprocess_docx_for_perfect_conversion,
|
|
|
post_process_pdf_for_perfect_formatting,
|
|
|
generate_comprehensive_quality_report,
|
|
|
calculate_quality_score,
|
|
|
setup_libreoffice,
|
|
|
setup_font_environment
|
|
|
)
|
|
|
|
|
|
def create_test_docx():
    """
    Build sample mixed Arabic/English text content for conversion tests.

    Despite its name, this helper does not write a .docx file (that would
    require python-docx); it only prints progress messages and returns the
    plain text such a document would contain: Arabic RTL sentences,
    ``{{...}}`` placeholders and a small pipe-delimited table.

    Returns:
        str: The sample content. It starts with an empty line and ends with
        a trailing newline.
    """
    print("📝 Creating test DOCX file...")

    # The content is assembled line by line instead of using a triple-quoted
    # literal, so the text never picks up whitespace from the indentation of
    # this function body.
    content_lines = (
        "",
        "Test DOCX content with Arabic text: مرحبا بكم في اختبار التحويل المتقدم",
        "",
        "This document contains:",
        "- Arabic RTL text: النص العربي من اليمين إلى اليسار",
        "- Placeholders: {{name}}, {{date}}, {{company}}",
        "- Tables with Arabic content",
        "- Mixed language content",
        "",
        "Table example:",
        "| English | العربية | Notes |",
        "|---------|---------|-------|",
        "| Hello | مرحبا | Greeting |",
        "| World | العالم | Noun |",
        "",
    )
    test_content = "\n".join(content_lines)

    print("✅ Test content prepared")
    return test_content
|
|
|
|
|
|
def test_docx_analysis():
    """
    Print and return a mock DOCX structure-analysis result.

    No document is parsed here and ``validate_docx_structure`` is not called:
    the function builds a hard-coded dictionary, prints a short summary of
    four of its fields and returns it.

    NOTE(review): the keys presumably mirror what the real analysis in
    ``app`` produces -- confirm against ``validate_docx_structure``.

    Returns:
        dict: The mock analysis info. It is always non-empty, so callers
        that judge success by truthiness will treat this step as passed.
    """
    print("\n🔍 Testing DOCX Structure Analysis...")

    mock_docx_info = {
        'page_count': 1,
        'has_tables': True,
        'has_images': False,
        'text_content_length': 500,
        'font_families': {'Arial', 'Traditional Arabic', 'Calibri'},
        'has_textboxes': False,
        'has_smartart': False,
        'has_complex_shapes': False,
        'table_structure_issues': [],
        'rtl_content_detected': True,
        'placeholder_count': 3,
        'error': None,
    }

    print("📊 Analysis Results:")
    print(f" • Tables: {mock_docx_info['has_tables']}")
    print(f" • RTL Content: {mock_docx_info['rtl_content_detected']}")
    print(f" • Placeholders: {mock_docx_info['placeholder_count']}")
    # Only the number of distinct font families is reported, not their names.
    print(f" • Font Families: {len(mock_docx_info['font_families'])}")

    return mock_docx_info
|
|
|
|
|
|
def test_quality_scoring():
    """
    Run the quality scoring and reporting functions on mock inputs.

    Three hand-written dictionaries (DOCX analysis info, PDF validation
    result, post-processing result) are passed to ``calculate_quality_score``
    and ``generate_comprehensive_quality_report``; the score and the report
    are printed.

    Returns:
        The value returned by ``calculate_quality_score`` (formatted here as
        a percentage with one decimal place).
    """
    print("\n📊 Testing Quality Scoring System...")

    # Stand-in for the PDF validation step's output.
    pdf_validation_stub = dict(
        file_size_mb=0.5,
        file_exists=True,
        size_reasonable=True,
        warnings=[],
        success_metrics=['PDF file size is reasonable', 'Font substitution applied'],
    )

    # Stand-in for the PDF post-processing step's output.
    post_process_stub = dict(
        pages_processed=1,
        placeholders_verified=3,
        tables_verified=1,
        arabic_text_verified=150,
        layout_issues_fixed=0,
        warnings=[],
        success_metrics=['All 3 placeholders preserved', 'Arabic RTL text verified: 150 characters'],
    )

    # Stand-in for the DOCX structure analysis output.
    docx_info_stub = dict(
        has_tables=True,
        has_images=False,
        rtl_content_detected=True,
        placeholder_count=3,
        has_textboxes=False,
        has_smartart=False,
        has_complex_shapes=False,
        table_structure_issues=[],
    )

    scoring_inputs = (docx_info_stub, pdf_validation_stub, post_process_stub)

    quality_score = calculate_quality_score(*scoring_inputs)
    print(f"🏆 Quality Score: {quality_score:.1f}%")

    quality_report = generate_comprehensive_quality_report(*scoring_inputs)
    print("\n📋 Quality Report:")
    print(quality_report)

    return quality_score
|
|
|
|
|
|
def test_font_system():
    """
    Check that at least one of the expected Arabic fonts is installed.

    Calls ``setup_font_environment()`` and then lists the installed fonts
    with the ``fc-list`` command, looking for each expected family name as a
    case-insensitive substring of that listing.

    Returns:
        bool: True when at least one expected Arabic font is found; False
        when none is found or when any step raises (the error is printed,
        not propagated).
    """
    print("\n🔤 Testing Enhanced Arabic Font System...")

    try:
        setup_font_environment()
        print("✅ Font environment setup completed")

        import subprocess

        # Decode explicitly and leniently: the listing may contain
        # non-ASCII font names, and relying on the locale's default codec
        # could raise a decode error and be reported as a font failure.
        result = subprocess.run(
            ['fc-list'],
            capture_output=True,
            encoding='utf-8',
            errors='replace',
            timeout=10,
        )
        if result.returncode != 0:
            # Surface the failure instead of silently reporting "0 fonts".
            print(f"⚠️ fc-list exited with code {result.returncode}: {result.stderr.strip()}")

        available_fonts = result.stdout.lower()

        arabic_fonts = ['amiri', 'noto naskh arabic', 'scheherazade', 'cairo']
        found_fonts = [font for font in arabic_fonts if font in available_fonts]

        print(f"📊 Arabic Fonts Available: {len(found_fonts)}/{len(arabic_fonts)}")
        for font in found_fonts:
            print(f" ✓ {font}")

        return bool(found_fonts)

    except Exception as e:
        # Best-effort check: report the problem and count the test as failed.
        print(f"❌ Font system test failed: {e}")
        return False
|
|
|
|
|
|
def test_libreoffice_setup():
    """
    Check that LibreOffice is configured and print its version if possible.

    The verdict comes from ``setup_libreoffice()``. The subsequent
    ``libreoffice --version`` call is informational only: a non-zero exit
    status, a missing ``libreoffice`` executable or a timeout does not
    change the result.

    Returns:
        bool: True when ``setup_libreoffice()`` returns a truthy value;
        False when it returns a falsy value or raises (the error is printed,
        not propagated).
    """
    print("\n⚙️ Testing LibreOffice Setup...")

    try:
        libreoffice_available = setup_libreoffice()
    except Exception as e:
        print(f"❌ LibreOffice test failed: {e}")
        return False

    if not libreoffice_available:
        print("❌ LibreOffice setup failed")
        return False

    print("✅ LibreOffice is properly configured")

    import subprocess

    # The version probe must not turn a successful setup into a failure:
    # the launcher may be installed under another name or be slow to start.
    try:
        result = subprocess.run(
            ['libreoffice', '--version'],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"⚠️ Could not query LibreOffice version: {e}")
    else:
        if result.returncode == 0:
            print(f"📊 LibreOffice Version: {result.stdout.strip()}")

    return True
|
|
|
|
|
|
def run_comprehensive_test():
    """
    Run every check of the enhanced conversion system and print a summary.

    The checks run in a fixed order: DOCX analysis, quality scoring, font
    system, LibreOffice setup. A check counts as passed when its return
    value is truthy. A check that raises is reported and recorded as failed,
    so the summary is always printed.

    Returns:
        dict: Maps each check's key ('docx_analysis', 'quality_score',
        'font_system', 'libreoffice') to whatever that check returned, or
        to False when it raised. Values are therefore of mixed types.
    """
    print("🚀 ENHANCED DOCX TO PDF CONVERSION SYSTEM TEST")
    print("=" * 60)

    # Ordered (result key, check function) pairs.
    test_cases = (
        ('docx_analysis', test_docx_analysis),
        ('quality_score', test_quality_scoring),
        ('font_system', test_font_system),
        ('libreoffice', test_libreoffice_setup),
    )

    test_results = {}
    for test_name, test_func in test_cases:
        try:
            test_results[test_name] = test_func()
        except Exception as e:
            # Runner boundary: one broken check must not hide the others.
            print(f"❌ {test_name} raised an unexpected error: {e}")
            test_results[test_name] = False

    print("\n" + "=" * 60)
    print("📊 TEST SUMMARY")
    print("=" * 60)

    for test_name, result in test_results.items():
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name.replace('_', ' ').title()}: {status}")

    passed_tests = sum(1 for result in test_results.values() if result)
    total_tests = len(test_results)

    success_rate = (passed_tests / total_tests) * 100
    print(f"\n🎯 Overall Success Rate: {success_rate:.1f}% ({passed_tests}/{total_tests})")

    if success_rate >= 75:
        print("🌟 EXCELLENT: Enhanced conversion system is ready!")
    elif success_rate >= 50:
        print("👍 GOOD: Most features are working correctly")
    else:
        print("⚠️ NEEDS ATTENTION: Several components need fixing")

    return test_results
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run all checks, then turn the pass ratio into a
    # process exit status (0 when at least 75% of the checks passed, else 1).
    results = run_comprehensive_test()

    passed_count = len([outcome for outcome in results.values() if outcome])
    success_rate = passed_count / len(results) * 100

    exit_code = 1 if success_rate < 75 else 0
    sys.exit(exit_code)
|
|
|
|