| | |
| | """ |
| | Test script for the enhanced DOCX to PDF conversion system |
| | Tests all the new advanced features and quality verification |
| | """ |
| |
|
| | import os |
| | import sys |
| | import tempfile |
| | import shutil |
| | from pathlib import Path |
| |
|
| | |
# Put the directory containing this script at the front of the module search
# path, so the import that follows looks there first regardless of the current
# working directory (presumably that is where the `app` module lives - confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
| |
|
| | from app import ( |
| | validate_docx_structure, |
| | preprocess_docx_for_perfect_conversion, |
| | post_process_pdf_for_perfect_formatting, |
| | generate_comprehensive_quality_report, |
| | calculate_quality_score, |
| | setup_libreoffice, |
| | setup_font_environment |
| | ) |
| |
|
def create_test_docx():
    """
    Prepare the sample content used for conversion testing.

    No .docx file is written: the function only prints two progress lines and
    returns a plain string. The string mixes English and Arabic (RTL) text,
    ``{{...}}`` placeholders and a small table drawn with pipe characters.
    """
    print("📝 Creating test DOCX file...")

    sample_text = """
Test DOCX content with Arabic text: مرحبا بكم في اختبار التحويل المتقدم

This document contains:
- Arabic RTL text: النص العربي من اليمين إلى اليسار
- Placeholders: {{name}}, {{date}}, {{company}}
- Tables with Arabic content
- Mixed language content

Table example:
| English | العربية | Notes |
|---------|---------|-------|
| Hello | مرحبا | Greeting |
| World | العالم | Noun |
"""

    print("✅ Test content prepared")
    return sample_text
| |
|
def test_docx_analysis():
    """Print a summary of a mocked DOCX structure analysis and return it.

    The analysis dictionary is hard-coded in this function; no document is
    opened and ``validate_docx_structure`` is not called here.

    Returns:
        The mocked analysis dictionary (always non-empty).
    """
    print("\n🔍 Testing DOCX Structure Analysis...")

    # Stand-in for the result of a real structure analysis.
    analysis = dict(
        page_count=1,
        has_tables=True,
        has_images=False,
        text_content_length=500,
        font_families={'Arial', 'Traditional Arabic', 'Calibri'},
        has_textboxes=False,
        has_smartart=False,
        has_complex_shapes=False,
        table_structure_issues=[],
        rtl_content_detected=True,
        placeholder_count=3,
        error=None,
    )

    # (label, value) pairs, printed in this order.
    summary_rows = (
        ("Tables", analysis['has_tables']),
        ("RTL Content", analysis['rtl_content_detected']),
        ("Placeholders", analysis['placeholder_count']),
        ("Font Families", len(analysis['font_families'])),
    )

    print("📊 Analysis Results:")
    for label, value in summary_rows:
        print(f" • {label}: {value}")

    return analysis
| |
|
def test_quality_scoring():
    """Feed mocked conversion data to the scoring and reporting helpers.

    Three hard-coded dictionaries (DOCX analysis, PDF validation and
    post-processing results) are passed to ``calculate_quality_score`` and
    ``generate_comprehensive_quality_report``; the score and the report are
    printed.

    Returns:
        Whatever ``calculate_quality_score`` returned (printed with one
        decimal place, so it is expected to be numeric).
    """
    print("\n📊 Testing Quality Scoring System...")

    # Mocked analysis of the source document.
    docx_facts = {
        'has_tables': True,
        'has_images': False,
        'rtl_content_detected': True,
        'placeholder_count': 3,
        'has_textboxes': False,
        'has_smartart': False,
        'has_complex_shapes': False,
        'table_structure_issues': [],
    }

    # Mocked validation of the generated PDF.
    pdf_check = {
        'file_size_mb': 0.5,
        'file_exists': True,
        'size_reasonable': True,
        'warnings': [],
        'success_metrics': ['PDF file size is reasonable', 'Font substitution applied'],
    }

    # Mocked post-processing statistics.
    post_stats = {
        'pages_processed': 1,
        'placeholders_verified': 3,
        'tables_verified': 1,
        'arabic_text_verified': 150,
        'layout_issues_fixed': 0,
        'warnings': [],
        'success_metrics': ['All 3 placeholders preserved', 'Arabic RTL text verified: 150 characters'],
    }

    score = calculate_quality_score(docx_facts, pdf_check, post_stats)
    print(f"🏆 Quality Score: {score:.1f}%")

    report = generate_comprehensive_quality_report(docx_facts, pdf_check, post_stats)
    print("\n📋 Quality Report:")
    print(report)

    return score
| |
|
def test_font_system():
    """Set up the font environment and look for known Arabic fonts.

    Calls ``setup_font_environment``, runs ``fc-list`` and searches its
    lower-cased output for four Arabic font names, printing each one found.

    Returns:
        True when at least one of the fonts is listed; False when none is,
        or when any step raises (the error is printed, not propagated).
    """
    print("\n🔤 Testing Enhanced Arabic Font System...")

    try:
        setup_font_environment()
        print("✅ Font environment setup completed")

        import subprocess
        listing = subprocess.run(['fc-list'], capture_output=True, text=True, timeout=10)
        installed = listing.stdout.lower()

        wanted = ['amiri', 'noto naskh arabic', 'scheherazade', 'cairo']
        # Substring match against the whole fc-list output.
        present = [name for name in wanted if name in installed]

        print(f"📊 Arabic Fonts Available: {len(present)}/{len(wanted)}")
        for name in present:
            print(f" ✓ {name}")

        return bool(present)

    except Exception as exc:
        print(f"❌ Font system test failed: {exc}")
        return False
| |
|
def test_libreoffice_setup():
    """Check that LibreOffice is configured and print its version.

    Calls ``setup_libreoffice``; when it reports success, runs
    ``libreoffice --version`` and prints the output if the command exits
    with status 0.

    Returns:
        True when ``setup_libreoffice`` returns a truthy value (even if the
        version command exits non-zero); False when it returns a falsy value
        or when any step raises (the error is printed, not propagated).
    """
    print("\n⚙️ Testing LibreOffice Setup...")

    try:
        if not setup_libreoffice():
            print("❌ LibreOffice setup failed")
            return False

        print("✅ LibreOffice is properly configured")

        import subprocess
        version_probe = subprocess.run(
            ['libreoffice', '--version'],
            capture_output=True,
            text=True,
            timeout=10,
        )
        # The version line is informational only; a non-zero exit is ignored.
        if version_probe.returncode == 0:
            print(f"📊 LibreOffice Version: {version_probe.stdout.strip()}")

        return True

    except Exception as exc:
        print(f"❌ LibreOffice test failed: {exc}")
        return False
| |
|
def run_comprehensive_test():
    """Run every check in this script and print a pass/fail summary.

    The four checks run in a fixed order: DOCX analysis, quality scoring,
    font system, LibreOffice. A check counts as passed when its return value
    is truthy. The overall success rate is printed together with a verdict
    (>= 75% excellent, >= 50% good, otherwise needs attention).

    Returns:
        Dict mapping each check's name to the value that check returned.
    """
    divider = "=" * 60

    print("🚀 ENHANCED DOCX TO PDF CONVERSION SYSTEM TEST")
    print(divider)

    # Values in a dict literal are evaluated top to bottom, so the checks
    # run in the order listed.
    outcomes = {
        'docx_analysis': test_docx_analysis(),
        'quality_score': test_quality_scoring(),
        'font_system': test_font_system(),
        'libreoffice': test_libreoffice_setup(),
    }

    print("\n" + divider)
    print("📊 TEST SUMMARY")
    print(divider)

    for label, outcome in outcomes.items():
        verdict = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{label.replace('_', ' ').title()}: {verdict}")

    n_total = len(outcomes)
    n_passed = sum(1 for outcome in outcomes.values() if outcome)

    rate = (n_passed / n_total) * 100
    print(f"\n🎯 Overall Success Rate: {rate:.1f}% ({n_passed}/{n_total})")

    if rate < 50:
        print("⚠️ NEEDS ATTENTION: Several components need fixing")
    elif rate < 75:
        print("👍 GOOD: Most features are working correctly")
    else:
        print("🌟 EXCELLENT: Enhanced conversion system is ready!")

    return outcomes
| |
|
if __name__ == "__main__":
    # Run the full suite, then turn the pass rate into the process exit status.
    outcome_by_test = run_comprehensive_test()

    passed = len([flag for flag in outcome_by_test.values() if flag])
    pass_rate = passed / len(outcome_by_test) * 100

    # Exit status 0 only when at least 75% of the checks returned a truthy value.
    sys.exit(1 if pass_rate < 75 else 0)
| |
|