|
|
|
|
|
""" |
|
|
Quick test for the enhanced quality scoring system |
|
|
""" |
|
|
|
|
|
import sys |
|
|
import os |
|
|
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) |
|
|
|
|
|
from app import ( |
|
|
calculate_quality_score, |
|
|
generate_comprehensive_quality_report, |
|
|
suggest_quality_improvements |
|
|
) |
|
|
|
|
|
def test_quality_scoring():
    """Run the scoring pipeline once on a fixed sample and print the results.

    Builds three hard-coded input dicts (document info, PDF validation,
    post-processing results), passes them to ``calculate_quality_score``,
    ``generate_comprehensive_quality_report`` and
    ``suggest_quality_improvements`` in that order, and prints what each
    one returns.

    Returns:
        The value returned by ``calculate_quality_score`` for the sample.
    """
    print("🧪 Testing Enhanced Quality Scoring System")
    print("=" * 50)

    # Description of the source document.
    source_doc = {
        'text_content_length': 1573,
        'font_families': {'Arial'},
        'has_tables': True,
        'has_images': True,
        'rtl_content_detected': True,
        'placeholder_count': 9,
        'has_textboxes': False,
        'has_smartart': False,
        'has_complex_shapes': False,
        'table_structure_issues': ['Complex cell merging detected'],
    }

    # Validation outcome for the produced PDF.
    pdf_check = {
        'file_size_mb': 0.12,
        'file_exists': True,
        'size_reasonable': True,
        'warnings': [],
        'success_metrics': [
            'PDF file size is reasonable',
            'Document contains tables - formatting preservation critical',
            'Document contains images - quality preservation applied',
            'Font substitution applied for 1 font families',
        ],
    }

    # Outcome of the post-processing pass.
    post_check = {
        'pages_processed': 1,
        'placeholders_verified': 9,
        'tables_verified': 1,
        'arabic_text_verified': 150,
        'layout_issues_fixed': 0,
        'warnings': [],
        'success_metrics': [
            'All 9 placeholders preserved',
            'Arabic RTL text verified: 150 characters',
            'Table structure preserved',
        ],
    }

    # The same three inputs feed every call below, always in this order.
    sample = (source_doc, pdf_check, post_check)

    overall = calculate_quality_score(*sample)
    print(f"🏆 Enhanced Quality Score: {overall:.1f}%")

    # The report is generated before its heading is printed.
    report_text = generate_comprehensive_quality_report(*sample)
    print("\n📋 Enhanced Quality Report:")
    print(report_text)

    tips = suggest_quality_improvements(*sample, overall)
    print("\n💡 Improvement Suggestions:")
    for tip in tips:
        print(tip)

    return overall
|
|
|
|
|
def test_different_scenarios():
    """Score two canned scenarios and print a rating label for each.

    For every scenario the three input dicts are passed to
    ``calculate_quality_score``; the score is printed with one decimal
    place, followed by the label of the first rating band whose minimum
    the score reaches (95, 85, 75, 65), or a fallback label when none match.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("🔬 Testing Different Quality Scenarios")
    print(divider)

    clean_case = {
        'name': 'Perfect Conversion',
        'docx_info': {
            'text_content_length': 1000,
            'font_families': {'Arial'},
            'has_tables': True,
            'has_images': False,
            'rtl_content_detected': True,
            'placeholder_count': 5,
            'has_textboxes': False,
            'has_smartart': False,
            'has_complex_shapes': False,
            'table_structure_issues': [],
        },
        'pdf_validation': {
            'file_size_mb': 0.5,
            'warnings': [],
            'success_metrics': ['Perfect conversion', 'All elements preserved'],
        },
        'post_process_results': {
            'pages_processed': 1,
            'placeholders_verified': 5,
            'tables_verified': 1,
            'arabic_text_verified': 200,
            'warnings': [],
            'success_metrics': ['All placeholders preserved', 'Arabic text verified'],
        },
    }

    troubled_case = {
        'name': 'Complex Document with Issues',
        'docx_info': {
            'text_content_length': 5000,
            'font_families': {'Arial', 'Traditional Arabic'},
            'has_tables': True,
            'has_images': True,
            'rtl_content_detected': True,
            'placeholder_count': 10,
            'has_textboxes': True,
            'has_smartart': True,
            'has_complex_shapes': True,
            'table_structure_issues': ['Nested tables', 'Complex merging'],
        },
        'pdf_validation': {
            'file_size_mb': 2.5,
            'warnings': ['Large file size'],
            'success_metrics': ['Basic conversion completed'],
        },
        'post_process_results': {
            'pages_processed': 3,
            'placeholders_verified': 8,
            'tables_verified': 2,
            'arabic_text_verified': 500,
            'warnings': ['Some layout issues detected'],
            'success_metrics': ['Most elements preserved'],
        },
    }

    # Rating bands as (minimum score, printed line), highest minimum first
    # so that the first band reached wins.
    rating_bands = (
        (95, " Result: 🌟 EXCELLENT"),
        (85, " Result: ✅ VERY GOOD"),
        (75, " Result: 👍 GOOD"),
        (65, " Result: ⚠️ FAIR"),
    )
    below_all_bands = " Result: ❌ NEEDS IMPROVEMENT"

    for case in (clean_case, troubled_case):
        print(f"\n📊 Scenario: {case['name']}")
        score = calculate_quality_score(
            case['docx_info'],
            case['pdf_validation'],
            case['post_process_results'],
        )
        print(f" Quality Score: {score:.1f}%")

        # The generator is lazy: comparison stops at the first band reached.
        rating_line = next(
            (line for minimum, line in rating_bands if score >= minimum),
            below_all_bands,
        )
        print(rating_line)
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the sample check first (its score feeds the
    # summary below), then the scenario comparison.
    actual_score = test_quality_scoring()
    test_different_scenarios()

    divider = "=" * 50
    print("\n" + divider)
    print("🎯 SUMMARY")
    print(divider)
    print(f"Your document achieved: {actual_score:.1f}%")

    # Pick the summary message for the score band, then print it once.
    if actual_score >= 90:
        verdict = "🌟 Excellent quality! The enhanced system is working perfectly."
    elif actual_score >= 80:
        verdict = "✅ Good quality! Minor improvements applied successfully."
    elif actual_score >= 70:
        verdict = "👍 Acceptable quality. The system detected and addressed issues."
    else:
        verdict = "⚠️ Quality needs improvement. The system provided detailed suggestions."
    print(verdict)

    print("\n💡 The enhanced quality scoring system now provides:")
    for feature_line in (
        " • More accurate quality assessment",
        " • Detailed improvement suggestions",
        " • Better handling of complex documents",
        " • Comprehensive quality reports",
    ):
        print(feature_line)
|
|
|