Spaces:

Hoctar77
/

DocumentCheckerTool

Sleeping

App Files Files Community

Hoctar77 committed on Jan 5, 2025

Commit

dbf4df0

verified ·

1 Parent(s): b8f25d8

January 2025 updates

Browse files

Files changed (1) hide show

app.py +997 -333

app.py CHANGED Viewed

@@ -9,11 +9,14 @@ import logging
 import traceback
 from datetime import datetime
 from enum import Enum, auto
-from typing import Dict, List, Any, Tuple, Optional, Pattern, Callable
 from dataclasses import dataclass
 from functools import wraps
 from abc import ABC, abstractmethod
 # import tempfile  # For creating temporary files
 # Third-party imports
 import gradio as gr
@@ -342,285 +345,34 @@ class DocumentCheckerConfig:
         Returns:
             Dict[str, List[PatternConfig]]: Dictionary of pattern configurations by category
         """
-        patterns = {
-            'terminology': [
-                PatternConfig(
-                    pattern=r'\btitle 14 of the Code of Federal Regulations \(14 CFR\)\b',
-                    description="Ignore 'title 14 of the Code of Federal Regulations (14 CFR)'",
-                    is_error=False  # Set to False to ignore this phrase
-                ),
-                PatternConfig(
-                    pattern=r'\btitle 14, Code of Federal Regulations \(14 CFR\)\b',
-                    description="Ignore 'title 14, Code of Federal Regulations (14 CFR)'",
-                    is_error=False
-                ),
-                PatternConfig(
-                    pattern=r'\btitle 49 of the United States Code \(49 U.S.C.\)\b',
-                    description="Ignore 'title 49 of the United States Code (49 U.S.C.)'",
-                    is_error=False
-                ),
-                PatternConfig(
-                    pattern=r'\btitle 49, United States Code \(49 U.S.C.\)\b',
-                    description="Ignore 'title 49, United States Code (49 U.S.C.)'",
-                    is_error=False
-                ),
-                PatternConfig(
-                    pattern=r'\bAD Compliance Team \(AD CRT\)\b',
-                    description="Ignore 'AD Compliance Team (AD CRT)'",
-                    is_error=False
-                ),
-                PatternConfig(
-                    pattern=r'\bUSC\b',
-                    description="USC should be U.S.C.", # Per GPO Style Manual
-                    is_error=True,
-                    replacement="U.S.C."
-                ),
-                PatternConfig(
-                    pattern=r'\bCFR Part\b',
-                    description="CFR Part should be CFR part (lowercase)", # Per FAA Order 1320.46
-                    is_error=True,
-                    replacement="CFR part"
-                ),
-                PatternConfig(
-                    pattern=r'\bC\.F\.R\.\b',
-                    description="C.F.R. should be CFR", # GPO Style Manual
-                    is_error=True,
-                    replacement="CFR"
-                ),
-                PatternConfig(
-                    pattern=r'\bWe\b',
-                    description="'We' should be 'The FAA'",
-                    is_error=True,
-                    replacement="The FAA"
-                ),
-                PatternConfig(
-                    pattern=r'\bwe\b',
-                    description="'we' should be 'the FAA'",
-                    is_error=True,
-                    replacement="the FAA"
-                ),
-                PatternConfig(
-                    pattern=r'\bcancelled\b',
-                    description="'cancelled' should be 'canceled'", # Per GPO Style Manual
-                    is_error=True,
-                    replacement="canceled"
-                ),
-                PatternConfig(
-                    pattern=r'\bshall\b',
-                    description="'shall' should be 'must'", # Per FAA Order 1320.46
-                    is_error=True,
-                    replacement="must"
-                ),
-                PatternConfig(
-                    pattern=r'\b\&\b',
-                    description="'&' should be 'and'", # Per April 17, 2024 Use ampersand instead or 'and' email from Judith Watson
-                    is_error=True,
-                    replacement="and"
-                ),
-                PatternConfig(
-                    pattern=r'\bflight crew\b',
-                    description="'flight crew' should be 'flightcrew'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="flightcrew"
-                ),
-                PatternConfig(
-                    pattern=r'\bchairman\b',
-                    description="'chairman' should be 'chair'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="chair"
-                ),
-                PatternConfig(
-                    pattern=r'\bflagman\b',
-                    description="'flagman' should be 'flagger' or 'flagperson'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="flagperson"
-                ),
-                PatternConfig(
-                    pattern=r'\bman\b',
-                    description="'man' should be 'individual' or 'person'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="person"
-                ),
-                PatternConfig(
-                    pattern=r'\bmanmade\b',
-                    description="'manmade' should be 'personmade'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="personmade"
-                ),
-                PatternConfig(
-                    pattern=r'\bmanpower\b',
-                    description="'manpower' should be 'labor force'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="labor force"
-                ),
-                PatternConfig(
-                    pattern=r'\bnotice to airman\b',
-                    description="'notice to airman' should be 'notice to air missions'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="notice to air missions"
-                ),
-                PatternConfig(
-                    pattern=r'\bnotice to airmen\b',
-                    description="'notice to airmen' should be 'notice to air missions'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="notice to air missions"
-                ),
-                PatternConfig(
-                    pattern=r'\bcockpit\b',
-                    description="'cockpit' should be 'flight deck'", # Per AIR-600 Quick Reference Guide for Authors, Reviewers, and Writers/Editors
-                    is_error=True,
-                    replacement="flight deck"
-                ),
-                PatternConfig(
-                    pattern=r'\bA321 neo\b',
-                    description="'A321 neo' should be 'A321neo'", # Per TCDS
-                    is_error=True,
-                    replacement="A321neo"
-                )
-            ],
-            'section_symbol': [
-                PatternConfig(
-                    pattern=r'^§',
-                    description="Don't start a sentence with the section symbol. Write out 'Section'",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'\b14 CFR §\s*\d+\.\d+\b',
-                    description="14 CFR should not use section symbol",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'§\s*\d+\.\d+\s+(?:and|or)\s+\d+\.\d+',
-                    description="Missing section symbol in multiple sections",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'§\s*\d+\.\d+\s+through\s+\d+\.\d+',
-                    description="Missing section symbol in range of sections",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'§\s*\d+\.\d+\s+or\s+§?\s*\d+\.\d+',
-                    description="Inconsistent section symbol usage with 'or'",
-                    is_error=True
-                )
-            ],
-            'spacing': [
-                PatternConfig(
-                    pattern=r'([^\s]+)[ ]{2,}([^\s]+)',  # Capture words before and after double space
-                    description="Remove double spacing between '{0}' and '{1}'",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*[A-Z]?)',  # Capture doc type and number
-                    description="Add space between '{0}' and '{1}'",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(§|§§)(\d+\.\d+)',  # Removed (?<!\s) to catch all section symbols
-                    description="Add space after '{0}' before '{1}'",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(?<!\s)(Part)(\d+)',  # Capture 'Part' and number
-                    description="Add space between '{0}' and '{1}'",
-                    is_error=True
-                )
-            ],
-            'dates': [
-                PatternConfig(
-                    pattern=r'(?<![\w/-])\d{1,2}/\d{1,2}/\d{2,4}(?![\w/-])',
-                    description="Use 'Month Day, Year' format instead of MM/DD/YYYY",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(?<![\w/-])\d{1,2}-\d{1,2}-\d{2,4}(?![\w/-])',
-                    description="Use 'Month Day, Year' format instead of MM-DD-YYYY",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(?<![\w/-])\d{4}-\d{1,2}-\d{1,2}(?![\w/-])',
-                    description="Use 'Month Day, Year' format instead of YYYY-MM-DD",
-                    is_error=True
-                )
-            ],
-            'placeholders': [
-                PatternConfig(
-                    pattern=r'\bTBD\b',
-                    description="Remove TBD placeholder",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'\bTo be determined\b',
-                    description="Remove 'To be determined' placeholder",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'\bTo be added\b',
-                    description="Remove 'To be added' placeholder",
-                    is_error=True
-                )
-            ],
-            'reference_terms': [
-                PatternConfig(
-                    pattern=r'\babove\b',
-                    description="Avoid using 'above' for references",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'\bbelow\b',
-                    description="Avoid using 'below' for references",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(?:^|(?<=[.!?]\s))There\s+(?:is|are)\b',
-                    description="Avoid starting sentences with 'There is/are'",
-                    is_error=True
-                )
-            ],
-            'periods': [
-                PatternConfig(
-                    pattern=r'\.\.',
-                    description="Remove double periods",
-                    is_error=True
-                )
-            ],
-            'table_figure_references': [
-                PatternConfig(
-                    pattern=r'(?<!^)(?<![.!?])\s+[T]able\s+\d+(?:-\d+)?',
-                    description="Table reference within sentence should be lowercase",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'(?<!^)(?<![.!?])\s+[F]igure\s+\d+(?:-\d+)?',
-                    description="Figure reference within sentence should be lowercase",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'^[t]able\s+\d+(?:-\d+)?',
-                    description="Table reference at start of sentence should be capitalized",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'^[f]igure\s+\d+(?:-\d+)?',
-                    description="Figure reference at start of sentence should be capitalized",
-                    is_error=True
-                )
-            ],
-            'parentheses': [
-                PatternConfig(
-                    pattern=r'\([^)]*$',  # Finds opening parenthesis without closing
-                    description="Missing closing parenthesis",
-                    is_error=True
-                ),
-                PatternConfig(
-                    pattern=r'[^(]*\)',  # Finds closing parenthesis without opening
-                    description="Missing opening parenthesis",
-                    is_error=True
-                )
-            ]
-        }
-        return patterns
 def profile_performance(func):
     """Decorator to profile function performance."""
@@ -667,7 +419,7 @@ class FAADocumentChecker(DocumentChecker):
     PREDEFINED_ACRONYMS = {
         'AGC', 'AIR', 'CFR', 'DC', 'DOT', 'FAA IR-M', 'FAQ', 'i.e.', 'e.g.', 'MA',
-        'MD', 'MIL', 'MO', 'No.', 'PDF', 'SSN', 'TX', 'U.S.', 'U.S.C.', 'USA', 'US',
         'WA', 'XX', 'ZIP'
     }
@@ -903,9 +655,13 @@ class FAADocumentChecker(DocumentChecker):
     @profile_performance
     def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
         # Common words that might appear in uppercase but aren't acronyms
         heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
@@ -932,12 +688,13 @@ class FAADocumentChecker(DocumentChecker):
         defined_acronyms = {}  # Stores definition info
         used_acronyms = set()  # Stores acronyms used after definition
         reported_acronyms = set()  # Stores acronyms that have already been noted as issues
-        issues = []
         # Patterns
         defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
         acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
         for paragraph in doc:
             # Skip lines that appear to be headings
             words = paragraph.strip().split()
@@ -973,9 +730,10 @@ class FAADocumentChecker(DocumentChecker):
                 if any(start <= start_pos <= end for start, end in ignored_spans):
                     continue
-                # Skip predefined acronyms and other checks
                 if (acronym in predefined_acronyms or
                     acronym in heading_words or
                     any(not c.isalpha() for c in acronym) or
                     len(acronym) > 10):
                     continue
@@ -1674,12 +1432,16 @@ class FAADocumentChecker(DocumentChecker):
         # Define order of checks for better organization
         check_sequence = [
             ('heading_title_check', lambda: self.heading_title_check(doc, doc_type)),
             ('heading_title_period_check', lambda: self.heading_title_period_check(doc, doc_type)),
             ('terminology_check', lambda: self.check_terminology(doc)),
             ('acronym_check', lambda: self.acronym_check(doc)),
             ('acronym_usage_check', lambda: self.acronym_usage_check(doc)),
             ('section_symbol_usage_check', lambda: self.check_section_symbol_usage(doc)),
             ('date_formats_check', lambda: self.check_date_formats(doc)),
             ('placeholders_check', lambda: self.check_placeholders(doc)),
             ('document_title_check', lambda: self.document_title_check(doc_path, doc_type) if not skip_title_check else DocumentCheckResult(success=True, issues=[])),
@@ -2243,6 +2005,770 @@ class FAADocumentChecker(DocumentChecker):
             issues=issues,
             details=sentence_stats
         )
 class DocumentCheckResultsFormatter:
@@ -2385,6 +2911,50 @@ class DocumentCheckResultsFormatter:
                     'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
                     'after': 'See AC 25.1309-1B, <i>System Design and Analysis</i>, for information on X.'
                 }
             }
         }
@@ -2472,15 +3042,24 @@ class DocumentCheckResultsFormatter:
         return formatted_issues
     def _format_reference_issues(self, result: DocumentCheckResult) -> List[str]:
-        """Format reference-related issues with clear replacement instructions."""
-        output = []
-        if result.issues:
-            for issue in result.issues:
-                if 'reference' in issue and 'correct_form' in issue:
-                    output.append(f"    • Replace '{issue['reference']}' with '{issue['correct_form']}'")
-        return output
     def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
         """Format standard issues consistently."""
@@ -2753,19 +3332,38 @@ class DocumentCheckResultsFormatter:
                     output.extend(self._format_section_symbol_issues(result))
                 elif check_name == 'parentheses_check':
                     output.extend(self._format_parentheses_issues(result))
-                elif check_name == 'paragraph_length_check':
-                    output.extend(self._format_paragraph_length_issues(result))
-                elif check_name == 'sentence_length_check':
-                    formatted_issues = [self._format_standard_issue(issue) for issue in result.issues[:15]]
-                    output.extend(formatted_issues)
-                    if len(result.issues) > 15:
-                        output.append(f"\n    ... and {len(result.issues) - 15} more similar issues.")
                 else:
                     formatted_issues = [self._format_standard_issue(issue) for issue in result.issues[:15]]
                     output.extend(formatted_issues)
-                    if len(result.issues) > 15:
                         output.append(f"\n    ... and {len(result.issues) - 15} more similar issues.")
         return '\n'.join(output)
@@ -2788,33 +3386,29 @@ class DocumentCheckResultsFormatter:
         except Exception as e:
             print(f"Error saving report: {e}")
-def process_document(file_obj, doc_type: str, template_type: Optional[str] = None) -> str:
-    """Process document and run all checks."""
-    try:
-        print(f"Processing document at {time.time()}")  # Debug print
-        checker = FAADocumentChecker()
-        if isinstance(file_obj, bytes):
-            file_obj = io.BytesIO(file_obj)
-        results = checker.run_all_checks(file_obj, doc_type, template_type)
-        return format_markdown_results(results, doc_type)
-    except Exception as e:
-        logging.error(f"Error processing document: {str(e)}")
-        traceback.print_exc()
-        return f"""
-# ❌ Error Processing Document
-**Error Details:** {str(e)}
-Please ensure:
-1. The file is a valid .docx document
-2. The file is not corrupted or password protected
-3. The file is properly formatted
-Try again after checking these issues. If the problem persists, contact support.
-"""
 def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
     """Format check results into a Markdown string for Gradio display."""
@@ -2842,6 +3436,9 @@ def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: s
         'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 1},
         'acronym_usage_check': {'title': '📎 Acronym Usage', 'priority': 1},
         'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
         'date_formats_check': {'title': '📅 Date Formats', 'priority': 2},
         'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 2},
         'document_title_check': {'title': '📑 Document Title Format', 'priority': 2},
@@ -2852,7 +3449,8 @@ def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: s
         'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
         'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
         'paragraph_length_check': {'title': '📏 Paragraph Length', 'priority': 5},
-        'sentence_length_check': {'title': '📏 Sentence Length', 'priority': 5}
     }
     sorted_checks = sorted(
@@ -2948,6 +3546,60 @@ def create_interface():
             title = parts[0].strip()
             content = parts[1].strip()
             # Extract description and solution
             description_parts = content.split('How to fix:', 1)
             description = description_parts[0].strip()
@@ -2991,13 +3643,12 @@ def create_interface():
                         <h3 class="font-medium text-gray-800 mb-2">Issues found in your document:</h3>
                         <ul class="list-none space-y-2">
                 """
-                for issue in issues_match[:7]:
-                    # Remove any existing bullet points from the issue text
                     clean_issue = issue.strip().lstrip('•').strip()
                     issues_html_section += f"""
                         <li class="text-gray-600 ml-4">• {clean_issue}</li>
                     """
-                if len(issues_match) > 7:
                     issues_html_section += f"""
                         <li class="text-gray-500 italic ml-4">... and {len(issues_match) - 7} more similar issues.</li>
                     """
@@ -3028,7 +3679,21 @@ def create_interface():
                 </div>
             """
-        # Format summary section
         summary_html = f"""
             <div class="bg-white rounded-lg shadow-sm mb-6 overflow-hidden">
                 <div class="bg-gray-50 px-6 py-4 border-b">
@@ -3058,12 +3723,13 @@ def create_interface():
             </div>
         """
-        # Final HTML with styling
         full_html = f"""
         <div class="mx-auto p-4" style="font-family: system-ui, -apple-system, sans-serif;">
             <style>
                 .text-2xl {{ font-size: 1.5rem; line-height: 2rem; }}
                 .text-lg {{ font-size: 1.125rem; }}
                 .font-bold {{ font-weight: 700; }}
                 .font-semibold {{ font-weight: 600; }}
                 .font-medium {{ font-weight: 500; }}
@@ -3095,9 +3761,7 @@ def create_interface():
                 .overflow-hidden {{ overflow: hidden; }}
                 .list-none {{ list-style-type: none; }}
                 .space-y-4 > * + * {{ margin-top: 1rem; }}
-                .text-red-600 {{ color: #dc2626; }}
-                .text-amber-600 {{ color: #d97706; }}
-                .text-green-600 {{ color: #059669; }}
             </style>
             {header_html}
             {issues_html}

 import traceback
 from datetime import datetime
 from enum import Enum, auto
+from typing import Dict, List, Any, Tuple, Optional, Pattern, Callable, Set
 from dataclasses import dataclass
 from functools import wraps
 from abc import ABC, abstractmethod
 # import tempfile  # For creating temporary files
+import requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
 # Third-party imports
 import gradio as gr
         Returns:
             Dict[str, List[PatternConfig]]: Dictionary of pattern configurations by category
         """
+        try:
+            # Get the directory containing the current file
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            patterns_file = os.path.join(current_dir, 'patterns.json')
+            # Load patterns from JSON file
+            with open(patterns_file, 'r') as f:
+                patterns_data = json.load(f)
+            # Convert JSON data to PatternConfig objects
+            patterns = {}
+            for category, pattern_list in patterns_data.items():
+                patterns[category] = [
+                    PatternConfig(
+                        pattern=p['pattern'],
+                        description=p['description'],
+                        is_error=p['is_error'],
+                        replacement=p.get('replacement'),
+                        keep_together=p.get('keep_together', False)
+                    ) for p in pattern_list
+                ]
+            return patterns
+        except Exception as e:
+            self.logger.error(f"Error loading patterns: {e}")
+            # Return empty patterns dictionary if file loading fails
+            return {}
 def profile_performance(func):
     """Decorator to profile function performance."""
     PREDEFINED_ACRONYMS = {
         'AGC', 'AIR', 'CFR', 'DC', 'DOT', 'FAA IR-M', 'FAQ', 'i.e.', 'e.g.', 'MA',
+        'MD', 'MIL', 'MO', 'No.', 'PDF', 'SAE', 'SSN', 'TX', 'U.S.', 'U.S.C.', 'USA', 'US',
         'WA', 'XX', 'ZIP'
     }
     @profile_performance
     def acronym_check(self, doc: List[str]) -> DocumentCheckResult:
+        """Check for acronyms and their definitions."""
         if not self.validate_input(doc):
             return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        # Load valid words
+        valid_words = self._load_valid_words()
         # Common words that might appear in uppercase but aren't acronyms
         heading_words = self.config_manager.config.get('heading_words', self.HEADING_WORDS)
         defined_acronyms = {}  # Stores definition info
         used_acronyms = set()  # Stores acronyms used after definition
         reported_acronyms = set()  # Stores acronyms that have already been noted as issues
         # Patterns
         defined_pattern = re.compile(r'\b([\w\s&]+?)\s*\((\b[A-Z]{2,}\b)\)')
         acronym_pattern = re.compile(r'(?<!\()\b[A-Z]{2,}\b(?!\s*[:.]\s*)')
+        issues = []
         for paragraph in doc:
             # Skip lines that appear to be headings
             words = paragraph.strip().split()
                 if any(start <= start_pos <= end for start, end in ignored_spans):
                     continue
+                # Skip predefined acronyms, valid words, and other checks
                 if (acronym in predefined_acronyms or
                     acronym in heading_words or
+                    acronym.lower() in valid_words or  # Check against valid words list
                     any(not c.isalpha() for c in acronym) or
                     len(acronym) > 10):
                     continue
         # Define order of checks for better organization
         check_sequence = [
+            ('readability_check', lambda: self.check_readability(doc)),
             ('heading_title_check', lambda: self.heading_title_check(doc, doc_type)),
             ('heading_title_period_check', lambda: self.heading_title_period_check(doc, doc_type)),
             ('terminology_check', lambda: self.check_terminology(doc)),
             ('acronym_check', lambda: self.acronym_check(doc)),
             ('acronym_usage_check', lambda: self.acronym_usage_check(doc)),
             ('section_symbol_usage_check', lambda: self.check_section_symbol_usage(doc)),
+            ('508_compliance_check', lambda: self.check_508_compliance(doc_path)),
+            ('cross_references_check', lambda: self.check_cross_references(doc_path)),
+            ('hyperlink_check', lambda: self.check_hyperlinks(doc)),
             ('date_formats_check', lambda: self.check_date_formats(doc)),
             ('placeholders_check', lambda: self.check_placeholders(doc)),
             ('document_title_check', lambda: self.document_title_check(doc_path, doc_type) if not skip_title_check else DocumentCheckResult(success=True, issues=[])),
             issues=issues,
             details=sentence_stats
         )
+    @profile_performance
+    def check_508_compliance(self, doc_path: str) -> DocumentCheckResult:
+        """
+        Perform Section 508 compliance checks focusing on image alt text and heading structure.
+        """
+        try:
+            doc = Document(doc_path)
+            issues = []
+            images_with_alt = 0
+            heading_structure = {}
+            heading_issues = []  # Separate list for heading-specific issues
+            hyperlink_issues = []  # New list for hyperlink issues
+            # Image alt text check
+            for shape in doc.inline_shapes:
+                alt_text = None
+                if hasattr(shape, '_inline') and hasattr(shape._inline, 'docPr'):
+                    docPr = shape._inline.docPr
+                    alt_text = docPr.get('descr') or docPr.get('title')
+                if alt_text:
+                    images_with_alt += 1
+                else:
+                    issues.append({
+                        'category': 'image_alt_text',
+                        'message': 'Image is missing descriptive alt text.',
+                        'context': 'Ensure all images have descriptive alt text.'
+                    })
+            # Enhanced heading structure check
+            headings = []
+            for paragraph in doc.paragraphs:
+                if paragraph.style.name.startswith('Heading'):
+                    try:
+                        level = int(paragraph.style.name.split()[-1])
+                        text = paragraph.text.strip()
+                        if not text:
+                            continue
+                        headings.append((text, level))
+                        heading_structure[level] = heading_structure.get(level, 0) + 1
+                    except ValueError:
+                        continue
+            # Check heading hierarchy
+            if headings:
+                min_level = min(level for _, level in headings)
+                if min_level > 1:
+                    heading_issues.append({
+                        'severity': 'error',
+                        'type': 'missing_h1',
+                        'message': 'Document should start with a Heading 1',
+                        'context': f"First heading found is level {headings[0][1]}: '{headings[0][0]}'",
+                        'recommendation': 'Add a Heading 1 at the start of the document'
+                    })
+                # Check for skipped levels
+                previous_heading = None
+                for text, level in headings:
+                    if previous_heading:
+                        prev_text, prev_level = previous_heading
+                        # Only check for skipped levels when going deeper
+                        if level > prev_level + 1:
+                            missing_levels = list(range(prev_level + 1, level))
+                            heading_issues.append({
+                                'severity': 'error',
+                                'type': 'skipped_levels',
+                                'message': f"Skipped heading level(s) {', '.join(map(str, missing_levels))} - Found H{level} '{text}' after H{prev_level} '{prev_text}'. Add H{prev_level + 1} before this section.",
+                            })
+                    previous_heading = (text, level)
+            # Enhanced Hyperlink Accessibility Check
+            for paragraph in doc.paragraphs:
+                # Check both hyperlink fields and runs with hyperlink formatting
+                hyperlinks = []
+                # Method 1: Check for hyperlink fields
+                if hasattr(paragraph, '_element') and hasattr(paragraph._element, 'xpath'):
+                    hyperlinks.extend(paragraph._element.xpath('.//w:hyperlink'))
+                # Method 2: Check for hyperlink style runs
+                for run in paragraph.runs:
+                    if hasattr(run, '_element') and hasattr(run._element, 'rPr'):
+                        if run._element.rPr is not None:
+                            if run._element.rPr.xpath('.//w:rStyle[@w:val="Hyperlink"]'):
+                                hyperlinks.append(run)
+                    # Method 3: Check for direct hyperlink elements
+                    if hasattr(run, '_r'):
+                        if run._r.xpath('.//w:hyperlink'):
+                            hyperlinks.append(run)
+                # Process found hyperlinks
+                for hyperlink in hyperlinks:
+                    # Extract link text based on element type
+                    if hasattr(hyperlink, 'text'):  # For run objects
+                        link_text = hyperlink.text.strip()
+                    else:  # For hyperlink elements
+                        link_text = ''.join([t.text for t in hyperlink.xpath('.//w:t')])
+                    if not link_text:  # Skip empty links
+                        continue
+                    # Check for accessibility issues
+                    non_descriptive = [
+                        'click here', 'here', 'link', 'this link', 'more',
+                        'read more', 'learn more', 'click', 'see this',
+                        'see here', 'go', 'url', 'this', 'page'
+                    ]
+                    if any(phrase == link_text.lower() for phrase in non_descriptive):
+                        hyperlink_issues.append({
+                            'category': 'hyperlink_accessibility',
+                            'severity': 'warning',
+                            'message': 'Non-descriptive hyperlink text detected',
+                            'context': f'Link text: "{link_text}"',
+                            'recommendation': 'Replace with descriptive text that indicates the link destination',
+                            'user_message': f'Replace non-descriptive link text "{link_text}" with text that clearly indicates where the link will take the user'
+                        })
+                    elif len(link_text.strip()) < 4:  # Check for very short link text
+                        hyperlink_issues.append({
+                            'category': 'hyperlink_accessibility',
+                            'severity': 'warning',
+                            'message': 'Hyperlink text may be too short to be meaningful',
+                            'context': f'Link text: "{link_text}"',
+                            'recommendation': 'Use longer, more descriptive text that indicates the link destination',
+                            'user_message': f'Link text "{link_text}" is too short - use descriptive text that clearly indicates the link destination'
+                        })
+                    elif link_text.lower().startswith(('http', 'www', 'ftp')):
+                        hyperlink_issues.append({
+                            'category': 'hyperlink_accessibility',
+                            'severity': 'warning',
+                            'message': 'Raw URL used as link text',
+                            'context': f'Link text: "{link_text}"',
+                            'recommendation': 'Replace the URL with descriptive text that indicates the link destination',
+                            'user_message': f'Replace the URL "{link_text}" with meaningful text that describes the link destination'
+                        })
+            # Add hyperlink issues to main issues list
+            if hyperlink_issues:
+                issues.extend(hyperlink_issues)
+            # Combine all issues
+            if heading_issues:
+                issues.extend([{
+                    'category': '508_compliance_heading_structure',
+                    **issue
+                } for issue in heading_issues])
+            # Enhanced details with heading structure information
+            details = {
+                'total_images': len(doc.inline_shapes),
+                'images_with_alt': images_with_alt,
+                'heading_structure': {
+                    'total_headings': len(headings),
+                    'levels_found': dict(sorted(heading_structure.items())),
+                    'hierarchy_depth': max(heading_structure.keys()) if heading_structure else 0,
+                    'heading_sequence': [(text[:50] + '...' if len(text) > 50 else text, level)
+                                       for text, level in headings],
+                    'issues_found': len(heading_issues)
+                },
+                'hyperlink_accessibility': {  # New details section
+                    'total_issues': len(hyperlink_issues),
+                    'non_descriptive_links': sum(1 for issue in hyperlink_issues
+                                               if 'Non-descriptive' in issue['message']),
+                    'raw_urls': sum(1 for issue in hyperlink_issues
+                                  if 'Raw URL' in issue['message'])
+                }
+            }
+            return DocumentCheckResult(
+                success=len(issues) == 0,
+                issues=issues,
+                details=details
+            )
+        except Exception as e:
+            self.logger.error(f"Error during 508 compliance check: {str(e)}")
+            return DocumentCheckResult(
+                success=False,
+                issues=[{
+                    'category': 'error',
+                    'message': f'Error performing 508 compliance check: {str(e)}'
+                }]
+            )
+    def _format_compliance_issues(self, result: DocumentCheckResult) -> List[str]:
+        """Format compliance issues with clear, user-friendly descriptions."""
+        formatted_issues = []
+        for issue in result.issues:
+            if issue.get('category') == '508_compliance_heading_structure':
+                # Existing heading structure formatting...
+                message = issue.get('message', 'No description provided')
+                context = issue.get('context', 'No context provided').strip()
+                recommendation = issue.get('recommendation', 'No recommendation provided').strip()
+                formatted_issues.append(
+                    f"    • {message}. Context: {context}. Recommendation: {recommendation}"
+                )
+            elif issue.get('category') == 'image_alt_text':
+                # Existing alt text formatting...
+                formatted_issues.append(
+                    f"    • {issue.get('message', 'No description provided')}. {issue.get('context', '')}"
+                )
+            elif issue.get('category') == 'hyperlink_accessibility':
+                # Use the new user-friendly message
+                formatted_issues.append(
+                    f"    • {issue.get('user_message', issue.get('message', 'No description provided'))}"
+                )
+            elif 'context' in issue and issue['context'].startswith('Link text:'):
+                # This catches the hyperlink issues that might not have the category set
+                link_text = issue['context'].replace('Link text:', '').strip().strip('"')
+                if any(phrase == link_text.lower() for phrase in ['here', 'click here', 'more', 'link']):
+                    formatted_issues.append(
+                        f"    • Replace non-descriptive link text \"{link_text}\" with text that clearly indicates where the link will take the user"
+                    )
+                elif link_text.lower().startswith(('http', 'www', 'ftp')):
+                    formatted_issues.append(
+                        f"    • Replace the URL \"{link_text}\" with meaningful text that describes the link destination"
+                    )
+                elif len(link_text) < 4:
+                    formatted_issues.append(
+                        f"    • Link text \"{link_text}\" is too short - use descriptive text that clearly indicates the link destination"
+                    )
+                else:
+                    formatted_issues.append(f"    • {issue.get('message', 'No description provided')} {issue['context']}")
+            else:
+                # Generic formatting for other issues
+                message = issue.get('message', 'No description provided')
+                context = issue.get('context', '').strip()
+                formatted_issues.append(
+                    f"    • {message} {context}"
+                )
+        return formatted_issues
+    @profile_performance
+    def check_hyperlinks(self, doc: List[str]) -> DocumentCheckResult:
+        """
+        Enhanced hyperlink checker that identifies potentially broken URLs.
+        Args:
+            doc: List of document paragraphs.
+        Returns:
+            DocumentCheckResult with any potentially broken links.
+        """
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        issues = []
+        checked_urls = set()
+        # URL pattern - matches http/https URLs
+        url_pattern = re.compile(
+            r'https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_\+.~#?&//=]*'
+        )
+        # Helper function to check a single URL
+        def check_url(url):
+            try:
+                response = requests.head(url, timeout=5, allow_redirects=True, headers={'User-Agent': 'CheckerTool/1.0'})
+                if response.status_code >= 400:
+                    return {
+                        'url': url,
+                        'message': f"Broken link: {url} (HTTP {response.status_code})"
+                    }
+            except requests.RequestException:
+                return {
+                    'url': url,
+                    'message': f"Check the link or internet connection: {url} (connection error)"
+                }
+            return None
+        # Extract and deduplicate URLs
+        for paragraph in doc:
+            urls = {match.group() for match in url_pattern.finditer(paragraph)}
+            checked_urls.update(urls)
+        # Concurrently check URLs
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            future_to_url = {executor.submit(check_url, url): url for url in checked_urls}
+            for future in as_completed(future_to_url):
+                issue = future.result()
+                if issue:
+                    issues.append(issue)
+        return DocumentCheckResult(
+            success=len(issues) == 0,
+            issues=issues,
+            details={
+                'total_urls_checked': len(checked_urls),
+                'broken_urls': len(issues)
+            }
+        )
+    def _load_valid_words(self) -> Set[str]:
+        """
+        Load valid English words from valid_words.txt file.
+        Returns:
+            Set[str]: Set of valid English words in lowercase
+        """
+        try:
+            # Get the directory containing the current file
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            words_file = os.path.join(current_dir, 'valid_words.txt')
+            # Load words from file
+            with open(words_file, 'r') as f:
+                words = {line.strip().lower() for line in f if line.strip()}
+            return words
+        except Exception as e:
+            self.logger.warning(f"Error loading word list: {e}")
+            return set()  # Return empty set as fallback
+    @profile_performance
+    def check_cross_references(self, doc_path: str) -> DocumentCheckResult:
+        """
+        Check for missing cross-referenced elements in the document.
+        """
+        try:
+            doc = Document(doc_path)
+        except Exception as e:
+            self.logger.error(f"Error reading the document: {e}")
+            return DocumentCheckResult(success=False, issues=[{'error': str(e)}], details={})
+        heading_structure = self._extract_paragraph_numbering(doc)
+        valid_sections = {number for number, _ in heading_structure}
+        tables = set()
+        figures = set()
+        issues = []
+        # Skip patterns for external references
+        skip_patterns = [
+            r'(?:U\.S\.C\.|USC)\s+(?:§+\s*)?(?:Section|section)?\s*\d+',
+            r'Section\s+\d+(?:\([a-z]\))*\s+of\s+(?:the\s+)?(?:United States Code|U\.S\.C\.)',
+            r'Section\s+\d+(?:\([a-z]\))*\s+of\s+Title\s+\d+',
+            r'(?:Section|§)\s*\d+(?:\([a-z]\))*\s+of\s+the\s+Act',
+            r'Section\s+\d+\([a-z]\)',
+            r'§\s*\d+\([a-z]\)',
+            r'\d+\s*(?:CFR|C\.F\.R\.)',
+            r'Part\s+\d+(?:\.[0-9]+)*\s+of\s+Title\s+\d+',
+            r'Public\s+Law\s+\d+[-–]\d+',
+            r'Title\s+\d+,\s+Section\s+\d+(?:\([a-z]\))*',
+            r'\d+\s+U\.S\.C\.\s+\d+(?:\([a-z]\))*',
+        ]
+        skip_regex = re.compile('|'.join(skip_patterns), re.IGNORECASE)
+        try:
+            # Extract tables and figures
+            for para in doc.paragraphs:
+                text = para.text.strip() if hasattr(para, 'text') else ''
+                # Table extraction
+                if text.lower().startswith('table'):
+                    matches = [
+                        re.match(r'^table\s+(\d{1,2}(?:-\d+)?)\b', text, re.IGNORECASE),
+                        re.match(r'^table\s+(\d{1,2}(?:\.\d+)?)\b', text, re.IGNORECASE)
+                    ]
+                    for match in matches:
+                        if match:
+                            tables.add(match.group(1))
+                # Figure extraction
+                if text.lower().startswith('figure'):
+                    matches = [
+                        re.match(r'^figure\s+(\d{1,2}(?:-\d+)?)\b', text, re.IGNORECASE),
+                        re.match(r'^figure\s+(\d{1,2}(?:\.\d+)?)\b', text, re.IGNORECASE)
+                    ]
+                    for match in matches:
+                        if match:
+                            figures.add(match.group(1))
+            # Check references
+            for para in doc.paragraphs:
+                para_text = para.text.strip() if hasattr(para, 'text') else ''
+                if not para_text or skip_regex.search(para_text):
+                    continue
+                # Table reference check
+                table_refs = re.finditer(
+                    r'(?:see|in|refer to)?\s*(?:table|Table)\s+(\d{1,2}(?:[-\.]\d+)?)\b',
+                    para_text
+                )
+                for match in table_refs:
+                    ref = match.group(1)
+                    if ref not in tables:
+                        issues.append({
+                            'type': 'Table',
+                            'reference': ref,
+                            'context': para_text,
+                            'message': f"Referenced Table {ref} not found in document"
+                        })
+                # Figure reference check
+                figure_refs = re.finditer(
+                    r'(?:see|in|refer to)?\s*(?:figure|Figure)\s+(\d{1,2}(?:[-\.]\d+)?)\b',
+                    para_text
+                )
+                for match in figure_refs:
+                    ref = match.group(1)
+                    if ref not in figures:
+                        issues.append({
+                            'type': 'Figure',
+                            'reference': ref,
+                            'context': para_text,
+                            'message': f"Referenced Figure {ref} not found in document"
+                        })
+                # Section/paragraph reference check
+                section_refs = re.finditer(
+                    r'(?:paragraph|section|appendix)\s+([A-Z]?\.?\d+(?:\.\d+)*)',
+                    para_text,
+                    re.IGNORECASE
+                )
+                for match in section_refs:
+                    ref = match.group(1).strip('.')
+                    if not skip_regex.search(para_text):
+                        if ref not in valid_sections:
+                            found = False
+                            for valid_section in valid_sections:
+                                if valid_section.strip('.') == ref.strip('.'):
+                                    found = True
+                                    break
+                            if not found:
+                                issues.append({
+                                    'type': 'Paragraph',
+                                    'reference': ref,
+                                    'context': para_text,
+                                    'message': f"Confirm paragraph {ref} referenced in '{para_text}' exists in the document"
+                                })
+        except Exception as e:
+            self.logger.error(f"Error processing cross references: {str(e)}")
+            return DocumentCheckResult(
+                success=False,
+                issues=[{'type': 'error', 'message': f"Error processing cross references: {str(e)}"}],
+                details={}
+            )
+        return DocumentCheckResult(
+            success=len(issues) == 0,
+            issues=issues,
+            details={
+                'total_tables': len(tables),
+                'total_figures': len(figures),
+                'found_tables': sorted(list(tables)),
+                'found_figures': sorted(list(figures)),
+                'heading_structure': heading_structure,
+                'valid_sections': sorted(list(valid_sections))
+            }
+        )
+    def _extract_paragraph_numbering(self, doc: Document, in_appendix: bool = False) -> List[Tuple[str, str]]:
+        """
+        Extract paragraph numbers from document headings.
+        Walks ``doc.paragraphs`` in order and, for every paragraph whose style
+        name starts with 'Heading', derives a dotted section number (such as
+        "2.1.3") from per-level counters kept while scanning.
+        Args:
+            doc: Document whose paragraphs are scanned.
+            in_appendix: Not read anywhere in this method body.
+        Returns:
+            List[Tuple[str, str]]: (section_number, heading_text) pairs in document
+            order; headings with empty text are not included. An empty list is
+            returned (and the error logged) if processing raises unexpectedly.
+        """
+        numbered_paragraphs = []
+        try:
+            # Counter per heading level (limited to levels 1-6)
+            current_numbers = {
+                1: 0,  # Heading 1: 1, 2, 3, ...
+                2: 0,  # Heading 2: 1.1, 1.2, 1.3, ...
+                3: 0,  # Heading 3: 1.1.1, 1.1.2, ...
+                4: 0,
+                5: 0,
+                6: 0
+            }
+            current_parent = {
+                2: 0,  # Value of the level-1 counter recorded for level 2
+                3: 0,  # Value of the level-2 counter recorded for level 3
+                4: 0,
+                5: 0,
+                6: 0
+            }
+            last_level = {
+                1: 0,  # Written below but never read within this method
+                2: 0,
+                3: 0,
+                4: 0,
+                5: 0,
+                6: 0
+            }
+            for para in doc.paragraphs:
+                style_name = para.style.name if hasattr(para, 'style') and hasattr(para.style, 'name') else ''
+                text = para.text.strip() if hasattr(para, 'text') else ''
+                # Only paragraphs with a heading style contribute to numbering
+                if style_name.startswith('Heading'):
+                    try:
+                        heading_level = int(style_name.replace('Heading ', ''))  # non-numeric remainder raises ValueError (handled below)
+                        # Levels above 6 have no counter and are ignored
+                        if heading_level > 6:
+                            continue
+                        if heading_level == 1:
+                            # A new Heading 1 advances the top-level counter
+                            current_numbers[1] += 1
+                            last_level[1] = current_numbers[1]
+                            # Reset all lower levels
+                            for level in range(2, 7):  # levels 2 through 6
+                                current_numbers[level] = 0
+                                current_parent[level] = current_numbers[1]
+                                last_level[level] = 0
+                        else:
+                            # Compare the recorded parent value with the current counter one level up
+                            parent_changed = current_parent[heading_level] != current_numbers[heading_level - 1]
+                            if parent_changed:
+                                # Different parent: numbering at this level restarts at 1
+                                current_numbers[heading_level] = 1
+                                current_parent[heading_level] = current_numbers[heading_level - 1]
+                            else:
+                                # Same parent, increment this level
+                                current_numbers[heading_level] += 1
+                            last_level[heading_level] = current_numbers[heading_level]
+                            # Reset all lower levels
+                            for level in range(heading_level + 1, 7):  # deeper levels up to 6
+                                current_numbers[level] = 0
+                                current_parent[level] = 0
+                                last_level[level] = 0
+                        # Build the dotted section number from level 1 down to this level
+                        section_parts = []
+                        for level in range(1, heading_level + 1):
+                            if level == 1:
+                                section_parts.append(str(current_numbers[1]))  # level 1 is always included, even when still 0
+                            else:
+                                if current_numbers[level] > 0:  # intermediate levels with no heading yet are left out
+                                    section_parts.append(str(current_numbers[level]))
+                        section_number = '.'.join(section_parts)
+                        if text:
+                            numbered_paragraphs.append((section_number, text))
+                    except ValueError:
+                        continue  # style name did not end in an integer level
+        except Exception as e:
+            self.logger.error(f"Error processing document structure: {str(e)}, Type: {type(e)}, Details: {repr(e)}")
+            return []  # any other failure discards everything collected so far
+        return numbered_paragraphs
+    def _check_heading_sequence(self, current_level: int, previous_level: int) -> Optional[str]:
+        """
+        Check if heading sequence is valid.
+        Returns error message if invalid, None if valid.
+        Rules:
+        - Can go from any level to H1 or H2 (restart numbering)
+        - When going deeper, can only go one level at a time (e.g., H1 to H2, H2 to H3)
+        - Can freely go to any higher level (e.g., H3 to H1, H4 to H2)
+        """
+        # When going to a deeper level, only allow one level at a time
+        if current_level > previous_level:
+            if current_level != previous_level + 1:
+                return f"Skipped heading level(s) {previous_level + 1} - Found H{current_level} after H{previous_level}. Add H{previous_level + 1} before this section."
+        # All other cases are valid:
+        # - Going to H1 (restart numbering)
+        # - Going to any higher level (e.g., H3 to H1)
+        return None
+    def _check_heading_structure(self, doc: Document) -> List[Dict[str, str]]:
+        """Check document heading structure."""
+        issues = []
+        previous_level = 0
+        previous_heading = ""
+        for para in doc.paragraphs:
+            if para.style.name.startswith('Heading'):
+                try:
+                    current_level = int(para.style.name.replace('Heading ', ''))
+                    # Check sequence
+                    error = self._check_heading_sequence(current_level, previous_level)
+                    if error:
+                        issues.append({
+                            'category': '508_compliance_heading_structure',
+                            'message': error,
+                            'context': f"'{para.text}'",
+                            'recommendation': f"Ensure heading levels follow a logical sequence."
+                        })
+                    previous_level = current_level
+                    previous_heading = para.text
+                except ValueError:
+                    continue
+        return issues
+    @profile_performance
+    def check_readability(self, doc: List[str]) -> DocumentCheckResult:
+        """
+        Check document readability using multiple metrics and plain language standards.
+        Args:
+            doc (List[str]): List of document paragraphs
+        Returns:
+            DocumentCheckResult: Results including readability scores and identified issues
+        """
+        if not self.validate_input(doc):
+            return DocumentCheckResult(success=False, issues=[{'error': 'Invalid document input'}])
+        issues = []
+        text_stats = {
+            'total_words': 0,
+            'total_syllables': 0,
+            'total_sentences': 0,
+            'complex_words': 0,
+            'passive_voice_count': 0
+        }
+        # Patterns for identifying passive voice
+        passive_patterns = [
+            r'\b(?:am|is|are|was|were|be|been|being)\s+\w+ed\b',
+            r'\b(?:am|is|are|was|were|be|been|being)\s+\w+en\b',
+            r'\b(?:has|have|had)\s+been\s+\w+ed\b',
+            r'\b(?:has|have|had)\s+been\s+\w+en\b'
+        ]
+        passive_regex = re.compile('|'.join(passive_patterns), re.IGNORECASE)
+        def count_syllables(word: str) -> int:
+            """Count syllables in a word using basic rules."""
+            word = word.lower()
+            count = 0
+            vowels = 'aeiouy'
+            on_vowel = False
+            for char in word:
+                is_vowel = char in vowels
+                if is_vowel and not on_vowel:
+                    count += 1
+                on_vowel = is_vowel
+            if word.endswith('e'):
+                count -= 1
+            if word.endswith('le') and len(word) > 2 and word[-3] not in vowels:
+                count += 1
+            if count == 0:
+                count = 1
+            return count
+        # Process each paragraph
+        for paragraph in doc:
+            if not paragraph.strip():
+                continue
+            # Split into sentences
+            sentences = re.split(r'(?<=[.!?])\s+', paragraph.strip())
+            text_stats['total_sentences'] += len(sentences)
+            # Check each sentence
+            for sentence in sentences:
+                # Count passive voice instances
+                if passive_regex.search(sentence):
+                    text_stats['passive_voice_count'] += 1
+                # Process words
+                words = sentence.split()
+                text_stats['total_words'] += len(words)
+                for word in words:
+                    word = re.sub(r'[^\w\s]', '', word.lower())
+                    if not word:
+                        continue
+                    syllables = count_syllables(word)
+                    text_stats['total_syllables'] += syllables
+                    if syllables >= 3:
+                        text_stats['complex_words'] += 1
+        # Calculate readability metrics
+        try:
+            # Flesch Reading Ease
+            flesch_ease = 206.835 - 1.015 * (text_stats['total_words'] / text_stats['total_sentences']) - 84.6 * (text_stats['total_syllables'] / text_stats['total_words'])
+            # Flesch-Kincaid Grade Level
+            flesch_grade = 0.39 * (text_stats['total_words'] / text_stats['total_sentences']) + 11.8 * (text_stats['total_syllables'] / text_stats['total_words']) - 15.59
+            # Gunning Fog Index
+            fog_index = 0.4 * ((text_stats['total_words'] / text_stats['total_sentences']) + 100 * (text_stats['complex_words'] / text_stats['total_words']))
+            # Calculate passive voice percentage
+            passive_percentage = (text_stats['passive_voice_count'] / text_stats['total_sentences']) * 100 if text_stats['total_sentences'] > 0 else 0
+            # Add readability summary with high-level guidance and specific issues
+            issues = []
+            if flesch_ease < 50:
+                issues.append({
+                    'type': 'readability_score',
+                    'metric': 'Flesch Reading Ease',
+                    'score': round(flesch_ease, 1),
+                    'message': 'Document may be too difficult for general audience. Consider simplifying language.'
+                })
+            if flesch_grade > 12:
+                issues.append({
+                    'type': 'readability_score',
+                    'metric': 'Flesch-Kincaid Grade Level',
+                    'score': round(flesch_grade, 1),
+                    'message': 'Reading level is above 12th grade. Consider simplifying for broader accessibility.'
+                })
+            if fog_index > 12:
+                issues.append({
+                    'type': 'readability_score',
+                    'metric': 'Gunning Fog Index',
+                    'score': round(fog_index, 1),
+                    'message': 'Text complexity may be too high. Consider using simpler words and shorter sentences.'
+                })
+            if passive_percentage > 10:
+                issues.append({
+                    'type': 'passive_voice',
+                    'percentage': round(passive_percentage, 1),
+                    'message': f'Document uses {round(passive_percentage, 1)}% passive voice (target: less than 10%). Consider using more active voice.'
+                })
+            details = {
+                'metrics': {
+                    'flesch_reading_ease': round(flesch_ease, 1),
+                    'flesch_kincaid_grade': round(flesch_grade, 1),
+                    'gunning_fog_index': round(fog_index, 1),
+                    'passive_voice_percentage': round(passive_percentage, 1)
+                }
+            }
+            return DocumentCheckResult(
+                success=len(issues) == 0,
+                issues=issues,
+                details=details
+            )
+        except Exception as e:
+            self.logger.error(f"Error calculating readability metrics: {str(e)}")
+            return DocumentCheckResult(
+                success=False,
+                issues=[{'error': f'Error calculating readability metrics: {str(e)}'}]
+            )
 class DocumentCheckResultsFormatter:
                     'before': 'See AC 25.1309-1B, System Design and Analysis, for information on X.',
                     'after': 'See AC 25.1309-1B, <i>System Design and Analysis</i>, for information on X.'
                 }
+            },
+            '508_compliance_check': {
+                'title': 'Section 508 Compliance Issues',
+                'description': 'Checks document accessibility features required by Section 508 standards: Image alt text for screen readers, heading structure issues (missing heading 1, skipped heading levels, and out of sequence headings), and hyperlink accessibility (ensuring links have meaningful descriptive text).',
+                'solution': 'Address each accessibility issue: add image alt text for screen readers, fix heading structure, and ensure hyperlinks have descriptive text that indicates their destination.',
+                'example_fix': {
+                    'before': [
+                        'Image without alt text',
+                        'Heading sequence: H1 → H2 → H4 (skipped H3)',
+                        'Link text: "click here" or "www.example.com"'
+                    ],
+                    'after': [
+                        'Image with descriptive alt text',
+                        'Proper heading sequence: H1 → H2 → H3 → H4',
+                        'Descriptive link text: "FAA Compliance Guidelines" or "Download the Safety Report"'
+                    ]
+                }
+            },
+            'hyperlink_check': {
+                'title': 'Hyperlink Issues',
+                'description': 'Checks for potentially broken or inaccessible URLs in the document. This includes checking response codes and connection issues.',
+                'solution': 'Verify each flagged URL is correct and accessible.',
+                'example_fix': {
+                    'before': 'See https://broken-link.example.com for more details.',
+                    'after': 'See https://www.faa.gov for more details.'
+                }
+            },
+            'cross_references_check': {
+                'title': 'Cross-Reference Issues',
+                'description': 'Checks for missing or invalid cross-references to paragraphs, tables, figures, and appendices within the document.',
+                'solution': 'Ensure that all referenced elements are present in the document and update or remove any incorrect references.',
+                'example_fix': {
+                    'before': 'See table 5-2 for more information. (there is no table 5-2)',
+                    'after': 'Either update the table reference or add table 5-2 if missing'
+                }
+            },
+            'readability_check': {
+                'title': 'Readability Issues',
+                'description': 'Analyzes document readability using multiple metrics including Flesch Reading Ease, Flesch-Kincaid Grade Level, and Gunning Fog Index. Also checks for passive voice usage and technical jargon.',
+                'solution': 'Simplify language, reduce passive voice, and replace technical jargon with plain language alternatives.',
+                'example_fix': {
+                    'before': 'The implementation of the procedure was facilitated by technical personnel.',
+                    'after': 'Technical staff helped start the procedure.'
+                }
             }
         }
         return formatted_issues
     def _format_reference_issues(self, result: DocumentCheckResult) -> List[str]:
+        """Format reference issues with clear, concise descriptions."""
+        formatted_issues = []
+        for issue in result.issues:
+            ref_type = issue.get('type', '')
+            ref_num = issue.get('reference', '')
+            context = issue.get('context', '').strip()
+            if context:  # Only include context if it exists
+                formatted_issues.append(
+                    f"    • Confirm {ref_type} {ref_num} referenced in '{context}' exists in the document"
+                )
+            else:
+                formatted_issues.append(
+                    f"    • Confirm {ref_type} {ref_num} exists in the document"
+                )
+        return formatted_issues
     def _format_standard_issue(self, issue: Dict[str, Any]) -> str:
         """Format standard issues consistently."""
                     output.extend(self._format_section_symbol_issues(result))
                 elif check_name == 'parentheses_check':
                     output.extend(self._format_parentheses_issues(result))
+                elif check_name == '508_compliance_check':
+                    if not result.success:
+                        # Combine all 508 compliance issues into a single list
+                        for issue in result.issues:
+                            if issue.get('category') == '508_compliance_heading_structure':
+                                output.append(f"    • {issue['message']}")
+                                if 'context' in issue:
+                                    output.append(f"      Context: {issue['context']}")
+                                if 'recommendation' in issue:
+                                    output.append(f"      Recommendation: {issue['recommendation']}")
+                            elif issue.get('category') == 'image_alt_text':
+                                if 'context' in issue:
+                                    output.append(f"    • {issue['context']}")
+                            elif issue.get('category') == 'hyperlink_accessibility':
+                                output.append(f"    • {issue.get('user_message', issue.get('message', 'No description provided'))}")
+                elif check_name == 'hyperlink_check':
+                    for issue in result.issues:
+                        output.append(f"    • {issue['message']}")
+                        if 'status_code' in issue:
+                            output.append(f"      (HTTP Status: {issue['status_code']})")
+                        elif 'error' in issue:
+                            output.append(f"      (Error: {issue['error']})")
+                elif check_name == 'cross_references_check':
+                    for issue in result.issues:
+                        output.append(f"    • Confirm {issue['type']} {issue['reference']} referenced in '{issue['context']}' exists in the document")
+                elif check_name == 'readability_check':
+                    output.extend(self._format_readability_issues(result))
                 else:
                     formatted_issues = [self._format_standard_issue(issue) for issue in result.issues[:15]]
                     output.extend(formatted_issues)
+                    if len(result.issues) > 10:
                         output.append(f"\n    ... and {len(result.issues) - 15} more similar issues.")
         return '\n'.join(output)
         except Exception as e:
             print(f"Error saving report: {e}")
+    def _format_readability_issues(self, result: DocumentCheckResult) -> List[str]:
+        """Format readability issues with clear, actionable feedback."""
+        formatted_issues = []
+        if result.details and 'metrics' in result.details:
+            metrics = result.details['metrics']
+            formatted_issues.append("\n  Readability Scores:")
+            formatted_issues.append(f"    • Flesch Reading Ease: {metrics['flesch_reading_ease']} (Aim for 50+; higher is easier to read)")
+            formatted_issues.append(f"    • Grade Level: {metrics['flesch_kincaid_grade']} (Aim for 10 or lower; 12 acceptable for technical/legal)")
+            formatted_issues.append(f"    • Gunning Fog Index: {metrics['gunning_fog_index']} (Aim for 12 or lower)")
+            formatted_issues.append(f"    • Passive Voice: {metrics['passive_voice_percentage']}% (Aim for less than 10%; use active voice for clarity)")
+        if result.issues:
+            formatted_issues.append("\n  Identified Issues:")
+            for issue in result.issues:
+                if issue['type'] == 'jargon':
+                    formatted_issues.append(
+                        f"    • Replace '{issue['word']}' with '{issue['suggestion']}' in: \"{issue['sentence']}\""
+                    )
+                elif issue['type'] in ['readability_score', 'passive_voice']:
+                    formatted_issues.append(f"    • {issue['message']}")
+        return formatted_issues
 def format_markdown_results(results: Dict[str, DocumentCheckResult], doc_type: str) -> str:
     """Format check results into a Markdown string for Gradio display."""
         'acronym_check': {'title': '📝 Acronym Definitions', 'priority': 1},
         'acronym_usage_check': {'title': '📎 Acronym Usage', 'priority': 1},
         'section_symbol_usage_check': {'title': '§ Section Symbol Usage', 'priority': 2},
+        '508_compliance_check': {'title': '🕵️‍♂️ 508 Compliance', 'priority': 2},
+        'cross_references_check': {'title': '🔗 Cross References', 'priority': 2},
+        'hyperlink_check': {'title': '🔗 Hyperlinks', 'priority': 2},
         'date_formats_check': {'title': '📅 Date Formats', 'priority': 2},
         'placeholders_check': {'title': '🚩 Placeholder Content', 'priority': 2},
         'document_title_check': {'title': '📑 Document Title Format', 'priority': 2},
         'double_period_check': {'title': '⚡ Double Periods', 'priority': 4},
         'spacing_check': {'title': '⌨️ Spacing Issues', 'priority': 4},
         'paragraph_length_check': {'title': '📏 Paragraph Length', 'priority': 5},
+        'sentence_length_check': {'title': '📏 Sentence Length', 'priority': 5},
     }
     sorted_checks = sorted(
             title = parts[0].strip()
             content = parts[1].strip()
+            # Special handling for readability metrics
+            if "Readability Issues" in title:
+                metrics_match = re.search(r'Readability Scores:(.*?)(?=Identified Issues:|$)', content, re.DOTALL)
+                issues_match = re.search(r'Identified Issues:(.*?)(?=\Z)', content, re.DOTALL)
+                metrics_html = ""
+                if metrics_match:
+                    metrics = metrics_match.group(1).strip().split('\n')
+                    metrics_html = """
+                        <div class="bg-blue-50 rounded-lg p-4 mb-4">
+                            <h3 class="font-medium text-blue-800 mb-2">📊 Readability Metrics</h3>
+                            <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
+                    """
+                    for metric in metrics:
+                        if metric.strip():
+                            label, value = metric.strip('• ').split(':', 1)
+                            metrics_html += f"""
+                                <div class="flex flex-col">
+                                    <span class="text-sm text-blue-600 font-medium">{label}</span>
+                                    <span class="text-lg text-blue-900">{value}</span>
+                                </div>
+                            """
+                    metrics_html += "</div></div>"
+                issues_html_section = ""
+                if issues_match:
+                    issues_list = issues_match.group(1).strip().split('\n')
+                    if issues_list:
+                        issues_html_section = """
+                            <div class="mt-4">
+                                <h3 class="font-medium text-gray-800 mb-2">📝 Identified Issues:</h3>
+                                <ul class="list-none space-y-2">
+                        """
+                        for issue in issues_list:
+                            if issue.strip():
+                                issues_html_section += f"""
+                                    <li class="text-gray-600 ml-4">• {issue.strip('• ')}</li>
+                                """
+                        issues_html_section += "</ul></div>"
+                # Combine the readability section
+                issues_html += f"""
+                    <div class="bg-white rounded-lg shadow-sm mb-6 overflow-hidden">
+                        <div class="bg-gray-50 px-6 py-4 border-b">
+                            <h2 class="text-lg font-semibold text-gray-800">{title}</h2>
+                        </div>
+                        <div class="px-6 py-4">
+                            {metrics_html}
+                            {issues_html_section}
+                        </div>
+                    </div>
+                """
+                continue
             # Extract description and solution
             description_parts = content.split('How to fix:', 1)
             description = description_parts[0].strip()
                         <h3 class="font-medium text-gray-800 mb-2">Issues found in your document:</h3>
                         <ul class="list-none space-y-2">
                 """
+                for issue in issues_match[:7]:
                     clean_issue = issue.strip().lstrip('•').strip()
                     issues_html_section += f"""
                         <li class="text-gray-600 ml-4">• {clean_issue}</li>
                     """
+                if len(issues_match) > 7:
                     issues_html_section += f"""
                         <li class="text-gray-500 italic ml-4">... and {len(issues_match) - 7} more similar issues.</li>
                     """
                 </div>
             """
+        # Add new CSS classes for readability metrics
+        additional_styles = """
+            .bg-blue-50 { background-color: #eff6ff; }
+            .text-blue-600 { color: #2563eb; }
+            .text-blue-800 { color: #1e40af; }
+            .text-blue-900 { color: #1e3a8a; }
+            .grid { display: grid; }
+            .grid-cols-1 { grid-template-columns: repeat(1, minmax(0, 1fr)); }
+            .gap-4 { gap: 1rem; }
+            @media (min-width: 768px) {
+                .md\\:grid-cols-2 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+            }
+        """
+        # Add summary section before the final return
         summary_html = f"""
             <div class="bg-white rounded-lg shadow-sm mb-6 overflow-hidden">
                 <div class="bg-gray-50 px-6 py-4 border-b">
             </div>
         """
+        # Update the final HTML to include the summary section
         full_html = f"""
         <div class="mx-auto p-4" style="font-family: system-ui, -apple-system, sans-serif;">
             <style>
                 .text-2xl {{ font-size: 1.5rem; line-height: 2rem; }}
                 .text-lg {{ font-size: 1.125rem; }}
+                .text-sm {{ font-size: 0.875rem; }}
                 .font-bold {{ font-weight: 700; }}
                 .font-semibold {{ font-weight: 600; }}
                 .font-medium {{ font-weight: 500; }}
                 .overflow-hidden {{ overflow: hidden; }}
                 .list-none {{ list-style-type: none; }}
                 .space-y-4 > * + * {{ margin-top: 1rem; }}
+                {additional_styles}
             </style>
             {header_html}
             {issues_html}