"""
Screen Reader Simulator Module

Simulates how NVDA and JAWS would read a PDF page, supporting both
tagged (structure tree) and untagged (visual order fallback) PDFs.
"""

from typing import Dict, List, Any, Optional, Tuple
import pikepdf
from structure_tree import extract_structure_tree, StructureNode


def simulate_screen_reader(
    pdf_path: str,
    page_index: int,
    blocks: List[Any],
    reader_type: str = "NVDA",
    detail_level: str = "default",
    order_mode: str = "tblr"
) -> Dict[str, Any]:
    """
    Simulate screen reader output for a PDF page.

    The structure tree is tried first; when `extract_structure_tree`
    returns a falsy value the simulation falls back to the visual order
    of `blocks`.

    Args:
        pdf_path: Path to PDF file
        page_index: 0-based page index
        blocks: List of BlockInfo objects from extract_blocks_spans
            (only used by the untagged fallback)
        reader_type: "NVDA" or "JAWS" (any value other than "NVDA" is
            formatted using the JAWS style)
        detail_level: "minimal", "default", or "verbose"
        order_mode: Reading order mode for untagged fallback
            ("raw", "tblr", "columns")

    Returns:
        Dictionary with the keys 'transcript', 'analysis', 'mode'
        ("tagged" or "untagged"), 'reader_type' and 'detail_level'
    """
    # Try tagged approach first
    root = extract_structure_tree(pdf_path)

    if root:
        # Use structure tree
        transcript, analysis = _simulate_tagged(
            root, page_index, reader_type, detail_level
        )
        mode = "tagged"
    else:
        # Fallback to visual order
        transcript, analysis = _simulate_untagged(
            blocks, reader_type, detail_level, order_mode
        )
        mode = "untagged"

    return {
        'transcript': transcript,
        'analysis': analysis,
        'mode': mode,
        'reader_type': reader_type,
        'detail_level': detail_level
    }


def _simulate_tagged(
    root: StructureNode,
    page_index: int,
    reader_type: str,
    detail_level: str
) -> Tuple[str, str]:
    """
    Simulate screen reader for tagged PDF using structure tree.

    Args:
        root: Root StructureNode
        page_index: Page to simulate (0-based)
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level

    Returns:
        Tuple of (transcript, analysis). The transcript joins the
        individual announcements with blank lines; the analysis is a
        Markdown summary of the elements found on the page.
    """
    # Collect structure elements for this page (depth-first, document order)
    page_elements = []

    def _collect_page_elements(node: StructureNode):
        # Include node if it's for this page or has no page ref (document-level)
        if node.page_ref is None or node.page_ref == page_index:
            if node.tag_type not in ['StructTreeRoot', 'MCID']:
                page_elements.append(node)

        # Always descend: a child may belong to this page even when its
        # parent does not.
        for child in node.children:
            _collect_page_elements(child)

    _collect_page_elements(root)

    # Generate transcript
    transcript_lines = []
    element_count = 0

    for element in page_elements:
        announcement = _format_element_announcement(
            element, reader_type, detail_level
        )
        # Only elements that actually produce output are counted
        if announcement:
            transcript_lines.append(announcement)
            element_count += 1

    transcript = '\n\n'.join(transcript_lines)

    # Generate analysis
    analysis_lines = [
        "## Screen Reader Analysis (Tagged Mode)",
        "",
        "**Structure**: This page uses PDF tagging (accessible structure tree)",
        f"**Elements Found**: {element_count}",
        ""
    ]

    # Count element types (all collected elements, announced or not)
    tag_counts = {}
    for element in page_elements:
        tag_counts[element.tag_type] = tag_counts.get(element.tag_type, 0) + 1

    if tag_counts:
        analysis_lines.extend([
            "### Element Types",
            ""
        ])
        for tag, count in sorted(tag_counts.items()):
            analysis_lines.append(f"- **{tag}**: {count}")

    # Check for alt text coverage
    # NOTE(review): 'Artifact' is counted as needing alt text here;
    # artifacts are normally skipped by assistive technology - confirm
    # whether this is intended.
    elements_needing_alt = [
        e for e in page_elements
        if e.tag_type in ['Figure', 'Formula', 'Artifact']
    ]
    elements_with_alt = [e for e in elements_needing_alt if e.alt_text]

    if elements_needing_alt:
        coverage = len(elements_with_alt) / len(elements_needing_alt) * 100
        analysis_lines.extend([
            "",
            "### Alt Text Coverage",
            "",
            f"**Elements needing alt text**: {len(elements_needing_alt)}",
            f"**Elements with alt text**: {len(elements_with_alt)}",
            f"**Coverage**: {coverage:.1f}%",
            ""
        ])

        if coverage < 100:
            analysis_lines.append("⚠️ Some elements are missing alt text")

    analysis = '\n'.join(analysis_lines)

    return transcript, analysis


def _simulate_untagged(
    blocks: List[Any],
    reader_type: str,
    detail_level: str,
    order_mode: str
) -> Tuple[str, str]:
    """
    Simulate screen reader for untagged PDF using visual order.

    Headings are guessed from the average span font size of a text
    block: above 18 is treated as level 1, above 14 as level 2.

    Args:
        blocks: List of BlockInfo objects
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level
        order_mode: Reading order mode

    Returns:
        Tuple of (transcript, analysis)
    """
    from layout_utils import order_blocks  # Import the ordering function

    # Order blocks according to mode
    # NOTE(review): the loop below unpacks (index, block) pairs, so
    # order_blocks is assumed to return such pairs - confirm.
    ordered_blocks = order_blocks(blocks, order_mode)

    # Generate transcript
    transcript_lines = []
    text_block_count = 0
    image_block_count = 0

    for _idx, block in ordered_blocks:
        if block.block_type == 0:  # Text block
            # Infer heading from font size
            is_heading = False
            heading_level = None

            if block.spans:
                avg_size = sum(s.size for s in block.spans) / len(block.spans)
                if avg_size > 18:
                    is_heading = True
                    heading_level = 1
                elif avg_size > 14:
                    is_heading = True
                    heading_level = 2

            # Format announcement
            if is_heading and detail_level != "minimal":
                if reader_type == "NVDA":
                    transcript_lines.append(f"Heading level {heading_level}")
                    transcript_lines.append(block.text.strip())
                else:  # JAWS
                    transcript_lines.append(
                        f"Heading {heading_level}: {block.text.strip()}"
                    )
            else:
                transcript_lines.append(block.text.strip())

            text_block_count += 1

        elif block.block_type == 1:  # Image block
            # Images are counted even when minimal detail hides them
            if detail_level != "minimal":
                transcript_lines.append("[Image - no alt text available]")
            image_block_count += 1

    transcript = '\n\n'.join(transcript_lines)

    # Generate analysis
    analysis_lines = [
        "## Screen Reader Analysis (Untagged Mode)",
        "",
        "⚠️ **No Structure**: This page does not use PDF tagging",
        "",
        "Screen readers will read text in visual order with limited context.",
        "",
        f"**Reading Order Mode**: {order_mode}",
        f"**Text Blocks**: {text_block_count}",
        f"**Images**: {image_block_count}",
        "",
        "### Limitations",
        "",
        "- No semantic information (headings, lists, tables)",
        "- No alt text for images",
        "- Reading order may not match intended flow",
        "- Navigation by elements not possible",
        "",
        "**Recommendation**: Add PDF tagging for better accessibility"
    ]

    analysis = '\n'.join(analysis_lines)

    return transcript, analysis


def _heading_level(tag: str) -> Optional[str]:
    """
    Return the heading level encoded in a structure tag, if any.

    Only the generic 'H' tag (reported as level '1') and 'H' followed
    exclusively by digits (e.g. 'H2') count as headings. Any other tag
    that merely starts with 'H' is not a heading and yields None.
    """
    if tag == 'H':
        return '1'
    if tag.startswith('H') and tag[1:].isdigit():
        return tag[1:]
    return None


def _format_element_announcement(
    element: StructureNode,
    reader_type: str,
    detail_level: str
) -> Optional[str]:
    """
    Format a structure element as a screen reader announcement.

    Args:
        element: StructureNode to announce
        reader_type: "NVDA" or "JAWS"
        detail_level: "minimal", "default", or "verbose"

    Returns:
        Formatted announcement string, or None when the element produces
        no output (containers, figures in minimal mode, empty cells, ...)
    """
    tag = element.tag_type
    lines = []

    # Map PDF tag types to screen reader announcements.
    # Headings are matched strictly ('H' or 'H<digits>') so that other
    # tags beginning with "H" are not announced as headings with a
    # nonsensical level; they fall through to the unknown-tag branch.
    level = _heading_level(tag)

    if level is not None:
        # Heading
        text = element.actual_text or "[Heading]"

        if detail_level == "minimal":
            return text

        if reader_type == "NVDA":
            lines.append(f"Heading level {level}")
            lines.append(text)
        else:  # JAWS
            lines.append(f"Heading {level}: {text}")

    elif tag == 'P':
        # Paragraph
        text = element.actual_text or "[Paragraph]"

        if detail_level == "minimal":
            return text

        if detail_level == "verbose":
            # Only NVDA wraps the text in paragraph boundary announcements
            if reader_type == "NVDA":
                lines.append("Paragraph")
            lines.append(text)
            if reader_type == "NVDA" and detail_level == "verbose":
                lines.append("Out of paragraph")
        else:
            lines.append(text)

    elif tag == 'Figure':
        # Figure/Image
        alt_text = element.alt_text or "[Image - no alt text]"

        # Figures are skipped entirely in minimal mode
        if detail_level == "minimal":
            return None

        if reader_type == "NVDA":
            lines.append("Graphic")
            lines.append(alt_text)
        else:  # JAWS
            lines.append(f"Graphic: {alt_text}")

    elif tag == 'Formula':
        # Math formula
        alt_text = element.alt_text or element.actual_text or "[Formula]"

        if detail_level == "minimal":
            return alt_text

        if reader_type == "NVDA":
            lines.append("Formula")
            lines.append(alt_text)
        else:  # JAWS
            lines.append(f"Formula: {alt_text}")

    elif tag in ['L', 'LI']:
        # List/List Item
        # NOTE(review): outside verbose mode an 'L' container is
        # announced exactly like a list item - confirm this is intended.
        text = element.actual_text or "[List item]"

        if detail_level == "minimal":
            return text

        if tag == 'L' and detail_level == "verbose":
            lines.append("List start")
        else:
            if reader_type == "NVDA":
                lines.append("List item")
                lines.append(text)
            else:  # JAWS
                lines.append(f"Bullet: {text}")

    elif tag == 'Table':
        # Table
        if detail_level != "minimal":
            if reader_type == "NVDA":
                lines.append("Table")
            else:  # JAWS
                lines.append("Table start")

    elif tag in ['TR', 'TD', 'TH']:
        # Table row/cell
        text = element.actual_text or ""
        if text and detail_level != "minimal":
            lines.append(text)

    elif tag == 'Link':
        # Link
        text = element.actual_text or "[Link]"

        if detail_level == "minimal":
            return text

        if reader_type == "NVDA":
            lines.append("Link")
            lines.append(text)
        else:  # JAWS
            lines.append(f"Link: {text}")

    elif tag == 'Span':
        # Inline text
        text = element.actual_text or ""
        if text:
            return text

    elif tag in ['Document', 'Part', 'Sect', 'Div', 'Art']:
        # Container elements - usually not announced
        return None

    else:
        # Unknown tag type
        if element.actual_text:
            return element.actual_text

    if lines:
        return '\n'.join(lines)

    return None


def format_transcript(result: Dict[str, Any]) -> str:
    """
    Format screen reader transcript for display.

    Args:
        result: Result from simulate_screen_reader; the keys
            'reader_type', 'detail_level', 'mode' and 'transcript'
            are read

    Returns:
        Formatted transcript string: a Markdown header (with a warning
        line for untagged mode), a horizontal rule, then the transcript
    """
    header = f"# {result['reader_type']} Transcript ({result['detail_level']} detail)\n\n"

    if result['mode'] == 'untagged':
        header += "⚠️ Simulated from visual order (PDF not tagged)\n\n"

    header += "---\n\n"

    return header + result['transcript']