Spaces:
Sleeping
Sleeping
| """ | |
| Screen Reader Simulator Module | |
| Simulates how NVDA and JAWS would read a PDF page, supporting both | |
| tagged (structure tree) and untagged (visual order fallback) PDFs. | |
| """ | |
| from typing import Dict, List, Any, Optional, Tuple | |
| import pikepdf | |
| from structure_tree import extract_structure_tree, StructureNode | |
def simulate_screen_reader(
    pdf_path: str,
    page_index: int,
    blocks: List[Any],
    reader_type: str = "NVDA",
    detail_level: str = "default",
    order_mode: str = "tblr"
) -> Dict[str, Any]:
    """
    Produce a simulated screen reader reading of one PDF page.

    The structure tree of the PDF is extracted first. When one is returned,
    the transcript is built from it ("tagged" mode); otherwise the supplied
    layout blocks are read in visual order ("untagged" mode).

    Args:
        pdf_path: Path to PDF file
        page_index: 0-based page index (only used in tagged mode)
        blocks: List of BlockInfo objects from extract_blocks_spans
            (only used in untagged mode)
        reader_type: "NVDA" or "JAWS"
        detail_level: "minimal", "default", or "verbose"
        order_mode: Reading order mode for untagged fallback
            ("raw", "tblr", "columns")

    Returns:
        Dictionary with the keys 'transcript', 'analysis', 'mode',
        'reader_type' and 'detail_level'
    """
    structure_root = extract_structure_tree(pdf_path)

    if not structure_root:
        # No usable structure tree: fall back to the visual reading order.
        transcript, analysis = _simulate_untagged(
            blocks, reader_type, detail_level, order_mode
        )
        mode = "untagged"
    else:
        transcript, analysis = _simulate_tagged(
            structure_root, page_index, reader_type, detail_level
        )
        mode = "tagged"

    result = {
        'transcript': transcript,
        'analysis': analysis,
        'mode': mode,
        'reader_type': reader_type,
        'detail_level': detail_level
    }
    return result
def _simulate_tagged(
    root: StructureNode,
    page_index: int,
    reader_type: str,
    detail_level: str
) -> Tuple[str, str]:
    """
    Simulate screen reader for tagged PDF using structure tree.

    Walks the tree depth-first, keeps the nodes that belong to the requested
    page (or carry no page reference), formats each one as an announcement
    and builds a Markdown analysis of the element types and alt text
    coverage.

    Args:
        root: Root StructureNode
        page_index: Page to simulate (0-based)
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level

    Returns:
        Tuple of (transcript, analysis). Announcements in the transcript are
        separated by blank lines; the analysis is Markdown text.
    """
    # Collect structure elements for this page
    page_elements = []

    def _collect_page_elements(node: StructureNode):
        # Include node if it's for this page or has no page ref (document-level)
        if node.page_ref is None or node.page_ref == page_index:
            if node.tag_type not in ('StructTreeRoot', 'MCID'):
                page_elements.append(node)
        for child in node.children:
            _collect_page_elements(child)

    _collect_page_elements(root)

    # Generate transcript; elements that produce no announcement are skipped
    # and do not count towards "Elements Found".
    candidates = (
        _format_element_announcement(element, reader_type, detail_level)
        for element in page_elements
    )
    transcript_lines = [text for text in candidates if text]
    element_count = len(transcript_lines)
    transcript = '\n\n'.join(transcript_lines)

    # Generate analysis
    analysis_lines = [
        "## Screen Reader Analysis (Tagged Mode)",
        "",
        "**Structure**: This page uses PDF tagging (accessible structure tree)",
        f"**Elements Found**: {element_count}",
        ""
    ]

    # Count element types (all collected elements, announced or not)
    tag_counts = {}
    for element in page_elements:
        tag_counts[element.tag_type] = tag_counts.get(element.tag_type, 0) + 1

    if tag_counts:
        analysis_lines.extend([
            "### Element Types",
            ""
        ])
        analysis_lines.extend(
            f"- **{tag}**: {count}" for tag, count in sorted(tag_counts.items())
        )

    # Check for alt text coverage. Only figures and formulas are expected to
    # carry alt text; artifacts are decorative content that assistive
    # technology skips, so counting them would report false gaps.
    elements_needing_alt = [
        e for e in page_elements if e.tag_type in ('Figure', 'Formula')
    ]
    elements_with_alt = [e for e in elements_needing_alt if e.alt_text]

    if elements_needing_alt:
        coverage = len(elements_with_alt) / len(elements_needing_alt) * 100
        analysis_lines.extend([
            "",
            "### Alt Text Coverage",
            "",
            f"**Elements needing alt text**: {len(elements_needing_alt)}",
            f"**Elements with alt text**: {len(elements_with_alt)}",
            f"**Coverage**: {coverage:.1f}%",
            ""
        ])
        if coverage < 100:
            analysis_lines.append("⚠️ Some elements are missing alt text")

    analysis = '\n'.join(analysis_lines)
    return transcript, analysis
def _simulate_untagged(
    blocks: List[Any],
    reader_type: str,
    detail_level: str,
    order_mode: str
) -> Tuple[str, str]:
    """
    Build a transcript for an untagged page by reading blocks in visual order.

    Text blocks whose average span font size exceeds 18 are treated as level 1
    headings, and those above 14 as level 2 headings. Image blocks are
    announced with a fixed "no alt text" placeholder. With the "minimal"
    detail level, heading and image announcements are left out.

    Args:
        blocks: List of BlockInfo objects
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level
        order_mode: Reading order mode passed on to order_blocks

    Returns:
        Tuple of (transcript, analysis)
    """
    from layout_utils import order_blocks  # Import the ordering function

    announce_extras = detail_level != "minimal"
    spoken = []
    n_text = 0
    n_images = 0

    # order_blocks yields (index, block) pairs; the index is not needed here.
    for _, blk in order_blocks(blocks, order_mode):
        kind = blk.block_type

        if kind == 0:  # Text block
            # Infer a heading level from the mean font size of the spans.
            level = None
            if blk.spans:
                mean_size = sum(span.size for span in blk.spans) / len(blk.spans)
                if mean_size > 18:
                    level = 1
                elif mean_size > 14:
                    level = 2

            if level is None or not announce_extras:
                spoken.append(blk.text.strip())
            elif reader_type == "NVDA":
                spoken.append(f"Heading level {level}")
                spoken.append(blk.text.strip())
            else:  # JAWS
                spoken.append(f"Heading {level}: {blk.text.strip()}")
            n_text += 1

        elif kind == 1:  # Image block
            if announce_extras:
                spoken.append("[Image - no alt text available]")
            n_images += 1

    transcript = '\n\n'.join(spoken)

    # Fixed Markdown report; only the mode and the two counters vary.
    analysis = '\n'.join([
        "## Screen Reader Analysis (Untagged Mode)",
        "",
        "⚠️ **No Structure**: This page does not use PDF tagging",
        "",
        "Screen readers will read text in visual order with limited context.",
        "",
        f"**Reading Order Mode**: {order_mode}",
        f"**Text Blocks**: {n_text}",
        f"**Images**: {n_images}",
        "",
        "### Limitations",
        "",
        "- No semantic information (headings, lists, tables)",
        "- No alt text for images",
        "- Reading order may not match intended flow",
        "- Navigation by elements not possible",
        "",
        "**Recommendation**: Add PDF tagging for better accessibility"
    ])
    return transcript, analysis
| def _format_element_announcement( | |
| element: StructureNode, | |
| reader_type: str, | |
| detail_level: str | |
| ) -> Optional[str]: | |
| """ | |
| Format a structure element as a screen reader announcement. | |
| Args: | |
| element: StructureNode to announce | |
| reader_type: "NVDA" or "JAWS" | |
| detail_level: "minimal", "default", or "verbose" | |
| Returns: | |
| Formatted announcement string or None | |
| """ | |
| tag = element.tag_type | |
| lines = [] | |
| # Map PDF tag types to screen reader announcements | |
| if tag.startswith('H'): | |
| # Heading | |
| level = tag[1:] if len(tag) > 1 else '1' | |
| text = element.actual_text or "[Heading]" | |
| if detail_level == "minimal": | |
| return text | |
| if reader_type == "NVDA": | |
| lines.append(f"Heading level {level}") | |
| lines.append(text) | |
| else: # JAWS | |
| lines.append(f"Heading {level}: {text}") | |
| elif tag == 'P': | |
| # Paragraph | |
| text = element.actual_text or "[Paragraph]" | |
| if detail_level == "minimal": | |
| return text | |
| if detail_level == "verbose": | |
| if reader_type == "NVDA": | |
| lines.append("Paragraph") | |
| lines.append(text) | |
| if reader_type == "NVDA" and detail_level == "verbose": | |
| lines.append("Out of paragraph") | |
| else: | |
| lines.append(text) | |
| elif tag == 'Figure': | |
| # Figure/Image | |
| alt_text = element.alt_text or "[Image - no alt text]" | |
| if detail_level == "minimal": | |
| return None | |
| if reader_type == "NVDA": | |
| lines.append("Graphic") | |
| lines.append(alt_text) | |
| else: # JAWS | |
| lines.append(f"Graphic: {alt_text}") | |
| elif tag == 'Formula': | |
| # Math formula | |
| alt_text = element.alt_text or element.actual_text or "[Formula]" | |
| if detail_level == "minimal": | |
| return alt_text | |
| if reader_type == "NVDA": | |
| lines.append("Formula") | |
| lines.append(alt_text) | |
| else: # JAWS | |
| lines.append(f"Formula: {alt_text}") | |
| elif tag in ['L', 'LI']: | |
| # List/List Item | |
| text = element.actual_text or "[List item]" | |
| if detail_level == "minimal": | |
| return text | |
| if tag == 'L' and detail_level == "verbose": | |
| lines.append("List start") | |
| else: | |
| if reader_type == "NVDA": | |
| lines.append("List item") | |
| lines.append(text) | |
| else: # JAWS | |
| lines.append(f"Bullet: {text}") | |
| elif tag == 'Table': | |
| # Table | |
| if detail_level != "minimal": | |
| if reader_type == "NVDA": | |
| lines.append("Table") | |
| else: # JAWS | |
| lines.append("Table start") | |
| elif tag in ['TR', 'TD', 'TH']: | |
| # Table row/cell | |
| text = element.actual_text or "" | |
| if text and detail_level != "minimal": | |
| lines.append(text) | |
| elif tag == 'Link': | |
| # Link | |
| text = element.actual_text or "[Link]" | |
| if detail_level == "minimal": | |
| return text | |
| if reader_type == "NVDA": | |
| lines.append("Link") | |
| lines.append(text) | |
| else: # JAWS | |
| lines.append(f"Link: {text}") | |
| elif tag == 'Span': | |
| # Inline text | |
| text = element.actual_text or "" | |
| if text: | |
| return text | |
| elif tag in ['Document', 'Part', 'Sect', 'Div', 'Art']: | |
| # Container elements - usually not announced | |
| return None | |
| else: | |
| # Unknown tag type | |
| if element.actual_text: | |
| return element.actual_text | |
| if lines: | |
| return '\n'.join(lines) | |
| return None | |
def format_transcript(result: Dict[str, Any]) -> str:
    """
    Render a simulation result as a display-ready transcript.

    The output starts with a Markdown title naming the reader type and detail
    level, adds a warning line when the result came from the untagged
    fallback, then a horizontal rule followed by the transcript itself.

    Args:
        result: Result from simulate_screen_reader; the keys 'reader_type',
            'detail_level', 'mode' and 'transcript' are read

    Returns:
        Formatted transcript string
    """
    pieces = [
        f"# {result['reader_type']} Transcript ({result['detail_level']} detail)\n\n"
    ]
    if result['mode'] == 'untagged':
        pieces.append("⚠️ Simulated from visual order (PDF not tagged)\n\n")
    pieces.append("---\n\n")
    pieces.append(result['transcript'])
    return ''.join(pieces)