pdfinspector / screen_reader_sim.py
rianders's picture
Fix file load errors and implement auto-refresh functionality
0d61aa0
"""
Screen Reader Simulator Module
Simulates how NVDA and JAWS would read a PDF page, supporting both
tagged (structure tree) and untagged (visual order fallback) PDFs.
"""
from typing import Dict, List, Any, Optional, Tuple
import pikepdf
from structure_tree import extract_structure_tree, StructureNode
def simulate_screen_reader(
    pdf_path: str,
    page_index: int,
    blocks: List[Any],
    reader_type: str = "NVDA",
    detail_level: str = "default",
    order_mode: str = "tblr"
) -> Dict[str, Any]:
    """
    Produce a simulated screen reader reading of one PDF page.

    The structure tree of the PDF is looked up first. When one is found the
    page is read from its tags; otherwise the supplied layout blocks are read
    in visual order.

    Args:
        pdf_path: Path to PDF file
        page_index: 0-based page index (only used when a structure tree exists)
        blocks: List of BlockInfo objects from extract_blocks_spans
            (only used when no structure tree exists)
        reader_type: "NVDA" or "JAWS"
        detail_level: "minimal", "default", or "verbose"
        order_mode: Reading order mode for the untagged fallback
            ("raw", "tblr", "columns")

    Returns:
        Dictionary with the keys 'transcript', 'analysis', 'mode'
        ("tagged" or "untagged"), 'reader_type' and 'detail_level'
    """
    structure_root = extract_structure_tree(pdf_path)

    if not structure_root:
        # No usable structure tree: fall back to visual reading order
        mode = "untagged"
        transcript, analysis = _simulate_untagged(
            blocks, reader_type, detail_level, order_mode
        )
    else:
        # Tagged PDF: read from the structure tree
        mode = "tagged"
        transcript, analysis = _simulate_tagged(
            structure_root, page_index, reader_type, detail_level
        )

    return dict(
        transcript=transcript,
        analysis=analysis,
        mode=mode,
        reader_type=reader_type,
        detail_level=detail_level,
    )
def _simulate_tagged(
    root: StructureNode,
    page_index: int,
    reader_type: str,
    detail_level: str
) -> Tuple[str, str]:
    """
    Simulate screen reader for tagged PDF using structure tree.

    The tree is walked in pre-order and every node that belongs to the
    requested page (or carries no page reference at all) is offered to
    _format_element_announcement. Nodes tagged 'StructTreeRoot' or 'MCID'
    are never collected.

    Args:
        root: Root StructureNode
        page_index: Page to simulate (0-based)
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level

    Returns:
        Tuple of (transcript, analysis). The transcript is the announcements
        separated by blank lines; the analysis is a markdown summary with
        element counts per tag and alt text coverage.
    """
    # Collect structure elements for this page
    page_elements = []

    def _collect_page_elements(node: StructureNode):
        # Include node if it's for this page or has no page ref (document-level)
        if node.page_ref is None or node.page_ref == page_index:
            if node.tag_type not in ('StructTreeRoot', 'MCID'):
                page_elements.append(node)
        # Always descend: children may sit on this page even if the parent does not
        for child in node.children:
            _collect_page_elements(child)

    _collect_page_elements(root)

    # Generate transcript; elements that yield no announcement are skipped
    announcements = (
        _format_element_announcement(element, reader_type, detail_level)
        for element in page_elements
    )
    transcript_lines = [text for text in announcements if text]
    element_count = len(transcript_lines)
    transcript = '\n\n'.join(transcript_lines)

    # Generate analysis
    analysis_lines = [
        "## Screen Reader Analysis (Tagged Mode)",
        "",
        "**Structure**: This page uses PDF tagging (accessible structure tree)",
        f"**Elements Found**: {element_count}",
        ""
    ]

    # Count element types (all collected elements, announced or not)
    tag_counts = {}
    for element in page_elements:
        tag_counts[element.tag_type] = tag_counts.get(element.tag_type, 0) + 1

    if tag_counts:
        analysis_lines.extend(["### Element Types", ""])
        analysis_lines.extend(
            f"- **{tag}**: {count}" for tag, count in sorted(tag_counts.items())
        )

    # Check for alt text coverage. Only figures and formulas need a text
    # alternative; artifacts are decorative content hidden from assistive
    # technology, so counting them would report false "missing alt text".
    elements_needing_alt = [
        e for e in page_elements if e.tag_type in ('Figure', 'Formula')
    ]
    elements_with_alt = [e for e in elements_needing_alt if e.alt_text]

    if elements_needing_alt:
        coverage = len(elements_with_alt) / len(elements_needing_alt) * 100
        analysis_lines.extend([
            "",
            "### Alt Text Coverage",
            "",
            f"**Elements needing alt text**: {len(elements_needing_alt)}",
            f"**Elements with alt text**: {len(elements_with_alt)}",
            f"**Coverage**: {coverage:.1f}%",
            ""
        ])
        if coverage < 100:
            analysis_lines.append("⚠️ Some elements are missing alt text")

    analysis = '\n'.join(analysis_lines)
    return transcript, analysis
def _simulate_untagged(
    blocks: List[Any],
    reader_type: str,
    detail_level: str,
    order_mode: str
) -> Tuple[str, str]:
    """
    Read a page without a structure tree, block by block in visual order.

    Blocks are sequenced by layout_utils.order_blocks. A text block whose
    average span font size exceeds 18 is treated as a level 1 heading, above
    14 as a level 2 heading; image blocks are read as a fixed placeholder.
    At "minimal" detail neither headings nor images are announced.

    Args:
        blocks: List of BlockInfo objects
        reader_type: "NVDA" or "JAWS" (affects heading wording only)
        detail_level: Detail level
        order_mode: Reading order mode handed to order_blocks

    Returns:
        Tuple of (transcript, analysis)
    """
    from layout_utils import order_blocks  # ordering helper, imported on use

    announce_roles = detail_level != "minimal"
    spoken = []
    n_text = 0
    n_images = 0

    # order_blocks yields (index, block) pairs; the index is not needed here
    for _, blk in order_blocks(blocks, order_mode):
        if blk.block_type == 0:  # Text block
            # Guess a heading level from the mean font size of the spans
            level = None
            if blk.spans:
                mean_size = sum(span.size for span in blk.spans) / len(blk.spans)
                if mean_size > 18:
                    level = 1
                elif mean_size > 14:
                    level = 2

            if level is None or not announce_roles:
                spoken.append(blk.text.strip())
            elif reader_type == "NVDA":
                spoken.append(f"Heading level {level}")
                spoken.append(blk.text.strip())
            else:  # JAWS
                spoken.append(f"Heading {level}: {blk.text.strip()}")
            n_text += 1
        elif blk.block_type == 1:  # Image block
            if announce_roles:
                spoken.append("[Image - no alt text available]")
            n_images += 1

    transcript = '\n\n'.join(spoken)

    # Assemble the markdown analysis section by section
    intro = [
        "## Screen Reader Analysis (Untagged Mode)",
        "",
        "⚠️ **No Structure**: This page does not use PDF tagging",
        "",
        "Screen readers will read text in visual order with limited context.",
        "",
    ]
    stats = [
        f"**Reading Order Mode**: {order_mode}",
        f"**Text Blocks**: {n_text}",
        f"**Images**: {n_images}",
        "",
    ]
    limitations = [
        "### Limitations",
        "",
        "- No semantic information (headings, lists, tables)",
        "- No alt text for images",
        "- Reading order may not match intended flow",
        "- Navigation by elements not possible",
        "",
        "**Recommendation**: Add PDF tagging for better accessibility",
    ]
    analysis = '\n'.join(intro + stats + limitations)

    return transcript, analysis
def _format_element_announcement(
element: StructureNode,
reader_type: str,
detail_level: str
) -> Optional[str]:
"""
Format a structure element as a screen reader announcement.
Args:
element: StructureNode to announce
reader_type: "NVDA" or "JAWS"
detail_level: "minimal", "default", or "verbose"
Returns:
Formatted announcement string or None
"""
tag = element.tag_type
lines = []
# Map PDF tag types to screen reader announcements
if tag.startswith('H'):
# Heading
level = tag[1:] if len(tag) > 1 else '1'
text = element.actual_text or "[Heading]"
if detail_level == "minimal":
return text
if reader_type == "NVDA":
lines.append(f"Heading level {level}")
lines.append(text)
else: # JAWS
lines.append(f"Heading {level}: {text}")
elif tag == 'P':
# Paragraph
text = element.actual_text or "[Paragraph]"
if detail_level == "minimal":
return text
if detail_level == "verbose":
if reader_type == "NVDA":
lines.append("Paragraph")
lines.append(text)
if reader_type == "NVDA" and detail_level == "verbose":
lines.append("Out of paragraph")
else:
lines.append(text)
elif tag == 'Figure':
# Figure/Image
alt_text = element.alt_text or "[Image - no alt text]"
if detail_level == "minimal":
return None
if reader_type == "NVDA":
lines.append("Graphic")
lines.append(alt_text)
else: # JAWS
lines.append(f"Graphic: {alt_text}")
elif tag == 'Formula':
# Math formula
alt_text = element.alt_text or element.actual_text or "[Formula]"
if detail_level == "minimal":
return alt_text
if reader_type == "NVDA":
lines.append("Formula")
lines.append(alt_text)
else: # JAWS
lines.append(f"Formula: {alt_text}")
elif tag in ['L', 'LI']:
# List/List Item
text = element.actual_text or "[List item]"
if detail_level == "minimal":
return text
if tag == 'L' and detail_level == "verbose":
lines.append("List start")
else:
if reader_type == "NVDA":
lines.append("List item")
lines.append(text)
else: # JAWS
lines.append(f"Bullet: {text}")
elif tag == 'Table':
# Table
if detail_level != "minimal":
if reader_type == "NVDA":
lines.append("Table")
else: # JAWS
lines.append("Table start")
elif tag in ['TR', 'TD', 'TH']:
# Table row/cell
text = element.actual_text or ""
if text and detail_level != "minimal":
lines.append(text)
elif tag == 'Link':
# Link
text = element.actual_text or "[Link]"
if detail_level == "minimal":
return text
if reader_type == "NVDA":
lines.append("Link")
lines.append(text)
else: # JAWS
lines.append(f"Link: {text}")
elif tag == 'Span':
# Inline text
text = element.actual_text or ""
if text:
return text
elif tag in ['Document', 'Part', 'Sect', 'Div', 'Art']:
# Container elements - usually not announced
return None
else:
# Unknown tag type
if element.actual_text:
return element.actual_text
if lines:
return '\n'.join(lines)
return None
def format_transcript(result: Dict[str, Any]) -> str:
    """
    Render a simulation result as display text.

    The output is a markdown title naming the reader and detail level, a
    warning line when the result came from the untagged fallback, a
    horizontal rule, and then the transcript itself.

    Args:
        result: Result from simulate_screen_reader; the keys 'reader_type',
            'detail_level', 'mode' and 'transcript' are read

    Returns:
        Formatted transcript string
    """
    title = "# {} Transcript ({} detail)".format(
        result['reader_type'], result['detail_level']
    )
    sections = [title]

    if result['mode'] == 'untagged':
        sections.append("⚠️ Simulated from visual order (PDF not tagged)")

    sections.append("---")
    sections.append(result['transcript'])

    # Every section is separated from the next by one blank line
    return "\n\n".join(sections)