Spaces:

rianders
/

pdfinspector

Sleeping

App Files Files Community

pdfinspector / screen_reader_sim.py

rianders

Fix file load errors and implement auto-refresh functionality

0d61aa0 about 2 months ago

raw

history blame contribute delete

11.4 kB

	"""
	Screen Reader Simulator Module

	Simulates how NVDA and JAWS would read a PDF page, supporting both
	tagged (structure tree) and untagged (visual order fallback) PDFs.
	"""

	from typing import Dict, List, Any, Optional, Tuple
	import pikepdf
	from structure_tree import extract_structure_tree, StructureNode


	def simulate_screen_reader(
	    pdf_path: str,
	    page_index: int,
	    blocks: List[Any],
	    reader_type: str = "NVDA",
	    detail_level: str = "default",
	    order_mode: str = "tblr"
	) -> Dict[str, Any]:
	    """
	    Build a simulated screen reader reading of a single PDF page.

	    The structure tree is requested via extract_structure_tree(). A truthy
	    root selects the tagged simulation; anything falsy falls back to
	    reading the supplied layout blocks in visual order.

	    Args:
	        pdf_path: Path to PDF file
	        page_index: 0-based page index (used by the tagged simulation)
	        blocks: List of BlockInfo objects from extract_blocks_spans
	            (used by the untagged fallback)
	        reader_type: "NVDA" or "JAWS"
	        detail_level: "minimal", "default", or "verbose"
	        order_mode: Reading order mode for the untagged fallback
	            ("raw", "tblr", "columns")

	    Returns:
	        Dictionary with the keys 'transcript', 'analysis', 'mode'
	        ("tagged" or "untagged"), 'reader_type' and 'detail_level'
	    """
	    structure_root = extract_structure_tree(pdf_path)

	    if not structure_root:
	        # No usable structure tree: read the layout blocks in visual order.
	        mode = "untagged"
	        transcript, analysis = _simulate_untagged(
	            blocks, reader_type, detail_level, order_mode
	        )
	    else:
	        # Tagged document: read the page from its structure elements.
	        mode = "tagged"
	        transcript, analysis = _simulate_tagged(
	            structure_root, page_index, reader_type, detail_level
	        )

	    return dict(
	        transcript=transcript,
	        analysis=analysis,
	        mode=mode,
	        reader_type=reader_type,
	        detail_level=detail_level,
	    )


	def _simulate_tagged(
	    root: StructureNode,
	    page_index: int,
	    reader_type: str,
	    detail_level: str
	) -> Tuple[str, str]:
	    """
	    Simulate screen reader for tagged PDF using structure tree.

	    The tree is walked parent-first. A node is kept when its page_ref is
	    None or equals page_index, unless its tag is 'StructTreeRoot' or
	    'MCID'. Kept nodes are formatted with _format_element_announcement();
	    nodes that yield no announcement are left out of the transcript.

	    Args:
	        root: Root StructureNode
	        page_index: Page to simulate (0-based)
	        reader_type: "NVDA" or "JAWS"
	        detail_level: Detail level

	    Returns:
	        Tuple of (transcript, analysis). Announcements in the transcript
	        are separated by blank lines; the analysis is a Markdown-style
	        report with element counts and alt text coverage.
	    """
	    # Tags that are never listed as page elements.
	    skipped_tags = ('StructTreeRoot', 'MCID')
	    # Artifacts mark decorative content that assistive technology skips,
	    # so they never need alt text. Auditing them produced false
	    # "missing alt text" warnings; only figures and formulas are checked.
	    alt_required_tags = ('Figure', 'Formula')

	    # Collect structure elements for this page
	    page_elements = []

	    def _collect_page_elements(node: StructureNode):
	        # Nodes without a page reference are treated as document-level
	        # and therefore included on every page.
	        on_this_page = node.page_ref is None or node.page_ref == page_index
	        if on_this_page and node.tag_type not in skipped_tags:
	            page_elements.append(node)

	        for child in node.children:
	            _collect_page_elements(child)

	    _collect_page_elements(root)

	    # Generate transcript. Only elements that actually produce an
	    # announcement are counted as "found".
	    # NOTE(review): assumes the counter belongs to the announced case.
	    transcript_lines = []
	    for element in page_elements:
	        announcement = _format_element_announcement(
	            element, reader_type, detail_level
	        )
	        if announcement:
	            transcript_lines.append(announcement)
	    element_count = len(transcript_lines)

	    transcript = '\n\n'.join(transcript_lines)

	    # Generate analysis
	    analysis_lines = [
	        "## Screen Reader Analysis (Tagged Mode)",
	        "",
	        "Structure: This page uses PDF tagging (accessible structure tree)",
	        f"Elements Found: {element_count}",
	        ""
	    ]

	    # Count element types (all page elements, announced or not)
	    tag_counts = {}
	    for element in page_elements:
	        tag_counts[element.tag_type] = tag_counts.get(element.tag_type, 0) + 1

	    if tag_counts:
	        analysis_lines.extend([
	            "### Element Types",
	            ""
	        ])
	        analysis_lines.extend(
	            f"- {tag}: {count}" for tag, count in sorted(tag_counts.items())
	        )

	    # Check for alt text coverage
	    elements_needing_alt = [
	        e for e in page_elements if e.tag_type in alt_required_tags
	    ]
	    elements_with_alt = [e for e in elements_needing_alt if e.alt_text]

	    if elements_needing_alt:
	        coverage = len(elements_with_alt) / len(elements_needing_alt) * 100
	        analysis_lines.extend([
	            "",
	            "### Alt Text Coverage",
	            "",
	            f"Elements needing alt text: {len(elements_needing_alt)}",
	            f"Elements with alt text: {len(elements_with_alt)}",
	            f"Coverage: {coverage:.1f}%",
	            ""
	        ])

	        if coverage < 100:
	            analysis_lines.append("⚠️ Some elements are missing alt text")

	    analysis = '\n'.join(analysis_lines)

	    return transcript, analysis


	def _simulate_untagged(
	    blocks: List[Any],
	    reader_type: str,
	    detail_level: str,
	    order_mode: str
	) -> Tuple[str, str]:
	    """
	    Simulate screen reader for untagged PDF using visual order.

	    Blocks are ordered with layout_utils.order_blocks(). Text blocks
	    (block_type 0) are read out, with a heading level guessed from the
	    mean span font size; image blocks (block_type 1) become a placeholder
	    line. Other block types are ignored.

	    Args:
	        blocks: List of BlockInfo objects
	        reader_type: "NVDA" or "JAWS"
	        detail_level: Detail level; "minimal" drops heading labels and
	            image placeholders
	        order_mode: Reading order mode

	    Returns:
	        Tuple of (transcript, analysis)
	    """
	    # Function-local import, as in the original (presumably to avoid an
	    # import cycle -- unconfirmed).
	    from layout_utils import order_blocks

	    announce_extras = detail_level != "minimal"
	    spoken = []
	    n_text = 0
	    n_images = 0

	    # order_blocks() yields (index, block) pairs; the index is not needed.
	    # NOTE(review): the flattened source does not show the exact nesting
	    # of the two counters; every text block and every image is counted.
	    for _, blk in order_blocks(blocks, order_mode):
	        if blk.block_type == 0:  # Text block
	            # Guess a heading level from the mean font size of the spans.
	            level = None
	            if blk.spans:
	                mean_size = sum(s.size for s in blk.spans) / len(blk.spans)
	                if mean_size > 18:
	                    level = 1
	                elif mean_size > 14:
	                    level = 2

	            body = blk.text.strip()
	            if level is None or not announce_extras:
	                spoken.append(body)
	            elif reader_type == "NVDA":
	                # Role and text are separate transcript entries for NVDA.
	                spoken.extend((f"Heading level {level}", body))
	            else:  # JAWS
	                spoken.append(f"Heading {level}: {body}")

	            n_text += 1

	        elif blk.block_type == 1:  # Image block
	            if announce_extras:
	                spoken.append("[Image - no alt text available]")
	            n_images += 1

	    transcript = '\n\n'.join(spoken)

	    analysis = '\n'.join((
	        "## Screen Reader Analysis (Untagged Mode)",
	        "",
	        "⚠️ No Structure: This page does not use PDF tagging",
	        "",
	        "Screen readers will read text in visual order with limited context.",
	        "",
	        f"Reading Order Mode: {order_mode}",
	        f"Text Blocks: {n_text}",
	        f"Images: {n_images}",
	        "",
	        "### Limitations",
	        "",
	        "- No semantic information (headings, lists, tables)",
	        "- No alt text for images",
	        "- Reading order may not match intended flow",
	        "- Navigation by elements not possible",
	        "",
	        "Recommendation: Add PDF tagging for better accessibility",
	    ))

	    return transcript, analysis


	def _format_element_announcement(
	    element: StructureNode,
	    reader_type: str,
	    detail_level: str
	) -> Optional[str]:
	    """
	    Format a structure element as a screen reader announcement.

	    Only 'H' and 'H' followed by digits (H1, H2, ...) are treated as
	    headings. Other tags that merely begin with 'H' (for example a custom
	    'Header' tag) are handled like any unknown tag instead of being
	    announced with a level taken from the rest of the tag name.

	    Args:
	        element: StructureNode to announce; its tag_type, actual_text and
	            alt_text attributes are read
	        reader_type: "NVDA" selects NVDA phrasing; any other value is
	            phrased as JAWS
	        detail_level: "minimal", "default", or "verbose"

	    Returns:
	        Formatted announcement string or None when nothing is announced
	    """
	    tag = element.tag_type
	    minimal = detail_level == "minimal"
	    verbose = detail_level == "verbose"
	    nvda = reader_type == "NVDA"

	    def announce(nvda_label: str, jaws_label: str, text: str) -> str:
	        # The simulated NVDA output puts the role on its own line; the
	        # JAWS output prefixes it inline.
	        if nvda:
	            return f"{nvda_label}\n{text}"
	        return f"{jaws_label}: {text}"

	    # Heading: exactly 'H', or 'H' plus a numeric level.
	    if tag.startswith('H') and (len(tag) == 1 or tag[1:].isdecimal()):
	        level = tag[1:] or '1'  # a bare 'H' is read as level 1
	        text = element.actual_text or "[Heading]"
	        if minimal:
	            return text
	        return announce(f"Heading level {level}", f"Heading {level}", text)

	    if tag == 'P':
	        # Paragraph: only NVDA in verbose mode brackets the text.
	        text = element.actual_text or "[Paragraph]"
	        if not minimal and verbose and nvda:
	            return f"Paragraph\n{text}\nOut of paragraph"
	        return text

	    if tag == 'Figure':
	        # Figure/Image: omitted entirely in minimal mode.
	        alt_text = element.alt_text or "[Image - no alt text]"
	        if minimal:
	            return None
	        return announce("Graphic", "Graphic", alt_text)

	    if tag == 'Formula':
	        # Math formula: alt text preferred, actual text as fallback.
	        alt_text = element.alt_text or element.actual_text or "[Formula]"
	        if minimal:
	            return alt_text
	        return announce("Formula", "Formula", alt_text)

	    if tag in ('L', 'LI'):
	        # List/List Item
	        text = element.actual_text or "[List item]"
	        if minimal:
	            return text
	        if tag == 'L' and verbose:
	            return "List start"
	        # NOTE(review): outside verbose mode an 'L' container is announced
	        # like a list item -- confirm this is intended.
	        return announce("List item", "Bullet", text)

	    if tag == 'Table':
	        if minimal:
	            return None
	        return "Table" if nvda else "Table start"

	    if tag in ('TR', 'TD', 'TH'):
	        # Table row/cell: plain text, dropped in minimal mode.
	        text = element.actual_text or ""
	        if text and not minimal:
	            return text
	        return None

	    if tag == 'Link':
	        text = element.actual_text or "[Link]"
	        if minimal:
	            return text
	        return announce("Link", "Link", text)

	    if tag in ('Document', 'Part', 'Sect', 'Div', 'Art'):
	        # Container elements - usually not announced
	        return None

	    # 'Span' and unknown tag types: read the text if there is any.
	    return element.actual_text or None


	def format_transcript(result: Dict[str, Any]) -> str:
	    """
	    Render a simulation result as a displayable transcript.

	    A title line names the reader type and detail level, a warning line
	    is added when the result came from the untagged fallback, and a
	    horizontal rule separates this header from the transcript text.

	    Args:
	        result: Result from simulate_screen_reader; the keys
	            'reader_type', 'detail_level', 'mode' and 'transcript'
	            are read

	    Returns:
	        Formatted transcript string
	    """
	    pieces = [
	        f"# {result['reader_type']} Transcript ({result['detail_level']} detail)\n\n"
	    ]

	    # Flag transcripts that were guessed from layout rather than tags.
	    if result['mode'] == 'untagged':
	        pieces.append("⚠️ Simulated from visual order (PDF not tagged)\n\n")

	    pieces.append("---\n\n")
	    pieces.append(result['transcript'])

	    return ''.join(pieces)