# Spaces: rianders / pdfinspector (Sleeping)
# File size: 11,448 Bytes

"""
Screen Reader Simulator Module

Simulates how NVDA and JAWS would read a PDF page, supporting both
tagged (structure tree) and untagged (visual order fallback) PDFs.
"""

from typing import Dict, List, Any, Optional, Tuple
import pikepdf
from structure_tree import extract_structure_tree, StructureNode


def simulate_screen_reader(
    pdf_path: str,
    page_index: int,
    blocks: List[Any],
    reader_type: str = "NVDA",
    detail_level: str = "default",
    order_mode: str = "tblr"
) -> Dict[str, Any]:
    """
    Produce a simulated screen reader transcript for one PDF page.

    The structure tree is requested via extract_structure_tree(). When that
    call returns a truthy root, the page is simulated from the tags;
    otherwise the supplied layout blocks are read in visual order.

    Args:
        pdf_path: Path to PDF file
        page_index: 0-based page index (only used in tagged mode)
        blocks: List of BlockInfo objects from extract_blocks_spans
            (only used in untagged mode)
        reader_type: "NVDA" or "JAWS"
        detail_level: "minimal", "default", or "verbose"
        order_mode: Reading order mode for untagged fallback ("raw", "tblr", "columns")

    Returns:
        Dictionary with keys 'transcript', 'analysis', 'mode'
        ("tagged" or "untagged"), 'reader_type' and 'detail_level'
    """
    structure_root = extract_structure_tree(pdf_path)

    if not structure_root:
        # No usable structure tree: fall back to visual reading order
        mode = "untagged"
        transcript, analysis = _simulate_untagged(
            blocks, reader_type, detail_level, order_mode
        )
    else:
        # Tagged PDF: let the structure tree drive the announcements
        mode = "tagged"
        transcript, analysis = _simulate_tagged(
            structure_root, page_index, reader_type, detail_level
        )

    result: Dict[str, Any] = {}
    result['transcript'] = transcript
    result['analysis'] = analysis
    result['mode'] = mode
    result['reader_type'] = reader_type
    result['detail_level'] = detail_level
    return result


def _simulate_tagged(
    root: StructureNode,
    page_index: int,
    reader_type: str,
    detail_level: str
) -> Tuple[str, str]:
    """
    Simulate screen reader for tagged PDF using structure tree.

    Walks the structure tree depth-first, keeps every node that belongs to
    the requested page (or carries no page reference), turns each kept node
    into an announcement, and builds a Markdown analysis of tag usage and
    alt text coverage.

    Args:
        root: Root StructureNode
        page_index: Page to simulate (0-based)
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level

    Returns:
        Tuple of (transcript, analysis)
    """
    from collections import Counter

    # Collect structure elements for this page
    page_elements = []

    def _collect_page_elements(node: StructureNode):
        # Include node if it's for this page or has no page ref (document-level)
        if node.page_ref is None or node.page_ref == page_index:
            if node.tag_type not in ('StructTreeRoot', 'MCID'):
                page_elements.append(node)

        for child in node.children:
            _collect_page_elements(child)

    _collect_page_elements(root)

    # Generate transcript; elements that produce no announcement are skipped
    formatted = (
        _format_element_announcement(element, reader_type, detail_level)
        for element in page_elements
    )
    transcript_lines = [announcement for announcement in formatted if announcement]
    element_count = len(transcript_lines)

    transcript = '\n\n'.join(transcript_lines)

    # Generate analysis
    analysis_lines = [
        "## Screen Reader Analysis (Tagged Mode)",
        "",
        "**Structure**: This page uses PDF tagging (accessible structure tree)",
        f"**Elements Found**: {element_count}",
        ""
    ]

    # Count element types (all collected elements, announced or not)
    tag_counts = Counter(element.tag_type for element in page_elements)

    if tag_counts:
        analysis_lines.extend([
            "### Element Types",
            ""
        ])
        for tag, count in sorted(tag_counts.items()):
            analysis_lines.append(f"- **{tag}**: {count}")

    # Check for alt text coverage.
    # Only figures and formulas need a text alternative. Artifacts are
    # decorative content that assistive technology skips, so counting them
    # here would report missing alt text that is not actually required.
    elements_needing_alt = [
        e for e in page_elements if e.tag_type in ('Figure', 'Formula')
    ]
    elements_with_alt = [e for e in elements_needing_alt if e.alt_text]

    if elements_needing_alt:
        coverage = len(elements_with_alt) / len(elements_needing_alt) * 100
        analysis_lines.extend([
            "",
            "### Alt Text Coverage",
            "",
            f"**Elements needing alt text**: {len(elements_needing_alt)}",
            f"**Elements with alt text**: {len(elements_with_alt)}",
            f"**Coverage**: {coverage:.1f}%",
            ""
        ])

        if coverage < 100:
            analysis_lines.append("⚠️ Some elements are missing alt text")

    analysis = '\n'.join(analysis_lines)

    return transcript, analysis


def _simulate_untagged(
    blocks: List[Any],
    reader_type: str,
    detail_level: str,
    order_mode: str
) -> Tuple[str, str]:
    """
    Simulate screen reader for untagged PDF using visual order.

    Blocks are ordered by layout_utils.order_blocks(), which is iterated as
    (index, block) pairs. Text blocks (block_type 0) are read out, with a
    heading level guessed from the average span font size; image blocks
    (block_type 1) are announced as images without alt text. Text blocks
    whose text is empty or whitespace-only are counted but contribute
    nothing to the transcript.

    Args:
        blocks: List of BlockInfo objects
        reader_type: "NVDA" or "JAWS"
        detail_level: Detail level
        order_mode: Reading order mode

    Returns:
        Tuple of (transcript, analysis)
    """
    from layout_utils import order_blocks  # Import the ordering function

    # Order blocks according to mode
    ordered_blocks = order_blocks(blocks, order_mode)

    # Generate transcript
    transcript_lines = []
    text_block_count = 0
    image_block_count = 0

    for _, block in ordered_blocks:
        if block.block_type == 0:  # Text block
            text_block_count += 1

            text = (block.text or "").strip()
            if not text:
                # Nothing to read aloud; an empty entry would only add
                # stray blank lines to the transcript
                continue

            # Infer heading from font size
            # NOTE(review): thresholds are presumably in points - confirm
            heading_level = None
            if block.spans:
                avg_size = sum(s.size for s in block.spans) / len(block.spans)
                if avg_size > 18:
                    heading_level = 1
                elif avg_size > 14:
                    heading_level = 2

            # Format announcement
            if heading_level is not None and detail_level != "minimal":
                if reader_type == "NVDA":
                    transcript_lines.append(f"Heading level {heading_level}")
                    transcript_lines.append(text)
                else:  # JAWS
                    transcript_lines.append(f"Heading {heading_level}: {text}")
            else:
                transcript_lines.append(text)

        elif block.block_type == 1:  # Image block
            if detail_level != "minimal":
                transcript_lines.append("[Image - no alt text available]")
            image_block_count += 1

    transcript = '\n\n'.join(transcript_lines)

    # Generate analysis
    analysis_lines = [
        "## Screen Reader Analysis (Untagged Mode)",
        "",
        "⚠️ **No Structure**: This page does not use PDF tagging",
        "",
        "Screen readers will read text in visual order with limited context.",
        "",
        f"**Reading Order Mode**: {order_mode}",
        f"**Text Blocks**: {text_block_count}",
        f"**Images**: {image_block_count}",
        "",
        "### Limitations",
        "",
        "- No semantic information (headings, lists, tables)",
        "- No alt text for images",
        "- Reading order may not match intended flow",
        "- Navigation by elements not possible",
        "",
        "**Recommendation**: Add PDF tagging for better accessibility"
    ]

    analysis = '\n'.join(analysis_lines)

    return transcript, analysis


def _format_element_announcement(
    element: StructureNode,
    reader_type: str,
    detail_level: str
) -> Optional[str]:
    """
    Format a structure element as a screen reader announcement.

    In "minimal" detail only the element's text is returned, without role
    announcements, and figures are skipped. In "default" and "verbose"
    detail, NVDA-style output puts the role on its own line before the
    text, while any other reader_type gets JAWS-style "Role: text" lines.

    Args:
        element: StructureNode to announce (reads tag_type, actual_text
            and alt_text)
        reader_type: "NVDA" or "JAWS"
        detail_level: "minimal", "default", or "verbose"

    Returns:
        Formatted announcement string, or None when the element produces
        no announcement
    """
    tag = element.tag_type
    lines = []

    # Only "H" and "H<digits>" are headings. A bare prefix test would also
    # match unrelated tags such as a custom "Header" and announce a
    # meaningless level taken from the rest of the tag name.
    is_heading = tag == 'H' or (tag.startswith('H') and tag[1:].isdigit())

    # Map PDF tag types to screen reader announcements
    if is_heading:
        # Heading ("H" without a number is announced as level 1)
        level = tag[1:] or '1'
        text = element.actual_text or "[Heading]"

        if detail_level == "minimal":
            return text

        if reader_type == "NVDA":
            lines.append(f"Heading level {level}")
            lines.append(text)
        else:  # JAWS
            lines.append(f"Heading {level}: {text}")

    elif tag == 'P':
        # Paragraph
        text = element.actual_text or "[Paragraph]"

        if detail_level == "minimal":
            return text

        # Paragraph boundaries are only announced for NVDA in verbose mode
        if detail_level == "verbose" and reader_type == "NVDA":
            lines.append("Paragraph")
            lines.append(text)
            lines.append("Out of paragraph")
        else:
            lines.append(text)

    elif tag == 'Figure':
        # Figure/Image
        alt_text = element.alt_text or "[Image - no alt text]"

        if detail_level == "minimal":
            return None

        if reader_type == "NVDA":
            lines.append("Graphic")
            lines.append(alt_text)
        else:  # JAWS
            lines.append(f"Graphic: {alt_text}")

    elif tag == 'Formula':
        # Math formula
        alt_text = element.alt_text or element.actual_text or "[Formula]"

        if detail_level == "minimal":
            return alt_text

        if reader_type == "NVDA":
            lines.append("Formula")
            lines.append(alt_text)
        else:  # JAWS
            lines.append(f"Formula: {alt_text}")

    elif tag == 'L':
        # List container: it is not itself a list item, so it never gets
        # the "List item" / "Bullet" announcement or the item placeholder.
        if detail_level == "verbose":
            lines.append("List start")
        elif element.actual_text:
            return element.actual_text

    elif tag == 'LI':
        # List item
        text = element.actual_text or "[List item]"

        if detail_level == "minimal":
            return text

        if reader_type == "NVDA":
            lines.append("List item")
            lines.append(text)
        else:  # JAWS
            lines.append(f"Bullet: {text}")

    elif tag == 'Table':
        # Table
        if detail_level != "minimal":
            if reader_type == "NVDA":
                lines.append("Table")
            else:  # JAWS
                lines.append("Table start")

    elif tag in ('TR', 'TD', 'TH'):
        # Table row/cell: plain text with no role announcement, so it is
        # read at every detail level (including "minimal")
        text = element.actual_text or ""
        if text:
            lines.append(text)

    elif tag == 'Link':
        # Link
        text = element.actual_text or "[Link]"

        if detail_level == "minimal":
            return text

        if reader_type == "NVDA":
            lines.append("Link")
            lines.append(text)
        else:  # JAWS
            lines.append(f"Link: {text}")

    elif tag == 'Span':
        # Inline text
        text = element.actual_text or ""
        if text:
            return text

    elif tag in ('Document', 'Part', 'Sect', 'Div', 'Art'):
        # Container elements - usually not announced
        return None

    else:
        # Unknown tag type: read its text if it has any
        if element.actual_text:
            return element.actual_text

    if lines:
        return '\n'.join(lines)

    return None


def format_transcript(result: Dict[str, Any]) -> str:
    """
    Render a simulation result as a display-ready transcript.

    The output starts with a title line naming the reader and detail level,
    adds a warning line when the result came from untagged mode, then a
    horizontal rule followed by the transcript text.

    Args:
        result: Result from simulate_screen_reader (reads 'reader_type',
            'detail_level', 'mode' and 'transcript')

    Returns:
        Formatted transcript string
    """
    pieces = [
        f"# {result['reader_type']} Transcript ({result['detail_level']} detail)\n\n"
    ]

    # Flag transcripts that were reconstructed without a structure tree
    if result['mode'] == 'untagged':
        pieces.append("⚠️ Simulated from visual order (PDF not tagged)\n\n")

    pieces.append("---\n\n")
    pieces.append(result['transcript'])

    return ''.join(pieces)