# Spaces: sk851 / TerraFin (Running) - File size: 24,767 Bytes

import re
from pathlib import Path

import pytest

from TerraFin.data.providers.corporate.filings.sec_edgar import parser


# Directory holding on-disk fixtures, resolved relative to this test module.
_FIXTURES_DIR = Path(__file__).resolve().parent.joinpath("fixtures")
# Fixture under test: a real EDGAR 8-K filing (Apple Inc., accession
# 0000320193-26-000011). It carries Item 2.02 (Results of Operations) and
# Item 9.01 (Exhibits); that two-item shape covers both the
# correctly-classified TopSectionTitle case and the mis-classified
# TitleElement case observed with Edgar10QParser.
_SAMPLE_8K_PATH = _FIXTURES_DIR.joinpath("sample_8k_AAPL_0000320193-26-000011.html")


# Minimal synthetic filing shared by the `parse_sec_filing` tests below: one
# PART heading, two Item headings with a paragraph each, and three <img>
# variants (a relative path, a base64 data URI, and an alt attribute with
# irregular whitespace).
_FILING_HTML = """<html><body>
<h2>PART I - FINANCIAL INFORMATION</h2>
<h3>Item 1. Financial Statements</h3>
<p>Revenue grew 10% year over year.</p>
<img src="charts/revenue.jpg" alt="Revenue chart" />
<img src="data:image/png;base64,AAAAAAAA" alt="Inline logo" />
<img src="logo.png" alt="  Leading\nand trailing\twhitespace  " />
<h3>Item 2. MD&amp;A</h3>
<p>Management commentary.</p>
</body></html>"""


def test_parse_sec_filing_uses_hash_prefix_for_section_titles() -> None:
    """Part/Item titles render as level-two headings, never as flat `#### `."""
    rendered = parser.parse_sec_filing(_FILING_HTML, "10-Q")

    # sec_parser classifies "PART I" and "Item N" as TopSectionTitle → "## ".
    for expected_heading in (
        "## PART I - FINANCIAL INFORMATION",
        "## Item 1. Financial Statements",
    ):
        assert expected_heading in rendered
    # The flat "#### " prefix from the prior implementation should be gone.
    assert "#### " not in rendered


def test_parse_filing_maps_title_element_to_h3(monkeypatch) -> None:
    """Exercise the TitleElement → '### ' branch directly.

    Real SEC filings with explicit `<strong>`-style sub-titles can yield
    TitleElements next to TopSectionTitles. The sec_parser entry point is
    replaced with a stub so both element kinds are guaranteed to appear.
    """
    from unittest.mock import MagicMock

    from sec_parser.semantic_elements import TitleElement, TopSectionTitle

    def _stub_element(element_type, text):
        # `spec=` makes the mock pass isinstance checks for `element_type`.
        element = MagicMock(spec=element_type)
        element.text = text
        return element

    top_title = _stub_element(TopSectionTitle, "Top")
    sub_title = _stub_element(TitleElement, "Sub")

    class _StubParser:
        def parse(self, _html):
            return [top_title, sub_title]

    monkeypatch.setattr(parser.sp, "Edgar10QParser", _StubParser)

    rendered = parser._parse_filing("<html></html>", include_images=False)

    for expected_heading in ("## Top", "### Sub"):
        assert expected_heading in rendered


def test_parse_sec_filing_omits_images_by_default() -> None:
    """Without `include_images`, neither image syntax appears in the output."""
    rendered = parser.parse_sec_filing(_FILING_HTML, "10-Q")
    for image_marker in ("![", "<inline-image"):
        assert image_marker not in rendered


def test_parse_sec_filing_includes_images_when_requested() -> None:
    """`include_images=True` emits one markdown image per `<img>` tag."""
    rendered = parser.parse_sec_filing(_FILING_HTML, "10-Q", include_images=True)

    expected_images = (
        # Plain relative path is passed through.
        "![Revenue chart](charts/revenue.jpg)",
        # Data URI is replaced with a placeholder.
        "![Inline logo](<inline-image:image/png>)",
        # Alt text whitespace is collapsed.
        "![Leading and trailing whitespace](logo.png)",
    )
    for image_md in expected_images:
        assert image_md in rendered
    # No raw base64 payload survives.
    assert "AAAAAAAA" not in rendered


def test_image_to_md_sanitizes_long_alt() -> None:
    """A 500-character alt attribute is shortened and ends with an ellipsis."""
    import sec_parser as sp
    from sec_parser.semantic_elements import ImageElement

    oversized_alt = "x" * 500
    document = f'<html><body><h2>S</h2><img src="a.png" alt="{oversized_alt}" /></body></html>'
    image_elements = [
        element
        for element in sp.Edgar10QParser().parse(document)
        if isinstance(element, ImageElement)
    ]
    assert image_elements, "sec_parser should emit an ImageElement"

    rendered = parser._image_to_md(image_elements[0])
    # Truncated to _ALT_MAX with an ellipsis, leaving room for ]( syntax.
    assert rendered.startswith("![xxx")
    assert rendered.endswith("\u2026](a.png)")
    assert len(rendered) < 500


def test_parse_sec_filing_raises_for_unsupported_form() -> None:
    """A form type the parser does not handle is rejected with ValueError."""
    unsupported_form = "DEF 14A"
    with pytest.raises(ValueError, match="not supported"):
        parser.parse_sec_filing(_FILING_HTML, unsupported_form)


def test_parse_sec_filing_accepts_verbose_form_descriptors() -> None:
    """Decorated form descriptors are accepted and still parse the document."""
    # SEC's primaryDocDescription sometimes comes as "FORM 10-Q" or
    # "10-K (Annual Report)". Loose matching preserves the caller contract.
    descriptors = ("FORM 10-Q", "10-K (Annual Report)", "10-Q/A")
    rendered_by_descriptor = {
        descriptor: parser.parse_sec_filing(_FILING_HTML, descriptor)
        for descriptor in descriptors
    }
    for descriptor in descriptors:
        assert "PART I" in rendered_by_descriptor[descriptor]


def test_parse_sec_filing_handles_none_filing_form() -> None:
    """A missing form type surfaces as the same ValueError as an unknown one."""
    missing_form = None
    with pytest.raises(ValueError, match="not supported"):
        parser.parse_sec_filing(_FILING_HTML, missing_form)


# ---------------------------------------------------------------------------
# 8-K parsing parity (real EDGAR fixture)
# ---------------------------------------------------------------------------

# Matches a level-two markdown heading that opens with an 8-K item code
# (`## Item N.NN`) at the start of any line of the text.
_EIGHT_K_ITEM_HEADING_RE = re.compile(
    r"^## Item \d+\.\d{2}\b",
    flags=re.MULTILINE,
)


def test_parse_sec_filing_8k_returns_nonempty_markdown() -> None:
    """8-K branch should accept the form and produce markdown output."""
    # Pin the encoding: a bare read_text() decodes with the locale's
    # preferred encoding, which varies by platform.
    # NOTE(review): assumes the fixture is UTF-8 (or plain ASCII) — confirm.
    html = _SAMPLE_8K_PATH.read_text(encoding="utf-8")
    md = parser.parse_sec_filing(html, "8-K")
    assert isinstance(md, str)
    assert md.strip(), "8-K markdown should not be empty or whitespace-only"


def test_parse_sec_filing_8k_promotes_item_code_headings_to_level_two() -> None:
    """8-K item codes (`Item N.NN`) must surface as ## headings — sec_parser
    classifies the second item as TitleElement on this AAPL fixture, so
    the heading-promotion path in `_emit_heading` is what lifts it."""
    # Pin the encoding: a bare read_text() decodes with the locale's
    # preferred encoding, which varies by platform.
    # NOTE(review): assumes the fixture is UTF-8 (or plain ASCII) — confirm.
    html = _SAMPLE_8K_PATH.read_text(encoding="utf-8")
    md = parser.parse_sec_filing(html, "8-K")
    matches = _EIGHT_K_ITEM_HEADING_RE.findall(md)
    assert matches, "expected at least one `## Item N.NN` heading in 8-K markdown"


def test_parse_sec_filing_8k_build_toc_emits_item_code_slugs() -> None:
    """`build_toc` is form-agnostic — it just needs ## headings. On the
    AAPL fixture both Item 2.02 and Item 9.01 should land in the TOC
    with item-code-style slugs."""
    # Pin the encoding: a bare read_text() decodes with the locale's
    # preferred encoding, which varies by platform.
    # NOTE(review): assumes the fixture is UTF-8 (or plain ASCII) — confirm.
    html = _SAMPLE_8K_PATH.read_text(encoding="utf-8")
    md = parser.parse_sec_filing(html, "8-K")
    toc = parser.build_toc(md)
    slugs = [e["slug"] for e in toc]
    # The slug list is the assertion message so a failure shows what was built.
    assert any(s.startswith("item-202") for s in slugs), slugs
    assert any(s.startswith("item-901") for s in slugs), slugs


def test_parse_sec_filing_8k_accepts_verbose_form_descriptors() -> None:
    """EDGAR's primaryDocDescription sometimes comes as `FORM 8-K` or
    `8-K/A`. Loose matching preserves the caller contract."""
    # Pin the encoding: a bare read_text() decodes with the locale's
    # preferred encoding, which varies by platform.
    # NOTE(review): assumes the fixture is UTF-8 (or plain ASCII) — confirm.
    html = _SAMPLE_8K_PATH.read_text(encoding="utf-8")
    md_a = parser.parse_sec_filing(html, "FORM 8-K")
    md_b = parser.parse_sec_filing(html, "8-K/A")
    assert _EIGHT_K_ITEM_HEADING_RE.search(md_a), "no item heading for 'FORM 8-K'"
    assert _EIGHT_K_ITEM_HEADING_RE.search(md_b), "no item heading for '8-K/A'"


def test_parse_sec_filing_8k_section_body_lookup_roundtrip() -> None:
    """End-to-end: parse → build TOC → slice section body by slug.
    Mirrors what `service.sec_filing_section` does — confirms the
    parser output is shaped correctly for downstream consumers."""
    # Pin the encoding: a bare read_text() decodes with the locale's
    # preferred encoding, which varies by platform.
    # NOTE(review): assumes the fixture is UTF-8 (or plain ASCII) — confirm.
    html = _SAMPLE_8K_PATH.read_text(encoding="utf-8")
    md = parser.parse_sec_filing(html, "8-K")
    toc = parser.build_toc(md)
    # Use a default so a missing slug fails as a readable assertion that
    # lists the slugs actually produced, instead of a bare StopIteration.
    target = next((e for e in toc if e["slug"].startswith("item-202")), None)
    assert target is not None, [e["slug"] for e in toc]
    lines = md.split("\n")
    # The section body runs up to the next TOC entry, or to end of document.
    later = [e for e in toc if e["line_index"] > target["line_index"]]
    end_line = later[0]["line_index"] if later else len(lines)
    body = "\n".join(lines[target["line_index"] + 1 : end_line]).strip()
    assert body, "Item 2.02 body should not be empty"
    # AAPL's Item 2.02 mentions a press release.
    assert "press release" in body.lower()


# Hand-written markdown shared by the `build_toc` tests: two level-2 PART
# headings, two level-3 Item headings nested under the first PART, and one
# prose line containing a `#` that is not a heading.
_SAMPLE_MD = (
    "## PART I - FINANCIAL INFORMATION\n"
    "\n"
    "### Item 1. Financial Statements\n"
    "\n"
    "Some prose that mentions #tokens but is not a heading.\n"
    "\n"
    "### Item 2. MD&A\n"
    "\n"
    "## PART II - OTHER INFORMATION\n"
)


def test_build_toc_default_keeps_top_level_only_for_compact_agent_context() -> None:
    """Default `build_toc` call lists only the level-2 headings, so agents get
    the Part/Item scaffold rather than every sub-title."""
    expected = [
        (2, "PART I - FINANCIAL INFORMATION"),
        (2, "PART II - OTHER INFORMATION"),
    ]
    observed = [
        (entry["level"], entry["text"]) for entry in parser.build_toc(_SAMPLE_MD)
    ]
    assert observed == expected


def test_build_toc_max_level_none_returns_full_hierarchy() -> None:
    """`max_level=None` keeps every heading, in document order."""
    entries = parser.build_toc(_SAMPLE_MD, max_level=None)

    expected_outline = [
        (2, "PART I - FINANCIAL INFORMATION"),
        (3, "Item 1. Financial Statements"),
        (3, "Item 2. MD&A"),
        (2, "PART II - OTHER INFORMATION"),
    ]
    observed_outline = [(item["level"], item["text"]) for item in entries]
    assert observed_outline == expected_outline

    # Common entry shape for every item.
    expected_keys = {"level", "text", "line_index", "slug", "char_count"}
    for item in entries:
        assert set(item) == expected_keys
    first_entry = entries[0]
    assert first_entry["slug"] == "part-i-financial-information"


def test_build_toc_char_count_aggregates_filtered_subsections() -> None:
    """Filtering out subsections widens the parent's `char_count` so it covers
    the filtered-out heading lines and their bodies."""
    full_toc = parser.build_toc(_SAMPLE_MD, max_level=None)
    compact_toc = parser.build_toc(_SAMPLE_MD, max_level=2)

    # PART I (compact) must span strictly more chars than PART I (full): full
    # stops at the next ### heading; compact stops at the next ## heading.
    compact_part_one = compact_toc[0]
    assert compact_part_one["text"] == "PART I - FINANCIAL INFORMATION"
    assert compact_part_one["char_count"] > full_toc[0]["char_count"]

    # Upper bound: every line's length plus one for its newline.
    source_lines = _SAMPLE_MD.splitlines()
    document_chars = sum(map(len, source_lines)) + len(source_lines)
    # Every body span should be non-negative and no larger than the document.
    for item in compact_toc + full_toc:
        assert 0 <= item["char_count"] <= document_chars


def test_build_toc_on_empty_or_heading_less_input() -> None:
    """Input without any heading line yields an empty TOC."""
    for heading_free_text in ("", "No headings here.\nJust prose."):
        assert parser.build_toc(heading_free_text) == []


def test_heal_broken_titles_splices_mid_word_split() -> None:
    """A heading split mid-word by sec_parser is merged back by the healer.

    Example: ZETA's 10-K gives `TopSectionTitle("Item 1. Bus")` followed by
    `TitleElement("iness.")`.
    """
    broken = "## Item 1. Bus\n\n### iness.\n\n### Overview\n\nBody prose.\n"

    repaired = parser._heal_broken_titles(broken)

    # The two fragments are merged into one heading; the stray one is gone.
    assert "## Item 1. Business." in repaired
    assert "### iness." not in repaired
    # Overview and body prose should survive unchanged.
    for untouched in ("### Overview", "Body prose."):
        assert untouched in repaired


def test_heal_broken_titles_handles_multi_fragment_chain() -> None:
    """A word split across three or more heading fragments is still rejoined."""
    fragments = ("## Item 1. Bus", "### iness", "### .")
    # Fragments are separated by blank lines; the text ends with one newline.
    broken = "\n\n".join(fragments) + "\n"
    repaired = parser._heal_broken_titles(broken)
    assert "## Item 1. Business." in repaired


def test_heal_broken_titles_leaves_legit_subheadings_alone() -> None:
    """A lowercase subheading under a complete parent title is NOT merged.

    The signal is that the parent ends with a period or other terminator.
    """
    source = "## Item 2. Properties.\n\n### overview\n\nBody.\n"
    result = parser._heal_broken_titles(source)
    for preserved_heading in ("## Item 2. Properties.", "### overview"):
        assert preserved_heading in result


def test_heal_broken_titles_does_not_merge_all_caps_parents() -> None:
    """An all-caps title followed by a lowercase subheading stays separate.

    This is a real 10-K pattern (e.g. `RISKS` + `### related to operations`).
    The old regex merged them into `RISKSrelated`; the tightened regex
    requires the parent-line tail to be title-case (one upper + two lower).
    """
    source = "## RISKS\n\n### related to operations\n\nBody.\n"
    result = parser._heal_broken_titles(source)
    assert "RISKSrelated" not in result
    for preserved_heading in ("## RISKS", "### related to operations"):
        assert preserved_heading in result


def test_heal_broken_titles_does_not_merge_possessive_parent() -> None:
    """A possessive parent such as `Company's` is not glued to the next line.

    Possessives end in a letter, so a naive regex reads them as a mid-word
    split, but the following lowercase-led line is always a legitimate
    sub-heading.
    """
    source = "## Item 1. Company's\n\n### own operations\n\nBody.\n"
    result = parser._heal_broken_titles(source)
    assert "Company'sown" not in result
    for preserved_heading in ("## Item 1. Company's", "### own operations"):
        assert preserved_heading in result


def test_heal_broken_titles_does_not_merge_complete_short_parent() -> None:
    """A complete four-character word like `Note` does not absorb the
    lowercase sub-heading that follows it."""
    source = "## Note\n\n### overview of disclosures\n\nBody.\n"
    result = parser._heal_broken_titles(source)
    assert "Noteoverview" not in result
    for preserved_heading in ("## Note", "### overview of disclosures"):
        assert preserved_heading in result


def test_build_toc_ignores_inline_hashes() -> None:
    """A `##` token in the middle of a line is not treated as a heading."""
    document = "\n".join(
        ["This sentence has a ## middle-of-line token.", "## Real Heading"]
    )
    entries = parser.build_toc(document)
    assert len(entries) == 1
    sole_entry = entries[0]
    assert sole_entry["text"] == "Real Heading"
    assert sole_entry["line_index"] == 1


def test_looks_like_section_heading_matches_part_and_item_patterns() -> None:
    """Check the predicate that decides whether a sec_parser text blob is
    promoted to a ## heading.

    It matches the canonical Part/Item prefixes (case-insensitive, with or
    without period/dash).

    There is no length cap: a genuine Item 7 MD&A heading can be 100+ chars,
    and promotion is still wanted when sec_parser fuses the heading with its
    paragraph into one long blob — `_split_heading_and_body` in
    `_emit_heading` splits at the first newline so the heading line lands
    cleanly in the TOC.
    """
    accepted = (
        "Item 7. Management's Discussion",
        "ITEM 7A. Quantitative and Qualitative",
        "Part I",
        "PART II",
        "Item 8",  # no period
        # Run-on (heading fused with body) still matches — caller splits.
        "Item 7. MD&A\nOverview\nNet income was ...",
    )
    for candidate in accepted:
        assert parser._looks_like_section_heading(candidate) is True

    # Rejects non-heading text.
    rejected = ("The Company sells products", "")
    for candidate in rejected:
        assert parser._looks_like_section_heading(candidate) is False


def test_slugify_caps_output_at_80_chars() -> None:
    """Slugs are capped at 80 characters, cut at a word boundary.

    Without the cap, a sec_parser-misclassified 400-char boilerplate
    paragraph becomes a 400-char slug that floods the TOC.
    """
    boilerplate = " ".join(
        [
            "Indicates a management contract or compensatory plan. The certifications",
            "attached as Exhibit 32.1 and Exhibit 32.2 that accompany this Annual Report",
        ]
    )
    result = parser._slugify(boilerplate)

    assert len(result) <= 80
    # Must end at a word boundary (no trailing hyphen / partial word).
    assert not result.endswith("-")
    # Stays meaningful-looking.
    assert result.startswith("indicates-a-management-contract")


def test_slugify_preserves_short_slugs_unchanged() -> None:
    """Titles whose slug is well under the cap are slugified in full."""
    expected_slug_by_title = {
        "Item 7. MD&A": "item-7-mda",
        "Part II": "part-ii",
    }
    for title, expected_slug in expected_slug_by_title.items():
        assert parser._slugify(title) == expected_slug


def test_emit_heading_splits_run_on_item_paragraph_into_heading_plus_body() -> None:
    """A heading fused with its paragraph is split into heading plus body.

    sec_parser's Edgar10QParser routinely fuses a 10-K Item heading with
    the paragraph that follows it into a single TextElement. Before the
    split, Item 7 / Item 8 never appeared in the TOC for 10-Ks because the
    fused text was too long to recognize as a heading — ZETA's 10-K
    reproduced this.

    The fix: when a text blob starts with an `Item N.` / `Part I` marker
    and contains a newline, the first line is the heading and the rest is
    body. The heading line's own length no longer matters; a 91-char
    `Item 7. Management's Discussion and Analysis of Financial Condition
    and Results of Operations` is a legitimate heading, not boilerplate.
    """
    heading_line = (
        "Item 7. Management's Discussion and Analysis of Financial Condition "
        "and Results of Operations"
    )
    fused_text = "\n".join(
        [heading_line, "Overview", "Net income was $X million for the year."]
    )

    output = parser._emit_heading(fused_text, default_level=3)

    # Heading promoted to ##, body preserved as following lines.
    assert output.startswith("## Item 7. Management's Discussion")
    for body_fragment in ("Overview", "Net income was $X million"):
        assert body_fragment in output


def test_build_toc_keeps_long_real_headings() -> None:
    """`build_toc` applies no length filter to headings.

    A genuine `Item 5.` heading is often 100+ chars in a 10-K (`Item 5.
    Market for Registrant's Common Equity, Related Stockholder Matters and
    Issuer Purchases of Equity Securities`) and must still appear in the TOC.
    """
    lengthy_heading = (
        "Item 5. Market for Registrant's Common Equity, Related Stockholder "
        "Matters and Issuer Purchases of Equity Securities"
    )
    markdown_lines = [
        "## Real Part II",
        f"### {lengthy_heading}",
        "Body.",
        "### Item 7. MD&A",
        "MD&A body.",
        "",  # keeps the trailing newline
    ]
    entries = parser.build_toc("\n".join(markdown_lines), max_level=3)
    heading_texts = [item["text"] for item in entries]
    for expected_text in (lengthy_heading, "Item 7. MD&A"):
        assert expected_text in heading_texts


def test_emit_heading_splits_multi_item_blob_so_every_item_lands_in_toc() -> None:
    """Every Item heading inside one fused blob is promoted, not only the first.

    sec_parser routinely fuses several 10-K Item headings and their bodies
    into one big TextElement. The ZETA 10-K failure mode: Item 7 MD&A,
    Item 7A Market Risk, and Item 8 Financial Statements all live inside a
    single blob, so only the first one used to get promoted. After the
    multi-chunk split they all surface in the TOC.
    """
    blob_lines = (
        "Item 7. Management's Discussion and Analysis of Financial Condition",
        "Overview of results.",
        "Revenue was $500M.",
        "Item 7A. Quantitative and Qualitative Disclosures About Market Risk",
        "Interest rate sensitivity analysis.",
        "Item 8. Financial Statements and Supplementary Data",
        "Net income was $50M for the year ended 2025.",
    )
    # Every line, including the last, is newline-terminated.
    fused_blob = "".join(f"{line}\n" for line in blob_lines)

    output = parser._emit_heading(fused_blob, default_level=3)

    # All three Item headings promoted to ##.
    promoted_headings = (
        "## Item 7. Management's Discussion",
        "## Item 7A. Quantitative and Qualitative",
        "## Item 8. Financial Statements",
    )
    for heading in promoted_headings:
        assert heading in output
    # And their bodies are still present under each heading.
    body_fragments = (
        "Revenue was $500M",
        "Interest rate sensitivity",
        "Net income was $50M",
    )
    for fragment in body_fragments:
        assert fragment in output


def test_emit_heading_promotes_embedded_item_after_preamble() -> None:
    """An Item heading that follows non-heading prose is still promoted.

    A blob may start with a preamble (like a 'Table of Contents'
    breadcrumb) and carry the Item heading further down. The earlier regex
    anchored at start-of-string and would miss this case, leaving the whole
    blob as unclassified body prose.
    """
    blob_with_preamble = "\n".join(
        ["Table of Contents", "Item 7. Management's Discussion", "Overview text."]
    )
    output = parser._emit_heading(blob_with_preamble, default_level=3)

    expected_fragments = (
        # The Item 7 heading is promoted despite the preamble line.
        "## Item 7. Management's Discussion",
        # Preamble stays in the output.
        "Table of Contents",
        # Body stays in the output.
        "Overview text",
    )
    for fragment in expected_fragments:
        assert fragment in output


def test_build_toc_dedupes_colliding_slugs_with_numeric_suffix() -> None:
    """Headings that slugify identically receive `-2`, `-3`, … suffixes.

    Without this, two sections would alias to a single TOC entry, and
    first-match-wins in `sec_filing_section` would make the second section
    unreachable.
    """
    # Same heading text appearing twice is the simplest collision case;
    # it happens in real 10-Ks where, e.g. Part I and Part IV both have
    # an "Exhibits" subsection.
    document = "".join(
        line + "\n"
        for line in ("## Part I", "### Exhibits", "A.", "## Part IV", "### Exhibits", "B.")
    )
    entry_slugs = [item["slug"] for item in parser.build_toc(document, max_level=3)]
    # All slugs distinct.
    assert len(set(entry_slugs)) == len(entry_slugs)
    # One `exhibits`, one `exhibits-2`.
    for expected_slug in ("exhibits", "exhibits-2"):
        assert expected_slug in entry_slugs


def test_build_toc_dedup_respects_existing_numeric_suffix_slug() -> None:
    """Collision suffixes skip over a slug that a real heading already owns.

    If the source has a heading whose own slug is `exhibits-2` (e.g.
    literally `## Exhibits 2`) AND several `Exhibits` headings collide, the
    resolver must increment past the already-used `-2` rather than
    re-emitting it. A plain per-base-slug counter would produce a
    duplicate; the used-slug set approach avoids that.
    """
    document = "## Exhibits\nA.\n## Exhibits 2\nB.\n## Exhibits\nC.\n## Exhibits\nD.\n"
    entry_slugs = [item["slug"] for item in parser.build_toc(document, max_level=3)]

    # Every slug must be unique.
    assert len(set(entry_slugs)) == len(entry_slugs)
    # Sensible ordering: first `Exhibits` wins the bare slug, then the
    # legitimately-named `Exhibits 2` keeps its own slug, and the later
    # duplicates skip past the collision.
    assert entry_slugs[0] == "exhibits"
    assert entry_slugs[1] == "exhibits-2"
    # Subsequent collisions use suffixes that don't collide with the second slug.
    assert entry_slugs[2] != entry_slugs[1]
    assert entry_slugs[3] not in (entry_slugs[1], entry_slugs[2])


def test_build_toc_dedupes_colliding_truncated_slugs() -> None:
    """Long titles that differ only in their tails still get distinct slugs.

    This targets the slug length cap: the two titles share a long common
    prefix and diverge only afterwards, so a capped slug alone could come
    out the same for both.
    """
    shared_prefix = "Item 1. Business overview covering segments products go to market strategy and"
    first_title = f"{shared_prefix} North America revenue"
    second_title = f"{shared_prefix} Europe operations headcount"
    document = f"## Part I\n### {first_title}\nA.\n### {second_title}\nB.\n"

    entry_slugs = [item["slug"] for item in parser.build_toc(document, max_level=3)]
    assert len(set(entry_slugs)) == len(entry_slugs)  # no collision survives


def test_build_toc_strips_trailing_carriage_return_from_heading_text() -> None:
    """CRLF line endings must not leak into heading slugs.

    EDGAR HTML often has CRLF line endings. When text is split on a bare
    `\\n`, the `\\r` stays attached to the heading text (`str.splitlines()`
    itself does split on `\\r\\n`); left in place it would pollute the slug
    and break lookup.
    """
    crlf_document = "\r\n".join(["## Part I", "### Item 1. Business", "Body.", ""])
    entry_slugs = [item["slug"] for item in parser.build_toc(crlf_document, max_level=3)]
    assert "item-1-business" in entry_slugs
    # None of the slugs end with a garbage `-r`.
    assert not any(slug.endswith("-r") for slug in entry_slugs)


def test_promote_item_heading_regex_does_not_overmatch_body_text() -> None:
    """A non-numeric `### item …` heading is not promoted to level 2.

    The promote/merge regexes used to accept `\\w+` after `ITEM`, which
    matched incidental text like `### item foo` (a real H3 sub-section).
    Tightening to `\\d+[A-Z]?` prevents spurious level-2 promotions.

    The property checked here: an `### item foo bar` heading stays at
    level 3 in the healed markdown, so `build_toc(max_level=2)` ignores it.
    """
    source = "## Part II\n### item foo bar\nRegular subsection.\n"
    healed_markdown = parser._heal_broken_titles(source)
    # It's still a level-3 heading; `build_toc(max_level=2)` excludes it
    # instead of surfacing a bogus level-2 entry.
    top_level_slugs = [
        item["slug"] for item in parser.build_toc(healed_markdown, max_level=2)
    ]
    assert "item-foo-bar" not in top_level_slugs
    assert "part-ii" in top_level_slugs


def test_parse_sec_filing_8k_item_801_other_events_lands_in_toc() -> None:
    """Item 8.01 is promoted to a ## heading and appears in the TOC.

    Item 8.01 "Other Events" is a common 8-K item code (used for
    non-2.02/non-5.02 announcements: dividends, buyback authorizations,
    SEC settlements, etc.). The 8-K item-code regex (`\\d+\\.\\d{2}`) must
    match it the same as the more common Item 2.02 / Item 9.01. The input
    is a synthetic minimal 8-K body with just an Item 8.01 heading.
    """
    paragraphs = (
        "<p>Item 8.01 Other Events</p>",
        "<p>On May 21, 2026 the Company announced a $1B share repurchase "
        "authorization replacing the prior program.</p>",
    )
    document = "<html><body>" + "".join(paragraphs) + "</body></html>"

    rendered = parser.parse_sec_filing(document, "8-K")
    assert "## Item 8.01" in rendered, rendered

    entry_slugs = [item["slug"] for item in parser.build_toc(rendered)]
    assert any(slug.startswith("item-801") for slug in entry_slugs), entry_slugs


def test_table_to_md_returns_sec_parser_markdown_verbatim() -> None:
    """`_table_to_md` hands back the table's own markdown untouched.

    sec_parser's table markdown is intentionally not post-processed, so
    the agent sees the same rows and columns the user renders.
    Strip/normalize would risk silent data loss on sparse tables, and the
    user↔agent data surface must stay identical.
    """
    canned_markdown = "| Header | Other |\n| cell | value |"

    class _StubTable:
        def table_to_markdown(self) -> str:
            return canned_markdown

    assert parser._table_to_md(_StubTable()) == "| Header | Other |\n| cell | value |"