# Spaces: Legal-i / orgstate (Running)
#
# File size: 7,768 Bytes
#
# Revision: d2d1903

"""
Tests for Stage 47 — GET /runs/{rid}/issues.csv.

CSV download for non-technical consumers — finance/ops teams who
live in Excel. Two layers:

  - handlers.issues_csv — pure CSV generation with stdlib csv.writer
    so RFC 4180 escaping (embedded quotes, commas, newlines in
    titles) is handled correctly.
  - HTTP route — same auth as the JSON variant; correct content-type;
    Content-Disposition with a sensible filename.
"""
import csv
import io
import json

import pytest

from infra import OrgStateService
from infra.api import handlers


def _insert_run_with_issues(svc, tenant_id, run_id, issues):
    """Seed one ``runs`` row plus one ``run_issues`` row per issue.

    Rows are written directly through ``svc.db.execute``. Every entry
    of ``issues`` is a dict that must carry ``entity_id``, ``severity``
    and ``score``; ``issue_id``, ``entity_type``, ``title`` and
    ``detected_at`` fall back to defaults when absent. The run row's
    ``n_issues`` is ``len(issues)``.
    """
    db = svc.db
    run_params = (run_id, tenant_id, len(issues))
    db.execute(
        """INSERT INTO runs
               (run_id, tenant_id, entity_type, vertical, status,
                n_states, n_issues, n_decisions, summary_json,
                started_at, finished_at)
           VALUES (?, ?, 'widget', 'logistics', 'completed',
                   0, ?, 0, '{}',
                   '2026-05-16T08:00:00+00:00',
                   '2026-05-16T08:00:00+00:00')""",
        run_params,
    )
    for position, raw in enumerate(issues):
        # Key order matters: the same dict is serialised into issue_json.
        record = dict(
            issue_id=raw.get("issue_id", f"i_{position}"),
            entity_id=raw["entity_id"],
            entity_type=raw.get("entity_type", "widget"),
            severity=raw["severity"],
            score=raw["score"],
            title=raw.get("title", "default title"),
            detected_at=raw.get("detected_at", "2026-05-16"),
        )
        issue_params = (
            run_id,
            record["issue_id"],
            tenant_id,
            record["entity_id"],
            record["entity_type"],
            record["severity"],
            record["score"],
            record["detected_at"],
            json.dumps(record),
        )
        db.execute(
            """INSERT INTO run_issues
                   (run_id, issue_id, tenant_id, entity_id, entity_type,
                    severity, score, detected_at, issue_json)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            issue_params,
        )


# --- pure handler -----------------------------------------------------

@pytest.fixture
def svc():
    """Yield an ``OrgStateService`` opened on ``":memory:"`` and close
    it once the test has finished with it."""
    service = OrgStateService(":memory:")
    yield service
    service.close()


def test_handler_emits_header_row(svc):
    """The first CSV row is the fixed seven-column header, even for a
    run that has no issues."""
    expected_header = [
        "issue_id",
        "entity_id",
        "entity_type",
        "severity",
        "score",
        "title",
        "detected_at",
    ]
    svc.register_tenant("acme", "ACME")
    _insert_run_with_issues(svc, "acme", "r1", [])
    csv_text = handlers.issues_csv(svc, "r1")
    parsed = list(csv.reader(io.StringIO(csv_text)))
    assert parsed[0] == expected_header


def test_handler_emits_body_rows_in_score_order(svc):
    """Body rows come out highest score first, regardless of insert
    order, so the worst issue sits at the top of the sheet."""
    seeded = [
        {"entity_id": "low", "severity": "low", "score": 0.1,
         "title": "minor"},
        {"entity_id": "crit", "severity": "critical", "score": 0.9,
         "title": "very bad"},
        {"entity_id": "med", "severity": "medium", "score": 0.5,
         "title": "ok"},
    ]
    svc.register_tenant("acme", "ACME")
    _insert_run_with_issues(svc, "acme", "r1", seeded)
    csv_text = handlers.issues_csv(svc, "r1")
    parsed = list(csv.reader(io.StringIO(csv_text)))
    # Skip the header (row 0); column 1 is entity_id.
    entity_ids = [row[1] for row in parsed[1:]]
    assert entity_ids == ["crit", "med", "low"]


def test_handler_escapes_funky_titles(svc):
    """A title containing a comma, double quotes and a newline must
    survive a round trip through csv.reader as a single field of a
    single row (RFC 4180 escaping)."""
    tricky_title = 'title with, comma and "quote" and\nnewline'
    svc.register_tenant("acme", "ACME")
    _insert_run_with_issues(svc, "acme", "r1", [
        {"entity_id": "a", "severity": "high", "score": 0.7,
         "title": tricky_title},
    ])
    parsed = list(csv.reader(io.StringIO(handlers.issues_csv(svc, "r1"))))
    # Exactly header + one body row: the embedded newline did not split it.
    assert len(parsed) == 2
    title_column = 5
    assert parsed[1][title_column] == tricky_title


def test_handler_unknown_run_raises_404(svc):
    """Asking for a run id that was never inserted raises ``ApiError``.

    NOTE(review): only the exception type is asserted here; the 404
    status implied by the test name is not checked.
    """
    from infra.api.errors import ApiError

    missing_run_id = "r_ghost"
    with pytest.raises(ApiError):
        handlers.issues_csv(svc, missing_run_id)


def test_handler_empty_run_returns_only_header(svc):
    """A run with zero issues produces a CSV of exactly one row: the
    header."""
    svc.register_tenant("acme", "ACME")
    _insert_run_with_issues(svc, "acme", "r1", [])
    csv_text = handlers.issues_csv(svc, "r1")
    parsed = list(csv.reader(io.StringIO(csv_text)))
    assert len(parsed) == 1     # only the header


# --- HTTP route smoke -------------------------------------------------

def _client(dbfile=":memory:"):
    """Return a FastAPI ``TestClient`` wrapping ``create_app(dbfile)``.

    The calling test is skipped when ``fastapi`` or ``httpx`` cannot
    be imported.
    """
    for optional_dep in ("fastapi", "httpx"):
        pytest.importorskip(optional_dep)

    from fastapi.testclient import TestClient

    from infra.api import create_app

    app = create_app(dbfile)
    return TestClient(app)


def _bootstrap(tmp_path):
    """Create an on-disk database under ``tmp_path`` seeded for the
    HTTP tests and return ``(dbfile, keys)``.

    Seeds tenants ``acme`` (run ``r_acme`` with one critical issue) and
    ``globex`` (run ``r_globex`` with no issues). ``keys`` maps
    ``"acme_ro"`` / ``"globex_ro"`` to the raw value of a readonly API
    key for the matching tenant. The service is closed before
    returning, whether or not seeding succeeded.
    """
    dbfile = str(tmp_path / "csv.sqlite3")
    service = OrgStateService(dbfile)
    try:
        for tenant_id, display_name in (("acme", "ACME"),
                                        ("globex", "Globex")):
            service.register_tenant(tenant_id, display_name)

        acme_issues = [
            {"entity_id": "wh_north", "severity": "critical",
             "score": 0.91, "title": "drift severe"},
        ]
        _insert_run_with_issues(service, "acme", "r_acme", acme_issues)
        _insert_run_with_issues(service, "globex", "r_globex", [])

        keys = {
            f"{tenant_id}_ro":
                service.create_api_key(tenant_id, role="readonly").raw
            for tenant_id in ("acme", "globex")
        }
    finally:
        service.close()
    return dbfile, keys


def test_csv_route_returns_text_csv_content_type(tmp_path):
    """A readonly key fetching its own tenant's run gets a 200 whose
    content type is ``text/csv`` and names utf-8."""
    dbfile, keys = _bootstrap(tmp_path)
    http = _client(dbfile)
    auth = {"Authorization": f"Bearer {keys['acme_ro']}"}
    response = http.get("/runs/r_acme/issues.csv", headers=auth)
    assert response.status_code == 200
    content_type = response.headers["content-type"]
    # text/csv is the canonical type; charset utf-8 explicit
    assert content_type.startswith("text/csv")
    assert "utf-8" in content_type


def test_csv_route_includes_content_disposition_attachment(tmp_path):
    """Browsers download instead of inline-display when the header
    includes 'attachment; filename=...'. Excel + Sheets respect this.

    The status code is asserted first so an error response fails with
    a clear status mismatch rather than a misleading "attachment not
    in ''" message.
    """
    dbfile, keys = _bootstrap(tmp_path)
    client = _client(dbfile)
    r = client.get("/runs/r_acme/issues.csv",
                   headers={"Authorization": f"Bearer {keys['acme_ro']}"})
    assert r.status_code == 200
    cd = r.headers.get("content-disposition", "")
    assert "attachment" in cd
    assert "r_acme" in cd  # filename carries the run id


def test_csv_route_body_parses_as_csv(tmp_path):
    """The HTTP body parses as CSV: header first, then the seeded
    ``wh_north`` issue with its title in column 5.

    The status code and row count are checked before indexing so that
    an error response fails with a readable assertion instead of an
    ``IndexError`` on ``rows[1]``.
    """
    dbfile, keys = _bootstrap(tmp_path)
    client = _client(dbfile)
    r = client.get("/runs/r_acme/issues.csv",
                   headers={"Authorization": f"Bearer {keys['acme_ro']}"})
    assert r.status_code == 200
    rows = list(csv.reader(io.StringIO(r.text)))
    assert len(rows) >= 2  # header + at least the one seeded issue
    assert rows[0][0] == "issue_id"
    assert rows[1][1] == "wh_north"
    assert rows[1][5] == "drift severe"


def test_csv_route_no_key_returns_401(tmp_path):
    """A request without an Authorization header is rejected with
    401."""
    dbfile, _unused_keys = _bootstrap(tmp_path)
    response = _client(dbfile).get("/runs/r_acme/issues.csv")
    assert response.status_code == 401


def test_csv_route_cross_tenant_returns_404(tmp_path):
    """Cross-tenant access goes through require_run_access, which
    intentionally returns 404 (not 403) for runs on another tenant —
    so existence isn't disclosed via the distinction."""
    dbfile, keys = _bootstrap(tmp_path)
    http = _client(dbfile)
    # acme's key asking for globex's run.
    acme_auth = {"Authorization": f"Bearer {keys['acme_ro']}"}
    response = http.get("/runs/r_globex/issues.csv", headers=acme_auth)
    assert response.status_code == 404


def test_csv_route_unknown_run_returns_404(tmp_path):
    """A valid key requesting a run id that was never created gets
    404."""
    dbfile, keys = _bootstrap(tmp_path)
    http = _client(dbfile)
    auth = {"Authorization": f"Bearer {keys['acme_ro']}"}
    response = http.get("/runs/r_ghost/issues.csv", headers=auth)
    assert response.status_code == 404