File size: 2,308 Bytes
5c6e6ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""CLI entrypoint smoke tests (fast). Optional integration tests are marked."""

from __future__ import annotations

import os
import subprocess
import sys
from pathlib import Path

import pytest

# Directory two levels above this file's resolved path (the parent of the
# folder containing this module); the tests use it as the working directory
# and as the base for locating cli_redact.py and example_data.
REPO_ROOT = Path(__file__).resolve().parents[1]


def test_cli_redact_help_exits_zero():
    """Smoke-check that running the ``cli_redact.py`` script with ``--help`` works.

    The script under ``REPO_ROOT`` is launched with the current interpreter,
    using ``REPO_ROOT`` as the working directory. The run must exit with
    status 0 and mention "usage" somewhere in its captured output.
    """
    script_path = REPO_ROOT / "cli_redact.py"
    command = [sys.executable, str(script_path), "--help"]

    completed = subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        capture_output=True,
        text=True,
        timeout=60,
    )

    # Show stderr on failure so a startup crash is visible in the report.
    assert completed.returncode == 0, completed.stderr

    # Look in both streams, case-insensitively.
    combined_output = completed.stdout + completed.stderr
    assert "usage" in combined_output.lower()


def test_cli_redact_module_help_exits_zero():
    """Check that ``python -m cli_redact --help`` succeeds (packaging smoke).

    The module is run with the current interpreter from ``REPO_ROOT``. The
    checks mirror ``test_cli_redact_help_exits_zero``: the process must exit
    with status 0 and its captured output must mention "usage".
    """
    result = subprocess.run(
        [sys.executable, "-m", "cli_redact", "--help"],
        cwd=str(REPO_ROOT),
        capture_output=True,
        text=True,
        timeout=60,
    )
    assert result.returncode == 0, result.stderr
    # A zero exit status alone does not show that help text was printed;
    # require the usage banner in either stream, as the script-path test does.
    assert "usage" in (result.stdout + result.stderr).lower()


@pytest.mark.integration
def test_cli_redact_smoke_pdf_local_text_optional(tmp_path):
    """
    Opt-in end-to-end run of the redact task on the bundled example PDF.

    The test is skipped unless the environment variable
    PYTEST_CLI_INTEGRATION is exactly "1", and also skipped when the example
    PDF is missing from ``example_data``. Otherwise ``cli_redact.py`` is run
    with the "Local text" OCR method and the "Local" PII detector, writing to
    a fresh directory under ``tmp_path``; the only check is a zero exit status.
    """
    opted_in = os.environ.get("PYTEST_CLI_INTEGRATION") == "1"
    if not opted_in:
        pytest.skip("Set PYTEST_CLI_INTEGRATION=1 to run CLI PDF smoke test")

    example_dir = REPO_ROOT / "example_data"
    sample_pdf = example_dir / "graduate-job-example-cover-letter.pdf"
    if not sample_pdf.is_file():
        pytest.skip(f"Example PDF not found: {sample_pdf}")

    output_dir = tmp_path / "out"
    output_dir.mkdir()

    # Flag/value pairs in the order they are passed on the command line.
    cli_options = (
        ("--task", "redact"),
        ("--input_file", str(sample_pdf)),
        ("--output_dir", str(output_dir)),
        ("--input_dir", str(example_dir)),
        ("--ocr_method", "Local text"),
        ("--pii_detector", "Local"),
    )
    command = [sys.executable, str(REPO_ROOT / "cli_redact.py")]
    for flag, value in cli_options:
        command.extend((flag, value))

    completed = subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        capture_output=True,
        text=True,
        timeout=600,
    )

    # Include both streams in the failure message to aid debugging.
    assert completed.returncode == 0, completed.stdout + "\n" + completed.stderr