File size: 5,322 Bytes
"""Tests for doc_redaction.cli_api output path discovery."""
from __future__ import annotations
import sys
import time
from pathlib import Path
import pytest
pytest.importorskip("pikepdf")
# Two levels up from this test file; presumably the repository root — confirm
# against the actual layout.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory put on sys.path below. NOTE(review): looks like this is where
# ``remote_redaction`` (imported further down) lives — confirm.
_PI_SRC = REPO_ROOT / "agent-redact" / "pi"
# Prepend once so repeated imports of this module do not stack duplicate entries.
if str(_PI_SRC) not in sys.path:
    sys.path.insert(0, str(_PI_SRC))
from remote_redaction import ( # noqa: E402
discover_redaction_outputs,
resolve_redaction_output_paths,
)
from doc_redaction.cli_api import ( # noqa: E402
_run_cli,
_snapshot_files_newer_than,
)
def test_snapshot_files_newer_than_includes_overwritten_files(tmp_path: Path) -> None:
    """A file that existed before the cutoff but is rewritten after it is reported."""
    review_csv = tmp_path / "doc_review_file.csv"
    review_csv.write_text("old", encoding="utf-8")

    # Short pauses on both sides of the cutoff keep the first write strictly
    # before it and the overwrite strictly after it.
    time.sleep(0.02)
    cutoff = time.time()
    time.sleep(0.02)

    review_csv.write_text("new", encoding="utf-8")
    expected_path = str(review_csv.resolve())

    snapshot = _snapshot_files_newer_than(str(tmp_path), cutoff)
    assert expected_path in snapshot
def test_run_cli_pins_session_hash_for_stateless_doc_redact(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """SESSION_OUTPUT_FOLDER with an empty username must reuse a single session hash.

    The patched hash generator would hand out a different value on a second
    call, so the assertions below fail if more than one hash is generated for
    the run.
    """
    base_dir = tmp_path / "output"
    base_dir.mkdir()

    # Consumed front-to-back: the first call yields "abcd1234", a second call
    # would yield "wxyz9876".
    pending_hashes = ["abcd1234", "wxyz9876"]

    def _next_hash() -> str:
        return pending_hashes.pop(0)

    monkeypatch.setattr("cli_redact._generate_session_hash", _next_hash)

    from cli_redact import get_username_and_folders

    seen_usernames: list[str] = []
    output_dirs: list[str] = []

    def _fake_cli_main(direct_mode_args: dict | None = None) -> None:
        # Stand-in for cli_redact.main: record the username it was handed,
        # resolve the output folder from it, and drop one fake PDF there.
        assert direct_mode_args is not None
        username = str(direct_mode_args.get("username") or "")
        seen_usernames.append(username)

        folders = get_username_and_folders(
            username=username,
            output_folder_textbox=str(direct_mode_args["output_dir"]),
            input_folder_textbox=str(direct_mode_args.get("input_dir") or ""),
            session_output_folder=bool(direct_mode_args.get("save_to_user_folders")),
        )
        # Only the second element (the effective output folder) is used here.
        _, effective_out, _, _, _, _, _, _ = folders
        output_dirs.append(effective_out)

        target_dir = Path(effective_out)
        target_dir.mkdir(parents=True, exist_ok=True)
        (target_dir / "example_redacted.pdf").write_bytes(b"%PDF")

    monkeypatch.setattr("cli_redact.main", _fake_cli_main)

    cli_overrides = {
        "task": "redact",
        "input_file": ["example.pdf"],
        "save_to_user_folders": True,
    }
    returned = _run_cli(
        gradio_api_name="doc_redact",
        overrides=cli_overrides,
        output_dir=str(base_dir) + "/",
    )

    # The CLI saw exactly the first hash as its username ...
    assert seen_usernames == ["abcd1234"]
    assert len(output_dirs) == 1
    assert "abcd1234" in output_dirs[0]
    # ... and the single returned path sits under that same hash.
    assert len(returned) == 1
    assert returned[0].endswith("example_redacted.pdf")
    assert "abcd1234" in returned[0]
def test_run_cli_returns_touched_files_on_rerun(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A rerun returns both the overwritten review file and the new PDF."""
    session_dir = tmp_path / "session_out"
    session_dir.mkdir(parents=True)
    review_csv = session_dir / "example_review_file.csv"
    redacted_pdf = session_dir / "example_redacted.pdf"

    # Pre-existing output, as if left behind by an earlier run.
    review_csv.write_text("from prior run", encoding="utf-8")

    def _fake_effective_output_dir(merged: dict) -> str:
        # Always point at the session folder, whatever arguments were merged.
        return str(session_dir)

    def _fake_cli_main(direct_mode_args: dict | None = None) -> None:
        # Overwrite the existing review file and create a new PDF.
        review_csv.write_text("updated this run", encoding="utf-8")
        redacted_pdf.write_bytes(b"%PDF")

    monkeypatch.setattr(
        "doc_redaction.cli_api._effective_output_dir",
        _fake_effective_output_dir,
    )
    monkeypatch.setattr("cli_redact.main", _fake_cli_main)

    returned = _run_cli(
        gradio_api_name="doc_redact",
        overrides={"task": "redact", "input_file": ["example.pdf"]},
        output_dir=str(tmp_path / "base_out"),
    )

    for produced in (review_csv, redacted_pdf):
        assert str(produced.resolve()) in returned
def test_resolve_redaction_output_paths_falls_back_to_discover(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A result with no file paths still resolves to the review CSV on disk.

    The result tuple passed in carries an empty path list; the test expects the
    review file under the patched output root to be returned anyway.
    """
    output_root = tmp_path / "output"
    session_dir = output_root / "user_session"
    session_dir.mkdir(parents=True)

    review_csv = session_dir / "example_of_emails_sent_review_file.csv"
    review_csv.write_text("a,b\n", encoding="utf-8")

    def _fake_output_root() -> Path:
        return output_root

    monkeypatch.setattr(
        "remote_redaction.doc_redaction_output_root",
        _fake_output_root,
    )

    api_result = ([], "doc_redact completed")
    resolved = resolve_redaction_output_paths(
        api_result,
        document_stem="example_of_emails_sent",
    )

    assert str(review_csv.resolve()) in resolved
def test_discover_redaction_outputs_respects_since(tmp_path: Path) -> None:
    """Only files written after ``since`` are reported by discovery.

    One CSV is written before the cutoff and one after it; the test expects
    the later file to be found and the earlier one to be left out.
    """
    out_root = tmp_path / "output"
    out_root.mkdir()

    old = out_root / "example_of_emails_old.csv"
    old.write_text("old", encoding="utf-8")

    # Short pauses on both sides of the cutoff keep the two writes strictly
    # before and strictly after it.
    time.sleep(0.02)
    since = time.time()
    time.sleep(0.02)

    new = out_root / "example_of_emails_new.csv"
    new.write_text("new", encoding="utf-8")

    # Patch the output root through pytest's MonkeyPatch (same string target as
    # the other tests in this module) instead of hand-rolled save/restore; the
    # context manager undoes the patch even if discovery raises.
    with pytest.MonkeyPatch.context() as patcher:
        patcher.setattr(
            "remote_redaction.doc_redaction_output_root",
            lambda: out_root,
        )
        found = discover_redaction_outputs("example_of_emails", since=since)

    assert str(new.resolve()) in found
    assert str(old.resolve()) not in found
|