| """Tests for doc_redaction.cli_api output path discovery.""" |
|
|
| from __future__ import annotations |
|
|
| import sys |
| import time |
| from pathlib import Path |
|
|
| import pytest |
|
|
# Skip every test in this module when pikepdf cannot be imported.
# NOTE(review): presumably the project modules imported further down need
# pikepdf at import time — confirm.
pytest.importorskip("pikepdf")
|
|
# Repository root is two directory levels above this file.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Prepend the agent-redact/pi directory to sys.path (only once) so modules
# located there can be imported by name.
_PI_SRC = REPO_ROOT.joinpath("agent-redact", "pi")
if sys.path.count(str(_PI_SRC)) == 0:
    sys.path.insert(0, str(_PI_SRC))
|
|
| from remote_redaction import ( |
| discover_redaction_outputs, |
| resolve_redaction_output_paths, |
| ) |
|
|
| from doc_redaction.cli_api import ( |
| _run_cli, |
| _snapshot_files_newer_than, |
| ) |
|
|
|
|
def test_snapshot_files_newer_than_includes_overwritten_files(tmp_path: Path) -> None:
    """A file rewritten after the cutoff timestamp shows up in the snapshot."""
    review_csv = tmp_path.joinpath("doc_review_file.csv")
    review_csv.write_text("old", encoding="utf-8")

    # Pause on both sides of the cutoff so the first write lands before it
    # and the overwrite lands after it.
    time.sleep(0.02)
    cutoff = time.time()
    time.sleep(0.02)

    review_csv.write_text("new", encoding="utf-8")
    expected_entry = str(review_csv.resolve())

    snapshot = _snapshot_files_newer_than(str(tmp_path), cutoff)
    assert expected_entry in snapshot
|
|
|
|
def test_run_cli_pins_session_hash_for_stateless_doc_redact(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """SESSION_OUTPUT_FOLDER + empty username must not generate two session hashes."""
    pinned_hash = "abcd1234"
    base_dir = tmp_path / "output"
    base_dir.mkdir()

    # Two different hashes are queued; every assertion below expects only the
    # first one to appear, so a second generation would be visible.
    pending_hashes = [pinned_hash, "wxyz9876"]

    def _next_hash() -> str:
        return pending_hashes.pop(0)

    monkeypatch.setattr("cli_redact._generate_session_hash", _next_hash)

    from cli_redact import get_username_and_folders

    seen_output_dirs: list[str] = []
    seen_usernames: list[str] = []

    def _fake_cli_main(direct_mode_args: dict | None = None) -> None:
        """Stand-in for ``cli_redact.main``: record the inputs and write one PDF."""
        assert direct_mode_args is not None
        username = str(direct_mode_args.get("username") or "")
        seen_usernames.append(username)

        folders = get_username_and_folders(
            username=username,
            output_folder_textbox=str(direct_mode_args["output_dir"]),
            input_folder_textbox=str(direct_mode_args.get("input_dir") or ""),
            session_output_folder=bool(direct_mode_args.get("save_to_user_folders")),
        )
        # The helper returns eight values; only the second one is used here.
        _, resolved_out, _, _, _, _, _, _ = folders
        seen_output_dirs.append(resolved_out)

        target_dir = Path(resolved_out)
        target_dir.mkdir(parents=True, exist_ok=True)
        (target_dir / "example_redacted.pdf").write_bytes(b"%PDF")

    monkeypatch.setattr("cli_redact.main", _fake_cli_main)

    cli_overrides = {
        "task": "redact",
        "input_file": ["example.pdf"],
        "save_to_user_folders": True,
    }
    paths = _run_cli(
        gradio_api_name="doc_redact",
        overrides=cli_overrides,
        output_dir=f"{base_dir}/",
    )

    assert seen_usernames == [pinned_hash]
    assert len(seen_output_dirs) == 1
    assert pinned_hash in seen_output_dirs[0]
    assert len(paths) == 1
    assert paths[0].endswith("example_redacted.pdf")
    assert pinned_hash in paths[0]
|
|
|
|
def test_run_cli_returns_touched_files_on_rerun(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A file overwritten during the run is returned together with a newly created one."""
    session_dir = tmp_path / "session_out"
    session_dir.mkdir(parents=True)
    review_csv = session_dir / "example_review_file.csv"
    redacted_pdf = session_dir / "example_redacted.pdf"

    # Pre-existing file from an earlier run; the fake CLI below overwrites it.
    review_csv.write_text("from prior run", encoding="utf-8")

    def _fake_cli_main(direct_mode_args: dict | None = None) -> None:
        """Stand-in for ``cli_redact.main``: rewrite the CSV and create the PDF."""
        review_csv.write_text("updated this run", encoding="utf-8")
        redacted_pdf.write_bytes(b"%PDF")

    # Force the effective output directory to the prepared session folder,
    # regardless of the output_dir passed to _run_cli.
    monkeypatch.setattr(
        "doc_redaction.cli_api._effective_output_dir",
        lambda merged: str(session_dir),
    )
    monkeypatch.setattr("cli_redact.main", _fake_cli_main)

    returned = _run_cli(
        gradio_api_name="doc_redact",
        overrides={"task": "redact", "input_file": ["example.pdf"]},
        output_dir=str(tmp_path / "base_out"),
    )

    for produced in (review_csv, redacted_pdf):
        assert str(produced.resolve()) in returned
|
|
|
|
def test_resolve_redaction_output_paths_falls_back_to_discover(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """With an empty first element in the result tuple, the review file on disk is still returned."""
    output_root = tmp_path / "output"
    session_dir = output_root / "user_session"
    session_dir.mkdir(parents=True)

    review_csv = session_dir / "example_of_emails_sent_review_file.csv"
    review_csv.write_text("a,b\n", encoding="utf-8")

    # Point the output-root lookup at the temporary tree built above.
    monkeypatch.setattr(
        "remote_redaction.doc_redaction_output_root",
        lambda: output_root,
    )

    result_without_paths = ([], "doc_redact completed")
    resolved = resolve_redaction_output_paths(
        result_without_paths,
        document_stem="example_of_emails_sent",
    )
    assert str(review_csv.resolve()) in resolved
|
|
|
|
def test_discover_redaction_outputs_respects_since(tmp_path: Path) -> None:
    """Discovery with ``since`` returns the file written after it, not the one before.

    Two CSV files sharing the ``example_of_emails`` stem are written on either
    side of a timestamp; only the later one may appear in the result.
    """
    out_root = tmp_path / "output"
    out_root.mkdir()
    old = out_root / "example_of_emails_old.csv"
    old.write_text("old", encoding="utf-8")
    # Pause on both sides of the cutoff so the two writes fall clearly before
    # and after it.
    time.sleep(0.02)
    since = time.time()
    time.sleep(0.02)
    new = out_root / "example_of_emails_new.csv"
    new.write_text("new", encoding="utf-8")

    # MonkeyPatch.context() undoes the patch when the block exits, even if
    # discovery raises, so no manual save/restore of the attribute is needed.
    with pytest.MonkeyPatch.context() as patcher:
        patcher.setattr(
            "remote_redaction.doc_redaction_output_root",
            lambda: out_root,
        )
        found = discover_redaction_outputs("example_of_emails", since=since)

    assert str(new.resolve()) in found
    assert str(old.resolve()) not in found
|
|