import json
import tempfile
import unittest
from pathlib import Path

from zsgdp.artifacts import MANIFEST_SCHEMA_VERSION, validate_artifact_manifest
from zsgdp.cli import main
from zsgdp.pipeline import parse_document
from zsgdp.schema import SCHEMA_VERSION


class ArtifactManifestTests(unittest.TestCase):
    """Tests for the artifact manifest found in a parse output directory."""

    def _parse_sample(self, tmp_path, text="# Report\n\nHello world.\n"):
        """Write a small Markdown sample under *tmp_path* and parse it.

        Args:
            tmp_path: Directory that receives ``sample.md`` and the ``out``
                output directory.
            text: Markdown content written to ``sample.md``.

        Returns:
            A ``(parsed, output_dir)`` tuple: the value returned by
            ``parse_document`` and the output directory passed to it.
        """
        input_path = tmp_path / "sample.md"
        output_dir = tmp_path / "out"
        input_path.write_text(text, encoding="utf-8")
        return parse_document(input_path, output_dir), output_dir

    @staticmethod
    def _load_manifest(output_dir):
        """Return the decoded ``artifact_manifest.json`` from *output_dir*."""
        manifest_path = output_dir / "artifact_manifest.json"
        return json.loads(manifest_path.read_text(encoding="utf-8"))

    def test_parse_writes_valid_artifact_manifest(self):
        """The manifest matches the parsed document and validates cleanly."""
        with tempfile.TemporaryDirectory() as tmp:
            parsed, output_dir = self._parse_sample(Path(tmp))

            manifest = self._load_manifest(output_dir)
            validation = validate_artifact_manifest(output_dir)

            self.assertEqual(manifest["doc_id"], parsed.doc_id)
            self.assertEqual(manifest["counts"]["chunks"], len(parsed.chunks))
            # assertIn reports the recorded paths when the entry is missing.
            recorded_paths = [record["path"] for record in manifest["files"]]
            self.assertIn("parsed_document.json", recorded_paths)
            self.assertTrue(validation["valid"])
            self.assertEqual(validation["checked_count"], manifest["artifact_count"])

    def test_manifest_records_schema_versions(self):
        """Manifest and validation report both expose the schema versions."""
        with tempfile.TemporaryDirectory() as tmp:
            parsed, output_dir = self._parse_sample(
                Path(tmp), "# Report\n\nHello.\n"
            )

            manifest = self._load_manifest(output_dir)

            # Manifest format version is its own integer; parsed-document
            # schema version is a string echoed from the dataclass.
            self.assertEqual(manifest["schema_version"], MANIFEST_SCHEMA_VERSION)
            self.assertEqual(manifest["parsed_document_schema_version"], SCHEMA_VERSION)
            self.assertEqual(parsed.schema_version, SCHEMA_VERSION)

            # Validation echoes both versions so callers can gate on them.
            validation = validate_artifact_manifest(output_dir)
            self.assertEqual(validation["manifest_schema_version"], MANIFEST_SCHEMA_VERSION)
            self.assertEqual(validation["parsed_document_schema_version"], SCHEMA_VERSION)

    def test_validate_artifact_manifest_detects_checksum_mismatch(self):
        """Overwriting an artifact after parsing is reported as a hash mismatch."""
        with tempfile.TemporaryDirectory() as tmp:
            _, output_dir = self._parse_sample(Path(tmp))
            # Change the artifact's content after the manifest was produced.
            (output_dir / "document.md").write_text("tampered\n", encoding="utf-8")

            validation = validate_artifact_manifest(output_dir)

            self.assertFalse(validation["valid"])
            self.assertIn("SHA-256 mismatch: document.md", validation["errors"])

    def test_validate_artifacts_cli_writes_report(self):
        """``validate-artifacts`` returns 0 and writes a report marked valid."""
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            report_path = tmp_path / "validation.json"
            _, output_dir = self._parse_sample(tmp_path)

            code = main(
                [
                    "validate-artifacts",
                    "--parsed",
                    str(output_dir),
                    "--output",
                    str(report_path),
                ]
            )

            self.assertEqual(code, 0)
            self.assertTrue(report_path.exists())
            report = json.loads(report_path.read_text(encoding="utf-8"))
            self.assertTrue(report["valid"])


if __name__ == "__main__":
    unittest.main()