# File size: 3,856 Bytes
# db06ffa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import json
import tempfile
import unittest
from pathlib import Path

from zsgdp.artifacts import MANIFEST_SCHEMA_VERSION, validate_artifact_manifest
from zsgdp.cli import main
from zsgdp.pipeline import parse_document
from zsgdp.schema import SCHEMA_VERSION


class ArtifactManifestTests(unittest.TestCase):
    """Exercise the artifact manifest produced alongside a parsed document.

    Each test parses a small Markdown sample into a throwaway output
    directory and then inspects ``artifact_manifest.json`` and/or the result
    of ``validate_artifact_manifest`` for that directory.
    """

    _MANIFEST_NAME = "artifact_manifest.json"
    _SAMPLE_TEXT = "# Report\n\nHello world.\n"

    def setUp(self):
        """Create a per-test scratch directory and the paths the tests share."""
        scratch = tempfile.TemporaryDirectory()
        # Registered cleanups run whether the test passes, fails, or errors,
        # so the directory never outlives the test.
        self.addCleanup(scratch.cleanup)
        self.tmp_path = Path(scratch.name)
        self.input_path = self.tmp_path / "sample.md"
        self.output_dir = self.tmp_path / "out"

    def _parse_sample(self, text=_SAMPLE_TEXT):
        """Write *text* to the sample input file and parse it into ``output_dir``.

        Returns whatever ``parse_document`` returns for the sample.
        """
        self.input_path.write_text(text, encoding="utf-8")
        return parse_document(self.input_path, self.output_dir)

    def _load_manifest(self):
        """Return the decoded ``artifact_manifest.json`` from ``output_dir``."""
        manifest_file = self.output_dir / self._MANIFEST_NAME
        return json.loads(manifest_file.read_text(encoding="utf-8"))

    def test_parse_writes_valid_artifact_manifest(self):
        """Parsing writes a manifest that matches the parse result and validates."""
        parsed = self._parse_sample()
        manifest = self._load_manifest()
        validation = validate_artifact_manifest(self.output_dir)

        self.assertEqual(manifest["doc_id"], parsed.doc_id)
        self.assertEqual(manifest["counts"]["chunks"], len(parsed.chunks))
        # assertIn reports the recorded paths on failure, unlike assertTrue(any(...)).
        recorded_paths = [record["path"] for record in manifest["files"]]
        self.assertIn("parsed_document.json", recorded_paths)
        self.assertTrue(validation["valid"])
        self.assertEqual(validation["checked_count"], manifest["artifact_count"])

    def test_manifest_records_schema_versions(self):
        """Manifest and validation both report the two schema versions."""
        parsed = self._parse_sample("# Report\n\nHello.\n")
        manifest = self._load_manifest()

        # Manifest format version is its own integer; parsed-document
        # schema version is a string echoed from the dataclass.
        self.assertEqual(manifest["schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(manifest["parsed_document_schema_version"], SCHEMA_VERSION)
        self.assertEqual(parsed.schema_version, SCHEMA_VERSION)

        # Validation echoes both versions so callers can gate on them.
        validation = validate_artifact_manifest(self.output_dir)
        self.assertEqual(validation["manifest_schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(validation["parsed_document_schema_version"], SCHEMA_VERSION)

    def test_validate_artifact_manifest_detects_checksum_mismatch(self):
        """Overwriting an artifact after parsing makes validation fail."""
        self._parse_sample()

        # Change the artifact's bytes so they no longer match the manifest.
        (self.output_dir / "document.md").write_text("tampered\n", encoding="utf-8")
        validation = validate_artifact_manifest(self.output_dir)

        self.assertFalse(validation["valid"])
        self.assertIn("SHA-256 mismatch: document.md", validation["errors"])

    def test_validate_artifacts_cli_writes_report(self):
        """The ``validate-artifacts`` CLI command exits 0 and writes a valid report."""
        report_path = self.tmp_path / "validation.json"
        self._parse_sample()

        code = main(
            [
                "validate-artifacts",
                "--parsed",
                str(self.output_dir),
                "--output",
                str(report_path),
            ]
        )

        self.assertEqual(code, 0)
        self.assertTrue(report_path.exists())
        report = json.loads(report_path.read_text(encoding="utf-8"))
        self.assertTrue(report["valid"])


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()