Spaces:
Running on Zero
Running on Zero
File size: 3,856 Bytes
db06ffa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | import json
import tempfile
import unittest
from pathlib import Path
from zsgdp.artifacts import MANIFEST_SCHEMA_VERSION, validate_artifact_manifest
from zsgdp.cli import main
from zsgdp.pipeline import parse_document
from zsgdp.schema import SCHEMA_VERSION
class ArtifactManifestTests(unittest.TestCase):
    """Tests for the artifact manifest written next to a parsed document.

    Every test parses a small Markdown sample into a fresh scratch directory
    and then inspects ``artifact_manifest.json`` directly, through
    ``validate_artifact_manifest``, or through the ``validate-artifacts``
    CLI command.
    """

    def setUp(self):
        """Create an isolated scratch directory that is removed after the test."""
        scratch = tempfile.TemporaryDirectory()
        # addCleanup runs even when the test body raises, so the directory
        # is always removed, just as the per-test ``with`` block did before.
        self.addCleanup(scratch.cleanup)
        self.tmp_path = Path(scratch.name)
        self.input_path = self.tmp_path / "sample.md"
        self.output_dir = self.tmp_path / "out"

    def _parse_sample(self, text="# Report\n\nHello world.\n"):
        """Write *text* to the sample input file and parse it into ``output_dir``.

        Returns the value returned by ``parse_document``.
        """
        self.input_path.write_text(text, encoding="utf-8")
        return parse_document(self.input_path, self.output_dir)

    def _load_manifest(self):
        """Return the decoded ``artifact_manifest.json`` from ``output_dir``."""
        manifest_path = self.output_dir / "artifact_manifest.json"
        return json.loads(manifest_path.read_text(encoding="utf-8"))

    def test_parse_writes_valid_artifact_manifest(self):
        parsed = self._parse_sample()
        manifest = self._load_manifest()
        validation = validate_artifact_manifest(self.output_dir)

        self.assertEqual(manifest["doc_id"], parsed.doc_id)
        self.assertEqual(manifest["counts"]["chunks"], len(parsed.chunks))
        # assertIn reports the recorded paths on failure, unlike
        # assertTrue(any(...)) which only says "False is not true".
        recorded_paths = [record["path"] for record in manifest["files"]]
        self.assertIn("parsed_document.json", recorded_paths)
        self.assertTrue(validation["valid"])
        self.assertEqual(validation["checked_count"], manifest["artifact_count"])

    def test_manifest_records_schema_versions(self):
        parsed = self._parse_sample("# Report\n\nHello.\n")
        manifest = self._load_manifest()

        # Manifest format version is its own integer; parsed-document
        # schema version is a string echoed from the dataclass.
        self.assertEqual(manifest["schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(manifest["parsed_document_schema_version"], SCHEMA_VERSION)
        self.assertEqual(parsed.schema_version, SCHEMA_VERSION)

        # Validation echoes both versions so callers can gate on them.
        validation = validate_artifact_manifest(self.output_dir)
        self.assertEqual(validation["manifest_schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(validation["parsed_document_schema_version"], SCHEMA_VERSION)

    def test_validate_artifact_manifest_detects_checksum_mismatch(self):
        self._parse_sample()
        # Overwrite an artifact after the manifest was written so that its
        # content no longer matches what the manifest recorded.
        (self.output_dir / "document.md").write_text("tampered\n", encoding="utf-8")

        validation = validate_artifact_manifest(self.output_dir)

        self.assertFalse(validation["valid"])
        self.assertIn("SHA-256 mismatch: document.md", validation["errors"])

    def test_validate_artifacts_cli_writes_report(self):
        report_path = self.tmp_path / "validation.json"
        self._parse_sample()

        code = main(["validate-artifacts", "--parsed", str(self.output_dir), "--output", str(report_path)])

        self.assertEqual(code, 0)
        self.assertTrue(report_path.exists())
        self.assertTrue(json.loads(report_path.read_text(encoding="utf-8"))["valid"])
# Run the whole suite when this file is executed directly as a script;
# importing the module (e.g. via a test runner) does not trigger it.
if __name__ == "__main__":
    unittest.main()
|