# Spaces: Running on Zero
import json
import tempfile
import unittest
from pathlib import Path

from zsgdp.artifacts import MANIFEST_SCHEMA_VERSION, validate_artifact_manifest
from zsgdp.cli import main
from zsgdp.pipeline import parse_document
from zsgdp.schema import SCHEMA_VERSION


class ArtifactManifestTests(unittest.TestCase):
    """Tests for the artifact manifest expected alongside parsed output.

    Each test parses a small Markdown sample into a temporary directory and
    then inspects ``artifact_manifest.json`` in the output directory and/or
    the report returned by ``validate_artifact_manifest`` for it.
    """

    def _parse_sample(self, text="# Report\n\nHello world.\n"):
        """Parse a throwaway Markdown sample in a fresh temporary directory.

        Writes *text* to ``sample.md`` and calls ``parse_document`` with
        ``out`` (inside the same temporary directory) as the output
        directory. The temporary directory is removed through
        ``addCleanup`` once the calling test has finished.

        Args:
            text: Markdown content written to ``sample.md``.

        Returns:
            A ``(tmp_path, output_dir, parsed)`` tuple: the temporary
            directory, the output directory handed to ``parse_document``,
            and the value ``parse_document`` returned.
        """
        tmp = tempfile.TemporaryDirectory()
        # Register cleanup before doing any work so the directory is removed
        # even if writing or parsing raises.
        self.addCleanup(tmp.cleanup)
        tmp_path = Path(tmp.name)
        input_path = tmp_path / "sample.md"
        output_dir = tmp_path / "out"
        input_path.write_text(text, encoding="utf-8")
        parsed = parse_document(input_path, output_dir)
        return tmp_path, output_dir, parsed

    @staticmethod
    def _load_manifest(output_dir):
        """Return the decoded ``artifact_manifest.json`` found in *output_dir*."""
        manifest_path = output_dir / "artifact_manifest.json"
        return json.loads(manifest_path.read_text(encoding="utf-8"))

    def test_parse_writes_valid_artifact_manifest(self):
        """Parsing yields a manifest that matches the result and validates."""
        _, output_dir, parsed = self._parse_sample()
        manifest = self._load_manifest(output_dir)
        validation = validate_artifact_manifest(output_dir)

        self.assertEqual(manifest["doc_id"], parsed.doc_id)
        self.assertEqual(manifest["counts"]["chunks"], len(parsed.chunks))
        # assertIn reports the recorded paths on failure, unlike
        # assertTrue(any(...)), which only says "False is not true".
        self.assertIn(
            "parsed_document.json",
            [record["path"] for record in manifest["files"]],
        )
        self.assertTrue(validation["valid"])
        self.assertEqual(validation["checked_count"], manifest["artifact_count"])

    def test_manifest_records_schema_versions(self):
        """Manifest and validation report carry both schema versions."""
        _, output_dir, parsed = self._parse_sample("# Report\n\nHello.\n")
        manifest = self._load_manifest(output_dir)

        # Manifest format version is its own integer; parsed-document
        # schema version is a string echoed from the dataclass.
        self.assertEqual(manifest["schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(manifest["parsed_document_schema_version"], SCHEMA_VERSION)
        self.assertEqual(parsed.schema_version, SCHEMA_VERSION)

        # Validation echoes both versions so callers can gate on them.
        validation = validate_artifact_manifest(output_dir)
        self.assertEqual(validation["manifest_schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(validation["parsed_document_schema_version"], SCHEMA_VERSION)

    def test_validate_artifact_manifest_detects_checksum_mismatch(self):
        """Overwriting an output file after parsing makes validation fail."""
        _, output_dir, _ = self._parse_sample()
        # Tamper with one artifact so it no longer matches what was recorded.
        (output_dir / "document.md").write_text("tampered\n", encoding="utf-8")

        validation = validate_artifact_manifest(output_dir)

        self.assertFalse(validation["valid"])
        self.assertIn("SHA-256 mismatch: document.md", validation["errors"])

    def test_validate_artifacts_cli_writes_report(self):
        """``validate-artifacts`` exits 0 and writes a report marked valid."""
        tmp_path, output_dir, _ = self._parse_sample()
        report_path = tmp_path / "validation.json"

        code = main(
            [
                "validate-artifacts",
                "--parsed",
                str(output_dir),
                "--output",
                str(report_path),
            ]
        )

        self.assertEqual(code, 0)
        self.assertTrue(report_path.exists())
        report = json.loads(report_path.read_text(encoding="utf-8"))
        self.assertTrue(report["valid"])


# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()