# zeroshotGPU / tests / test_artifacts.py
# Arjunvir Singh
# Initial commit: zeroshotGPU MVP with full eval surface
# db06ffa
import json
import tempfile
import unittest
from pathlib import Path
from zsgdp.artifacts import MANIFEST_SCHEMA_VERSION, validate_artifact_manifest
from zsgdp.cli import main
from zsgdp.pipeline import parse_document
from zsgdp.schema import SCHEMA_VERSION
class ArtifactManifestTests(unittest.TestCase):
    """Tests for the ``artifact_manifest.json`` written next to parsed output.

    Each test parses a tiny Markdown document into a temporary directory and
    then inspects the manifest file, the result of
    ``validate_artifact_manifest``, or the ``validate-artifacts`` CLI command.
    """

    def _parse_sample(self, text="# Report\n\nHello world.\n"):
        """Parse a small Markdown file inside a fresh temporary directory.

        The directory is registered with ``addCleanup`` so it is removed when
        the test finishes; the returned paths stay usable for the whole test.

        Args:
            text: Markdown content written to ``sample.md`` before parsing.

        Returns:
            A ``(tmp_path, output_dir, parsed)`` tuple: the temporary root,
            the directory passed to ``parse_document``, and the value
            ``parse_document`` returned.
        """
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        tmp_path = Path(tmp.name)
        input_path = tmp_path / "sample.md"
        output_dir = tmp_path / "out"
        input_path.write_text(text, encoding="utf-8")
        parsed = parse_document(input_path, output_dir)
        return tmp_path, output_dir, parsed

    @staticmethod
    def _load_manifest(output_dir):
        """Return the decoded ``artifact_manifest.json`` from *output_dir*."""
        manifest_path = output_dir / "artifact_manifest.json"
        return json.loads(manifest_path.read_text(encoding="utf-8"))

    def test_parse_writes_valid_artifact_manifest(self):
        """Parsing writes a manifest that matches the result and validates."""
        _, output_dir, parsed = self._parse_sample()
        manifest = self._load_manifest(output_dir)
        validation = validate_artifact_manifest(output_dir)

        self.assertEqual(manifest["doc_id"], parsed.doc_id)
        self.assertEqual(manifest["counts"]["chunks"], len(parsed.chunks))
        self.assertTrue(
            any(record["path"] == "parsed_document.json" for record in manifest["files"]),
            msg="parsed_document.json is not listed in manifest['files']",
        )
        self.assertTrue(validation["valid"])
        self.assertEqual(validation["checked_count"], manifest["artifact_count"])

    def test_manifest_records_schema_versions(self):
        """Manifest and validation report both expose the two schema versions."""
        _, output_dir, parsed = self._parse_sample("# Report\n\nHello.\n")
        manifest = self._load_manifest(output_dir)

        # Manifest format version is its own integer; parsed-document
        # schema version is a string echoed from the dataclass.
        self.assertEqual(manifest["schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(manifest["parsed_document_schema_version"], SCHEMA_VERSION)
        self.assertEqual(parsed.schema_version, SCHEMA_VERSION)

        # Validation echoes both versions so callers can gate on them.
        validation = validate_artifact_manifest(output_dir)
        self.assertEqual(validation["manifest_schema_version"], MANIFEST_SCHEMA_VERSION)
        self.assertEqual(validation["parsed_document_schema_version"], SCHEMA_VERSION)

    def test_validate_artifact_manifest_detects_checksum_mismatch(self):
        """Overwriting an artifact after parsing is reported as a hash mismatch."""
        _, output_dir, _ = self._parse_sample()
        # Tamper with an artifact after the manifest has been written.
        (output_dir / "document.md").write_text("tampered\n", encoding="utf-8")

        validation = validate_artifact_manifest(output_dir)

        self.assertFalse(validation["valid"])
        # assertIn prints the actual error list when the expected entry is missing.
        self.assertIn("SHA-256 mismatch: document.md", validation["errors"])

    def test_validate_artifacts_cli_writes_report(self):
        """The ``validate-artifacts`` command returns 0 and writes a valid report."""
        tmp_path, output_dir, _ = self._parse_sample()
        report_path = tmp_path / "validation.json"

        code = main(["validate-artifacts", "--parsed", str(output_dir), "--output", str(report_path)])

        self.assertEqual(code, 0)
        self.assertTrue(report_path.exists())
        self.assertTrue(json.loads(report_path.read_text(encoding="utf-8"))["valid"])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()