| import pytest |
| from uuid import UUID, uuid4 |
| from datetime import datetime, timedelta |
|
|
| from pydantic import ValidationError |
|
|
| from tensorus.metadata.schemas import ( |
| TensorDescriptor, |
| SemanticMetadata, |
| DataType, |
| StorageFormat, |
| AccessControl, |
| CompressionInfo, |
| |
| LineageSourceType, |
| LineageSource, |
| ParentTensorLink, |
| TransformationStep, |
| VersionControlInfo, |
| LineageMetadata, |
| ComputationalMetadata, |
| QualityStatistics, |
| MissingValuesInfo, |
| OutlierInfo, |
| QualityMetadata, |
| RelatedTensorLink, |
| RelationalMetadata, |
| UsageAccessRecord, |
| UsageMetadata |
| ) |
|
|
| |
|
|
def test_tensor_descriptor_enhanced_valid():
    """Build a TensorDescriptor with every optional field set and read some back.

    The assertions cover the storage format, checksum, nested access-control
    and compression objects, and one entry of the free-form metadata dict.
    """
    created_at = datetime.utcnow()
    permissions = AccessControl(read=["user1"], write=["owner"], owner_permissions="rwd")
    compression = CompressionInfo(algorithm="gzip", level=6, settings={"comment": "test"})
    extra_metadata = {"source": "api_test", "quality_approved": True, "nested": {"value": 1}}

    descriptor = TensorDescriptor(
        tensor_id=uuid4(),
        dimensionality=2,
        shape=[100, 100],
        data_type=DataType.FLOAT32,
        storage_format=StorageFormat.NUMPY_NPZ,
        creation_timestamp=created_at,
        # Modification time is placed one second after creation.
        last_modified_timestamp=created_at + timedelta(seconds=1),
        owner="test_user_enhanced",
        access_control=permissions,
        byte_size=40000,
        checksum="md5:abcdef123456",
        compression_info=compression,
        tags=["enhanced", "testing"],
        metadata=extra_metadata,
    )

    assert descriptor.storage_format == StorageFormat.NUMPY_NPZ
    assert descriptor.checksum == "md5:abcdef123456"
    assert descriptor.access_control.owner_permissions == "rwd"
    assert descriptor.compression_info.settings["comment"] == "test"
    assert descriptor.metadata["quality_approved"] is True
|
|
| |
|
|
def test_lineage_source_valid():
    """LineageSource exposes the type, identifier and details it was built with."""
    fields = {
        "type": LineageSourceType.API,
        "identifier": "/api/v1/data/source",
        "details": {"param": "value"},
    }
    source = LineageSource(**fields)

    assert source.type == LineageSourceType.API
    assert source.identifier == "/api/v1/data/source"
    assert source.details["param"] == "value"
|
|
def test_parent_tensor_link_valid():
    """ParentTensorLink holds a UUID tensor_id and the given relationship label."""
    parent_id = uuid4()
    link = ParentTensorLink(tensor_id=parent_id, relationship="derived_from")

    assert isinstance(link.tensor_id, UUID)
    assert link.relationship == "derived_from"
|
|
def test_transformation_step_valid():
    """TransformationStep keeps its operation and parameters and carries a timestamp.

    No timestamp is passed in; the test only requires that the value found on
    the model is not later than the clock read after construction.
    """
    step = TransformationStep(
        operation="normalized",
        parameters={"mean": 0, "std": 1},
        operator="proc_x",
        software_version="lib-v1.2",
    )

    assert step.operation == "normalized"
    assert step.parameters["mean"] == 0

    checked_at = datetime.utcnow()
    assert step.timestamp <= checked_at
|
|
def test_version_control_info_valid():
    """VersionControlInfo returns the repository URL it was constructed with."""
    repo_url = "http://git.example.com/repo.git"
    info = VersionControlInfo(
        repository=repo_url,
        commit_hash="abcdef123",
        branch="main",
    )

    assert info.repository == repo_url
|
|
def test_lineage_metadata_valid():
    """LineageMetadata accepts nested source, parent, history and VCS objects.

    Checks the tensor id, the source type, the number of parent links, the
    version string and one provenance entry.
    """
    subject_id = uuid4()
    origin = LineageSource(type=LineageSourceType.FILE, identifier="/path/to/source.csv")
    parents = [ParentTensorLink(tensor_id=uuid4(), relationship="copied_from")]
    history = [TransformationStep(operation="cleaned")]
    vcs = VersionControlInfo(commit_hash="xyz789")

    lineage = LineageMetadata(
        tensor_id=subject_id,
        source=origin,
        parent_tensors=parents,
        transformation_history=history,
        version="v1.0.1",
        version_control=vcs,
        provenance={"user_notes": "Initial dataset processing"},
    )

    assert lineage.tensor_id == subject_id
    assert lineage.source.type == LineageSourceType.FILE
    assert len(lineage.parent_tensors) == 1
    assert lineage.version == "v1.0.1"
    assert lineage.provenance["user_notes"] == "Initial dataset processing"
|
|
| |
|
|
def test_computational_metadata_valid():
    """ComputationalMetadata stores the algorithm name and computation time."""
    hardware = {"cpu": "Intel Xeon", "ram_gb": 64}
    hyperparameters = {"n_components": 10}

    computation = ComputationalMetadata(
        tensor_id=uuid4(),
        algorithm="PCA",
        parameters=hyperparameters,
        computation_time_seconds=120.5,
        hardware_info=hardware,
    )

    assert computation.algorithm == "PCA"
    assert computation.computation_time_seconds == 120.5
|
|
def test_computational_metadata_invalid_time():
    """A negative computation time must raise ValidationError with the expected text."""
    expected_message = "Computation time cannot be negative"
    with pytest.raises(ValidationError, match=expected_message):
        ComputationalMetadata(
            tensor_id=uuid4(),
            computation_time_seconds=-10.0,
        )
|
|
| |
|
|
def test_quality_statistics_valid():
    """QualityStatistics returns the mean and an integer-keyed percentile entry."""
    stats = QualityStatistics(
        min_value=0.0,
        max_value=1.0,
        mean=0.5,
        std_dev=0.2,
        percentiles={50: 0.49},
    )

    assert stats.mean == 0.5
    assert stats.percentiles[50] == 0.49
|
|
def test_missing_values_info_valid():
    """MissingValuesInfo keeps the count and percentage it was given."""
    missing = MissingValuesInfo(
        count=10,
        percentage=1.5,
        strategy="imputed_mean",
    )

    assert missing.count == 10
    assert missing.percentage == 1.5
|
|
def test_missing_values_info_invalid_percentage():
    """Each of three invalid count/percentage combinations must raise ValidationError.

    The cases are a percentage of 110.0, a percentage of -5.0 and a count of -1.
    """
    rejected_inputs = (
        {"count": 0, "percentage": 110.0},
        {"count": 0, "percentage": -5.0},
        {"count": -1, "percentage": 10.0},
    )
    # Cases run in the same order as written; the first one that fails to raise fails the test.
    for kwargs in rejected_inputs:
        with pytest.raises(ValidationError):
            MissingValuesInfo(**kwargs)
|
|
def test_outlier_info_valid():
    """OutlierInfo keeps the outlier count it was constructed with."""
    outliers = OutlierInfo(
        count=5,
        percentage=0.5,
        method_used="IQR",
    )

    assert outliers.count == 5
|
|
def test_quality_metadata_valid():
    """QualityMetadata accepts nested statistics, missing-value and outlier objects."""
    stats = QualityStatistics(mean=10.0)
    missing = MissingValuesInfo(count=0, percentage=0.0)
    outlier_summary = OutlierInfo(count=1, percentage=0.01, method_used="Z-score")

    quality = QualityMetadata(
        tensor_id=uuid4(),
        statistics=stats,
        missing_values=missing,
        outliers=outlier_summary,
        confidence_score=0.95,
    )

    assert quality.statistics.mean == 10.0
    assert quality.confidence_score == 0.95
|
|
def test_quality_metadata_invalid_confidence():
    """Confidence scores of 1.5 and -0.1 must each raise ValidationError."""
    for bad_score in (1.5, -0.1):
        with pytest.raises(ValidationError):
            QualityMetadata(tensor_id=uuid4(), confidence_score=bad_score)
|
|
| |
|
|
def test_related_tensor_link_valid():
    """RelatedTensorLink holds a UUID related_tensor_id and the relationship type."""
    other_id = uuid4()
    link = RelatedTensorLink(
        related_tensor_id=other_id,
        relationship_type="augmented_version",
    )

    assert isinstance(link.related_tensor_id, UUID)
    assert link.relationship_type == "augmented_version"
|
|
def test_relational_metadata_valid():
    """RelationalMetadata keeps its related-tensor links and collection names."""
    collection_name = "dataset_A_main_features"
    links = [RelatedTensorLink(related_tensor_id=uuid4(), relationship_type="sample_of")]

    relations = RelationalMetadata(
        tensor_id=uuid4(),
        related_tensors=links,
        collections=[collection_name],
        dependencies=[uuid4()],
    )

    assert len(relations.related_tensors) == 1
    assert collection_name in relations.collections
|
|
| |
|
|
def test_usage_access_record_valid():
    """UsageAccessRecord keeps the caller name and reports status "success".

    No status is passed to the constructor, so the asserted value is whatever
    the model supplies on its own.
    """
    record = UsageAccessRecord(
        user_or_service="user_x",
        operation_type="read",
        details={"query": "full_tensor"},
    )

    assert record.user_or_service == "user_x"
    assert record.status == "success"
|
|
def test_usage_metadata_valid():
    """UsageMetadata built with two access records reports matching summary fields.

    Neither usage_frequency nor last_accessed_at is passed in; the test expects
    them to equal the number of records and the later access time respectively.
    """
    subject_id = uuid4()
    latest = datetime.utcnow()
    day_before = latest - timedelta(days=1)

    history = [
        UsageAccessRecord(user_or_service="user_a", operation_type="read", accessed_at=day_before),
        UsageAccessRecord(user_or_service="service_b", operation_type="transform", accessed_at=latest),
    ]
    usage = UsageMetadata(
        tensor_id=subject_id,
        access_history=history,
        application_references=["model_training_pipeline_X"],
    )

    assert usage.tensor_id == subject_id
    assert len(usage.access_history) == 2
    assert usage.usage_frequency == 2
    assert usage.last_accessed_at == latest
|
|
def test_usage_metadata_sync_validators():
    """Summary fields follow access_history across dump/validate round trips.

    Starts from an empty UsageMetadata, appends one record at a time, and after
    each append rebuilds the model from its dump before checking
    usage_frequency and last_accessed_at.
    """

    def _round_trip(model):
        # Rebuild the model from its own dump so validation runs on the new history.
        return UsageMetadata.model_validate(model.model_dump())

    empty_usage = UsageMetadata(tensor_id=uuid4())
    assert empty_usage.usage_frequency == 0
    assert empty_usage.last_accessed_at is None

    earlier = datetime.utcnow() - timedelta(minutes=5)
    later = datetime.utcnow()

    # NOTE(review): assertions are made on the re-validated copy, presumably because
    # an in-place append does not re-run the model's validators - confirm in the schema.
    first_access = UsageAccessRecord(user_or_service="u1", operation_type="read", accessed_at=earlier)
    empty_usage.access_history.append(first_access)
    after_first = _round_trip(empty_usage)

    assert after_first.usage_frequency == 1
    assert after_first.last_accessed_at == earlier

    second_access = UsageAccessRecord(user_or_service="u2", operation_type="write", accessed_at=later)
    after_first.access_history.append(second_access)
    after_second = _round_trip(after_first)

    assert after_second.usage_frequency == 2
    assert after_second.last_accessed_at == later
|
|
def test_usage_metadata_invalid_frequency():
    """A usage_frequency of -1 must raise ValidationError."""
    negative_frequency = -1
    with pytest.raises(ValidationError):
        UsageMetadata(tensor_id=uuid4(), usage_frequency=negative_frequency)
|
|
|
|
| |
def test_semantic_metadata_valid():
    """SemanticMetadata keeps the name and tensor id it was constructed with."""
    subject_id = uuid4()
    label = "image_class_label"

    semantic = SemanticMetadata(
        name=label,
        description="Describes the primary class identified in the image tensor.",
        tensor_id=subject_id,
    )

    assert semantic.name == label
    assert semantic.tensor_id == subject_id
|
|
def test_semantic_metadata_empty_name():
    """An empty name must raise ValidationError matching the expected message."""
    expected_message = "Name and description fields cannot be empty or just whitespace."
    with pytest.raises(ValidationError, match=expected_message):
        SemanticMetadata(
            name="",
            description="A description",
            tensor_id=uuid4(),
        )
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
def test_tensor_descriptor_new_defaults():
    """A TensorDescriptor built from a minimal field set exposes the expected defaults.

    Only dimensionality, shape, data type, owner and byte size are supplied;
    every asserted attribute is left for the model to fill in.
    """
    minimal_fields = {
        "dimensionality": 1,
        "shape": [10],
        "data_type": DataType.INT32,
        "owner": "test_user_defaults",
        "byte_size": 40,
    }
    descriptor = TensorDescriptor(**minimal_fields)

    assert descriptor.tags == []
    assert descriptor.metadata == {}
    assert descriptor.checksum is None
    assert descriptor.access_control.owner_permissions is None
    assert descriptor.storage_format == StorageFormat.RAW
    # tensor_id was not passed in, so a UUID is expected to have been generated.
    assert isinstance(descriptor.tensor_id, UUID)
|
|
| |
def test_extended_enums_in_tensor_descriptor():
    """TensorDescriptor accepts DataType.FLOAT16 and StorageFormat.HDF5."""
    chosen_dtype = DataType.FLOAT16
    chosen_format = StorageFormat.HDF5

    descriptor = TensorDescriptor(
        dimensionality=1,
        shape=[1],
        data_type=chosen_dtype,
        storage_format=chosen_format,
        owner="test",
        byte_size=2,
    )

    assert descriptor.data_type == chosen_dtype
    assert descriptor.storage_format == chosen_format
| |
|
|