Spaces:
Running
Running
"""Source manifest model for dataset freshness validation.

This module defines the Pydantic models for source_manifest.json, which
tracks source file hashes and metadata for cache invalidation and
dataset integrity verification.

The source manifest is generated during the build pipeline and stored
alongside the RAG artifacts in the HuggingFace dataset. On server startup,
the manifest is loaded and validated to ensure:
    1. Schema version compatibility (forward/backward migrations)
    2. Artifact integrity (file hashes match expected values)
    3. Build metadata is available for debugging

Manifest Structure:
    The manifest contains:
    - Schema version for future migrations
    - Creation timestamp for the build
    - Index version identifier (matches index_version.txt)
    - List of source files with hashes and metadata

Schema Versioning:
    The EXPECTED_SCHEMA_VERSION constant defines the schema version that
    this server code expects. If the downloaded manifest has a different
    schema version, validation will fail with a clear error message
    indicating the version mismatch.

Lazy Loading:
    Pydantic is imported inside a factory function to avoid import
    overhead at module load time. This follows the project's lazy
    loading pattern used throughout the codebase.

Example:
-------
    >>> from rag_chatbot.api.manifest import SourceManifest, SourceFileEntry
    >>> from datetime import datetime, UTC
    >>>
    >>> # Create a source file entry (sha256 must be 64 lowercase hex chars)
    >>> file_entry = SourceFileEntry(
    ...     path="data/raw/ashrae_55.pdf",
    ...     sha256=(
    ...         "e3b0c44298fc1c149afbf4c8996fb924"
    ...         "27ae41e4649b934ca495991b7852b855"
    ...     ),
    ...     size_bytes=1024000,
    ...     modified_at=datetime.now(UTC),
    ... )
    >>>
    >>> # Create a manifest
    >>> manifest = SourceManifest(
    ...     schema_version="1.0.0",
    ...     created_at=datetime.now(UTC),
    ...     index_version="2024.01.15.001",
    ...     source_files=[file_entry],
    ... )
    >>> manifest.schema_version
    '1.0.0'

"""
| from __future__ import annotations | |
| from typing import TYPE_CHECKING, Any | |
| if TYPE_CHECKING: | |
| from datetime import datetime | |
# =============================================================================
# Public API
# =============================================================================
# Names exported by ``from <this module> import *``. Everything else in the
# module (factories, caches, getters) is private.
__all__: list[str] = [
    "SourceManifest",
    "SourceFileEntry",
    "ManifestValidationError",
    "EXPECTED_SCHEMA_VERSION",
]
# =============================================================================
# Constants
# =============================================================================
EXPECTED_SCHEMA_VERSION: str = "1.0.0"
"""Schema version of source_manifest.json that this code expects.

The value is meant to be compared with a loaded manifest's
``schema_version``; a mismatch can be reported through
``ManifestValidationError`` (which carries expected and actual versions).

The version string follows semantic versioning (MAJOR.MINOR.PATCH):
    - MAJOR: breaking changes that require code changes to handle
    - MINOR: backward-compatible additions (new optional fields)
    - PATCH: backward-compatible fixes (documentation, etc.)

History:
    - 1.0.0: Initial schema version with core fields
"""
# =============================================================================
# Exceptions
# =============================================================================
class ManifestValidationError(Exception):
    """Signal that a source manifest did not pass validation.

    Besides the human-readable message, the error can carry the pair of
    schema versions involved in a mismatch and the name of the offending
    field, so callers can report or branch on them.

    Attributes:
    ----------
        message : str
            Human-readable description of the validation failure.
        expected_version : str | None
            Schema version the server expected, when relevant.
        actual_version : str | None
            Schema version found in the manifest, when relevant.
        field_name : str | None
            Name of the field that failed validation, when relevant.

    Example:
    -------
        >>> try:
        ...     validator.validate()
        ... except ManifestValidationError as e:
        ...     print(f"Validation failed: {e.message}")
        ...     if e.expected_version:
        ...         print(f"Expected version: {e.expected_version}")
        ...         print(f"Actual version: {e.actual_version}")

    """

    def __init__(
        self,
        message: str,
        *,
        expected_version: str | None = None,
        actual_version: str | None = None,
        field_name: str | None = None,
    ) -> None:
        """Store the message and the optional diagnostic details.

        Args:
        ----
            message: Human-readable description of the validation failure.
            expected_version: Schema version expected by server (optional).
            actual_version: Schema version found in manifest (optional).
            field_name: Name of the field that failed validation (optional).

        """
        # Only the message is forwarded to Exception, so ``args`` stays
        # ``(message,)``; the extra details live on the instance.
        super().__init__(message)
        self.message = message
        self.expected_version = expected_version
        self.actual_version = actual_version
        self.field_name = field_name

    def __str__(self) -> str:
        """Render the message followed by any available details.

        Returns
        -------
            The message, then ``(expected: X, actual: Y)`` when both versions
            are set, then ``[field: NAME]`` when a non-empty field name is
            set, all separated by single spaces.

        """
        details: list[str] = []

        # The version suffix is only meaningful when both sides are known.
        have_both_versions = not (
            self.expected_version is None or self.actual_version is None
        )
        if have_both_versions:
            details.append(
                f"(expected: {self.expected_version}, actual: {self.actual_version})"
            )

        # Truthiness check: an empty field name is treated like no field name.
        if self.field_name:
            details.append(f"[field: {self.field_name}]")

        return " ".join([self.message, *details])
# =============================================================================
# Pydantic Model Factory (Lazy Loading)
# =============================================================================
# The factory functions create the Pydantic models lazily so that Pydantic is
# not imported at module load time. This follows the project's lazy loading
# pattern used throughout the codebase.
# =============================================================================
def _create_source_file_entry_model() -> type:
    """Create the SourceFileEntry Pydantic model.

    Pydantic is imported inside this function, so importing the module does
    not import Pydantic.

    Returns
    -------
        type: The SourceFileEntry Pydantic model class.

    """
    # Import datetime for Pydantic's runtime type resolution
    from datetime import datetime  # noqa: F401 - Used by Pydantic field annotation

    from pydantic import BaseModel, ConfigDict, Field, field_validator

    class _SourceFileEntry(BaseModel):
        """Model for a single source file entry in the manifest.

        Each SourceFileEntry represents one source file that was used
        to build the RAG index. It includes the file path, content hash,
        size, and modification timestamp for verification and debugging.

        The SHA256 hash enables verification that source files haven't
        changed since the index was built. If source files change, a
        rebuild is required to keep the index in sync.

        Attributes:
        ----------
            path : str
                Relative path to the source file from the project root.
                Example: "data/raw/ashrae_55.pdf"
            sha256 : str
                SHA256 hash of the file content (64 hex characters).
                Used for integrity verification and change detection.
            size_bytes : int
                File size in bytes. Useful for debugging and validation.
                Must be non-negative.
            modified_at : datetime
                Last modification timestamp of the source file.
                Should be in UTC timezone for consistency.

        Example:
        -------
            >>> from datetime import datetime, UTC
            >>> entry = _SourceFileEntry(
            ...     path="data/raw/ashrae_55.pdf",
            ...     sha256=(
            ...         "e3b0c44298fc1c149afbf4c8996fb924"
            ...         "27ae41e4649b934ca495991b7852b855"
            ...     ),
            ...     size_bytes=1024000,
            ...     modified_at=datetime.now(UTC),
            ... )
            >>> entry.path
            'data/raw/ashrae_55.pdf'

        """

        # =====================================================================
        # Model Configuration
        # =====================================================================
        model_config = ConfigDict(
            # Forbid extra fields to catch typos in manifest files
            extra="forbid",
            # Make instances immutable for thread-safety
            frozen=True,
            # Enable JSON schema generation with examples
            json_schema_extra={
                "examples": [
                    {
                        "path": "data/raw/ashrae_55.pdf",
                        # Example SHA256 hash (64 hex characters)
                        "sha256": (
                            "e3b0c44298fc1c149afbf4c8996fb924"
                            "27ae41e4649b934ca495991b7852b855"
                        ),
                        "size_bytes": 1048576,
                        "modified_at": "2024-01-15T10:30:00Z",
                    }
                ]
            },
        )

        # =====================================================================
        # Fields
        # =====================================================================
        path: str = Field(
            ...,  # Required field
            min_length=1,
            description=(
                "Relative path to the source file from project root. "
                "Example: 'data/raw/ashrae_55.pdf'"
            ),
        )
        sha256: str = Field(
            ...,  # Required field
            min_length=64,
            max_length=64,
            pattern=r"^[a-f0-9]{64}$",
            description=(
                "SHA256 hash of the file content as 64 lowercase hex characters. "
                "Used for integrity verification and change detection."
            ),
        )
        size_bytes: int = Field(
            ...,  # Required field
            ge=0,  # File size must be non-negative
            description="File size in bytes. Must be non-negative.",
        )
        modified_at: datetime = Field(
            ...,  # Required field
            description=(
                "Last modification timestamp of the source file. "
                "Should be in UTC timezone for consistency."
            ),
        )

        # =====================================================================
        # Validators
        # =====================================================================
        # Both normalizers are registered in "before" mode so they see the raw
        # input and their output is what the Field constraints are checked
        # against (e.g. an upper-case digest is lowercased before the
        # lowercase-hex pattern is applied).
        @field_validator("path", mode="before")
        @classmethod
        def _normalize_path(cls, value: object) -> str:
            """Normalize the path field by stripping whitespace.

            Args:
            ----
                value: The input value to normalize.

            Returns:
            -------
                Stripped path string.

            Raises:
            ------
                ValueError: If value is None or empty after stripping.

            """
            if value is None:
                msg = "path cannot be None"
                raise ValueError(msg)
            path = str(value).strip()
            if not path:
                msg = "path cannot be empty"
                raise ValueError(msg)
            return path

        @field_validator("sha256", mode="before")
        @classmethod
        def _normalize_sha256(cls, value: object) -> str:
            """Normalize the sha256 field to lowercase.

            Args:
            ----
                value: The input value to normalize.

            Returns:
            -------
                Lowercase sha256 string with surrounding whitespace removed.

            Raises:
            ------
                ValueError: If value is None or empty after stripping.

            """
            if value is None:
                msg = "sha256 cannot be None"
                raise ValueError(msg)
            # Convert to lowercase string and strip whitespace
            sha256 = str(value).strip().lower()
            if not sha256:
                msg = "sha256 cannot be empty"
                raise ValueError(msg)
            return sha256

    return _SourceFileEntry
def _create_source_manifest_model(source_file_entry_class: type) -> type:
    """Create the SourceManifest Pydantic model.

    Pydantic is imported inside this function, so importing the module does
    not import Pydantic.

    Args:
    ----
        source_file_entry_class: The SourceFileEntry model class to use
            for the source_files field type annotation.

    Returns:
    -------
        type: The SourceManifest Pydantic model class.

    """
    # Import datetime for Pydantic's runtime type resolution
    from datetime import datetime  # noqa: F401 - Used by Pydantic field annotation

    from pydantic import BaseModel, ConfigDict, Field, field_validator

    class _SourceManifest(BaseModel):
        """Model for the source_manifest.json file.

        The SourceManifest tracks metadata about the source files and
        build process for the RAG index. It enables:
        - Schema version validation for compatibility checking
        - Build timestamp tracking for debugging
        - Index version matching with index_version.txt
        - Source file tracking for change detection

        This manifest is generated during the build pipeline and stored
        alongside the RAG artifacts in the HuggingFace dataset.

        Attributes:
        ----------
            schema_version : str
                Schema version of this manifest (e.g., "1.0.0").
                This model only checks the MAJOR.MINOR.PATCH format; the
                comparison with EXPECTED_SCHEMA_VERSION is not done here.
            created_at : datetime
                When this manifest was generated (build timestamp).
                Should be in UTC timezone for consistency.
            index_version : str
                Index version identifier that matches index_version.txt.
                Used for cache invalidation and version tracking.
            source_files : list[SourceFileEntry]
                List of source files used to build the index.
                Each entry includes path, hash, size, and timestamp.

        Example:
        -------
            >>> from datetime import datetime, UTC
            >>> manifest = _SourceManifest(
            ...     schema_version="1.0.0",
            ...     created_at=datetime.now(UTC),
            ...     index_version="2024.01.15.001",
            ...     source_files=[],
            ... )
            >>> manifest.schema_version
            '1.0.0'

        """

        # =====================================================================
        # Model Configuration
        # =====================================================================
        model_config = ConfigDict(
            # Forbid extra fields to catch typos in manifest files
            extra="forbid",
            # Make instances immutable for thread-safety
            frozen=True,
            # Enable JSON schema generation with examples
            json_schema_extra={
                "examples": [
                    {
                        "schema_version": "1.0.0",
                        "created_at": "2024-01-15T10:30:00Z",
                        "index_version": "2024.01.15.001",
                        "source_files": [
                            {
                                "path": "data/raw/ashrae_55.pdf",
                                "sha256": "e3b0c44...",
                                "size_bytes": 1048576,
                                "modified_at": "2024-01-15T10:30:00Z",
                            }
                        ],
                    }
                ]
            },
        )

        # =====================================================================
        # Fields
        # =====================================================================
        schema_version: str = Field(
            ...,  # Required field
            min_length=1,
            pattern=r"^\d+\.\d+\.\d+$",  # Semantic versioning format
            description=(
                "Schema version of this manifest (semantic versioning). "
                "Example: '1.0.0'. Must match EXPECTED_SCHEMA_VERSION."
            ),
        )
        created_at: datetime = Field(
            ...,  # Required field
            description=(
                "When this manifest was generated (build timestamp). "
                "Should be in UTC timezone for consistency."
            ),
        )
        index_version: str = Field(
            ...,  # Required field
            min_length=1,
            description=(
                "Index version identifier that matches index_version.txt. "
                "Used for cache invalidation and version tracking."
            ),
        )
        source_files: list[source_file_entry_class] = Field(  # type: ignore[valid-type]
            default_factory=list,
            description=(
                "List of source files used to build the index. "
                "Each entry includes path, hash, size, and timestamp."
            ),
        )

        # =====================================================================
        # Validators
        # =====================================================================
        # Registered in "before" mode so surrounding whitespace is removed
        # before the Field constraints (min_length / pattern) are checked.
        @field_validator("schema_version", mode="before")
        @classmethod
        def _normalize_schema_version(cls, value: object) -> str:
            """Normalize the schema_version field.

            Args:
            ----
                value: The input value to normalize.

            Returns:
            -------
                Stripped schema version string.

            Raises:
            ------
                ValueError: If value is None or empty.

            """
            if value is None:
                msg = "schema_version cannot be None"
                raise ValueError(msg)
            version = str(value).strip()
            if not version:
                msg = "schema_version cannot be empty"
                raise ValueError(msg)
            return version

        @field_validator("index_version", mode="before")
        @classmethod
        def _normalize_index_version(cls, value: object) -> str:
            """Normalize the index_version field.

            Args:
            ----
                value: The input value to normalize.

            Returns:
            -------
                Stripped index version string.

            Raises:
            ------
                ValueError: If value is None or empty.

            """
            if value is None:
                msg = "index_version cannot be None"
                raise ValueError(msg)
            version = str(value).strip()
            if not version:
                msg = "index_version cannot be empty"
                raise ValueError(msg)
            return version

        # =====================================================================
        # Instance Methods
        # =====================================================================
        def to_dict(self) -> dict[str, Any]:
            """Convert the manifest to a JSON-serializable dictionary.

            Datetime fields are rendered with ``datetime.isoformat()``, so a
            UTC-aware timestamp comes out with a ``+00:00`` offset (not a
            ``Z`` suffix), e.g. ``"2024-01-15T10:30:00+00:00"``.

            Returns:
            -------
                dict[str, Any]
                    Dictionary with the keys ``schema_version``,
                    ``created_at``, ``index_version`` and ``source_files``;
                    each source file is a dict with ``path``, ``sha256``,
                    ``size_bytes`` and ``modified_at``.

            """
            source_file_dicts: list[dict[str, Any]] = [
                {
                    "path": entry.path,
                    "sha256": entry.sha256,
                    "size_bytes": entry.size_bytes,
                    "modified_at": entry.modified_at.isoformat(),
                }
                for entry in self.source_files
            ]
            return {
                "schema_version": self.schema_version,
                "created_at": self.created_at.isoformat(),
                "index_version": self.index_version,
                "source_files": source_file_dicts,
            }

        def total_source_size_bytes(self) -> int:
            """Calculate the total size of all source files.

            Returns
            -------
                Total size in bytes of all source files in the manifest
                (0 when there are none).

            """
            return sum(int(entry.size_bytes) for entry in self.source_files)

        def source_file_count(self) -> int:
            """Get the number of source files in the manifest.

            Returns
            -------
                Number of source files.

            """
            return len(self.source_files)

    return _SourceManifest
# =============================================================================
# Model Class Cache
# =============================================================================
# Module-level slots holding the lazily built Pydantic model classes. Each
# starts out as None and is filled in by its getter the first time the class
# is needed; later calls reuse the stored class.
# =============================================================================
_source_file_entry_model: type | None = None
_source_manifest_model: type | None = None
def _get_source_file_entry() -> type:
    """Return the SourceFileEntry model class, building it on first use.

    The class produced by ``_create_source_file_entry_model()`` is stored in
    the module-level ``_source_file_entry_model`` slot and returned as-is on
    every later call.

    Returns
    -------
        type: The SourceFileEntry Pydantic model class.

    """
    global _source_file_entry_model  # noqa: PLW0603

    model = _source_file_entry_model
    if model is not None:
        return model

    # First call: build the class and remember it for next time.
    model = _create_source_file_entry_model()
    _source_file_entry_model = model
    return model
def _get_source_manifest() -> type:
    """Return the SourceManifest model class, building it on first use.

    The class produced by ``_create_source_manifest_model()`` is stored in
    the module-level ``_source_manifest_model`` slot and returned as-is on
    every later call.

    Returns
    -------
        type: The SourceManifest Pydantic model class.

    """
    global _source_manifest_model  # noqa: PLW0603

    model = _source_manifest_model
    if model is not None:
        return model

    # The manifest model's source_files field is typed with the entry model,
    # so that class is obtained (and created if necessary) first.
    model = _create_source_manifest_model(_get_source_file_entry())
    _source_manifest_model = model
    return model
# =============================================================================
# Public Model Classes (Lazy Proxies)
# =============================================================================
# These classes act as proxies that defer model creation until first use.
# This enables lazy loading while maintaining the appearance of regular classes.
# =============================================================================
class SourceFileEntry:
    """Model for a single source file entry in the manifest.

    This is a lazy-loading proxy class. The actual Pydantic model is
    created on first use to avoid importing Pydantic at module load time.
    Constructing ``SourceFileEntry(...)`` returns an instance of that
    underlying Pydantic model, not of this proxy class.

    Each SourceFileEntry represents one source file that was used
    to build the RAG index. It includes the file path, content hash,
    size, and modification timestamp for verification and debugging.

    Attributes:
    ----------
        path : str
            Relative path to the source file from the project root.
        sha256 : str
            SHA256 hash of the file content (64 hex characters).
        size_bytes : int
            File size in bytes.
        modified_at : datetime
            Last modification timestamp of the source file.

    Example:
    -------
        >>> from datetime import datetime, UTC
        >>> entry = SourceFileEntry(
        ...     path="data/raw/ashrae_55.pdf",
        ...     sha256=(
        ...         "e3b0c44298fc1c149afbf4c8996fb924"
        ...         "27ae41e4649b934ca495991b7852b855"
        ...     ),
        ...     size_bytes=1024000,
        ...     modified_at=datetime.now(UTC),
        ... )
        >>> entry.path
        'data/raw/ashrae_55.pdf'

    """

    # Type stubs for mypy
    path: str
    sha256: str
    size_bytes: int
    modified_at: datetime

    def __new__(cls, **kwargs: object) -> SourceFileEntry:
        """Create a new SourceFileEntry instance.

        Args:
        ----
            **kwargs: Field values for the model. Required fields:
                - path: str
                - sha256: str
                - size_bytes: int
                - modified_at: datetime

        Returns:
        -------
            SourceFileEntry: A SourceFileEntry Pydantic model instance.

        Raises:
        ------
            pydantic.ValidationError: If required fields are missing or
                field values fail validation.

        """
        model_class = _get_source_file_entry()
        return model_class(**kwargs)  # type: ignore[no-any-return]

    # The two helpers below must be classmethods: they are called on the
    # proxy class itself (``SourceFileEntry.model_validate(data)``), and no
    # instance of the proxy ever exists because __new__ returns the model.
    @classmethod
    def model_validate(cls, obj: object) -> SourceFileEntry:
        """Validate and create a model from an object.

        Args:
        ----
            obj: Object to validate. Can be a dict with the required fields
                or another object with matching attributes.

        Returns:
        -------
            SourceFileEntry: Validated SourceFileEntry instance.

        Raises:
        ------
            pydantic.ValidationError: If validation fails.

        """
        model_class = _get_source_file_entry()
        return model_class.model_validate(obj)  # type: ignore[attr-defined, no-any-return]

    @classmethod
    def model_json_schema(cls) -> dict[str, Any]:
        """Get the JSON schema for the SourceFileEntry model.

        Returns
        -------
            dict[str, Any]: JSON schema dictionary.

        """
        model_class = _get_source_file_entry()
        return model_class.model_json_schema()  # type: ignore[attr-defined, no-any-return]
class SourceManifest:
    """Model for the source_manifest.json file.

    This is a lazy-loading proxy class. The actual Pydantic model is
    created on first use to avoid importing Pydantic at module load time.
    Constructing ``SourceManifest(...)`` returns an instance of that
    underlying Pydantic model, not of this proxy class.

    The SourceManifest tracks metadata about the source files and
    build process for the RAG index. It enables:
    - Schema version validation for compatibility checking
    - Build timestamp tracking for debugging
    - Index version matching with index_version.txt
    - Source file tracking for change detection

    Attributes:
    ----------
        schema_version : str
            Schema version of this manifest (e.g., "1.0.0").
        created_at : datetime
            When this manifest was generated.
        index_version : str
            Index version identifier.
        source_files : list[SourceFileEntry]
            List of source files used to build the index.

    Example:
    -------
        >>> from datetime import datetime, UTC
        >>> manifest = SourceManifest(
        ...     schema_version="1.0.0",
        ...     created_at=datetime.now(UTC),
        ...     index_version="2024.01.15.001",
        ...     source_files=[],
        ... )
        >>> manifest.schema_version
        '1.0.0'

    """

    # Type stubs for mypy
    schema_version: str
    created_at: datetime
    index_version: str
    source_files: list[SourceFileEntry]

    def __new__(cls, **kwargs: object) -> SourceManifest:
        """Create a new SourceManifest instance.

        Args:
        ----
            **kwargs: Field values for the model. Required fields:
                - schema_version: str
                - created_at: datetime
                - index_version: str
                Optional fields:
                - source_files: list[SourceFileEntry] (defaults to [])

        Returns:
        -------
            SourceManifest: A SourceManifest Pydantic model instance.

        Raises:
        ------
            pydantic.ValidationError: If required fields are missing or
                field values fail validation.

        """
        model_class = _get_source_manifest()
        return model_class(**kwargs)  # type: ignore[no-any-return]

    # The two helpers below must be classmethods: they are called on the
    # proxy class itself (``SourceManifest.model_validate(data)``), and no
    # instance of the proxy ever exists because __new__ returns the model.
    @classmethod
    def model_validate(cls, obj: object) -> SourceManifest:
        """Validate and create a model from an object.

        Args:
        ----
            obj: Object to validate. Can be a dict with the required fields
                or another object with matching attributes.

        Returns:
        -------
            SourceManifest: Validated SourceManifest instance.

        Raises:
        ------
            pydantic.ValidationError: If validation fails.

        """
        model_class = _get_source_manifest()
        return model_class.model_validate(obj)  # type: ignore[attr-defined, no-any-return]

    @classmethod
    def model_json_schema(cls) -> dict[str, Any]:
        """Get the JSON schema for the SourceManifest model.

        Returns
        -------
            dict[str, Any]: JSON schema dictionary.

        """
        model_class = _get_source_manifest()
        return model_class.model_json_schema()  # type: ignore[attr-defined, no-any-return]

    # -------------------------------------------------------------------------
    # Typing stubs
    # -------------------------------------------------------------------------
    # The methods below exist so type checkers see them on SourceManifest.
    # Real instances are the underlying Pydantic model, which provides the
    # working implementations; these bodies only raise.
    def to_dict(self) -> dict[str, Any]:
        """Convert the manifest to a JSON-serializable dictionary.

        Stub for the underlying Pydantic model's ``to_dict()``, which returns
        all fields with datetimes rendered via ``isoformat()``.

        Returns:
        -------
            dict[str, Any]: Dictionary with all fields serialized.

        Raises:
        ------
            NotImplementedError: Always, when reached on the proxy class.

        """
        msg = "Call to_dict() on actual instance"
        raise NotImplementedError(msg)  # pragma: no cover

    def total_source_size_bytes(self) -> int:
        """Calculate the total size of all source files.

        Returns
        -------
            Total size in bytes.

        Raises
        ------
            NotImplementedError: Always, when reached on the proxy class.

        """
        raise NotImplementedError  # pragma: no cover

    def source_file_count(self) -> int:
        """Get the number of source files.

        Returns
        -------
            Number of source files.

        Raises
        ------
            NotImplementedError: Always, when reached on the proxy class.

        """
        raise NotImplementedError  # pragma: no cover