Spaces:
Running
Running
| """Abstract storage backend interface for HyperView.""" | |
| from abc import ABC, abstractmethod | |
| from collections.abc import Callable, Iterator | |
| from typing import Any | |
| import numpy as np | |
| from hyperview.core.sample import Sample | |
class StorageBackend(ABC):
    """Abstract base class for storage backends.

    Declares the interface for sample storage, embedding spaces, layouts and
    similarity search. Every method is abstract: a concrete backend must
    implement all of them, otherwise instantiating it raises ``TypeError``.
    """

    # ------------------------------------------------------------------
    # Samples
    # ------------------------------------------------------------------

    @abstractmethod
    def add_sample(self, sample: Sample) -> None:
        """Add a single sample (idempotent upsert)."""

    @abstractmethod
    def add_samples_batch(self, samples: list[Sample]) -> None:
        """Add multiple samples (idempotent upsert)."""

    @abstractmethod
    def get_sample(self, sample_id: str) -> Sample | None:
        """Retrieve a sample by ID.

        The ``None`` return presumably signals an unknown ID; confirm against
        the concrete backends.
        """

    @abstractmethod
    def get_samples_paginated(
        self,
        offset: int = 0,
        limit: int = 100,
        label: str | None = None,
    ) -> tuple[list[Sample], int]:
        """Get paginated samples.

        Args:
            offset: Start position of the page (defaults to 0).
            limit: Page size (defaults to 100).
            label: Optional label; presumably restricts the result to samples
                carrying that label (confirm against implementations).

        Returns:
            A ``(samples, total_count)`` tuple.
        """

    @abstractmethod
    def get_all_samples(self) -> list[Sample]:
        """Get all samples."""

    @abstractmethod
    def update_sample(self, sample: Sample) -> None:
        """Update an existing sample."""

    @abstractmethod
    def update_samples_batch(self, samples: list[Sample]) -> None:
        """Batch update samples."""

    @abstractmethod
    def delete_sample(self, sample_id: str) -> bool:
        """Delete a sample by ID.

        The meaning of the boolean result is not fixed by this interface;
        presumably ``True`` when a sample was removed (confirm).
        """

    @abstractmethod
    def __len__(self) -> int:
        """Return total number of samples."""

    @abstractmethod
    def __iter__(self) -> Iterator[Sample]:
        """Iterate over all samples."""

    @abstractmethod
    def __contains__(self, sample_id: str) -> bool:
        """Check if sample exists."""

    @abstractmethod
    def get_unique_labels(self) -> list[str]:
        """Get all unique labels."""

    @abstractmethod
    def get_existing_ids(self, sample_ids: list[str]) -> set[str]:
        """Return set of sample_ids that already exist in storage."""

    @abstractmethod
    def get_samples_by_ids(self, sample_ids: list[str]) -> list[Sample]:
        """Retrieve multiple samples by ID."""

    @abstractmethod
    def get_labels_by_ids(self, sample_ids: list[str]) -> dict[str, str | None]:
        """Get labels for sample IDs. Missing IDs not included in result."""

    @abstractmethod
    def filter(self, predicate: Callable[[Sample], bool]) -> list[Sample]:
        """Filter samples based on a predicate function."""

    # ------------------------------------------------------------------
    # Embedding spaces
    # ------------------------------------------------------------------

    @abstractmethod
    def list_spaces(self) -> list[Any]:
        """List all embedding spaces."""

    @abstractmethod
    def get_space(self, space_key: str) -> Any | None:
        """Get info for a specific embedding space."""

    @abstractmethod
    def ensure_space(
        self,
        model_id: str,
        dim: int,
        config: dict | None = None,
        space_key: str | None = None,
    ) -> Any:
        """Ensure an embedding space exists, creating if needed.

        Args:
            model_id: Model identifier for this space.
            dim: Vector dimension.
            config: Optional config dict for SpaceInfo.config_json.
            space_key: Optional explicit space key. If None, derived from
                model_id.
        """

    @abstractmethod
    def delete_space(self, space_key: str) -> bool:
        """Delete an embedding space and its embeddings."""

    @abstractmethod
    def add_embeddings(
        self,
        space_key: str,
        ids: list[str],
        vectors: np.ndarray,
    ) -> None:
        """Add embeddings to a space."""

    @abstractmethod
    def get_embeddings(
        self,
        space_key: str,
        ids: list[str] | None = None,
    ) -> tuple[list[str], np.ndarray]:
        """Get embeddings from a space. Returns (ids, vectors)."""

    @abstractmethod
    def get_embedded_ids(self, space_key: str) -> set[str]:
        """Get sample IDs that have embeddings in a space."""

    @abstractmethod
    def get_missing_embedding_ids(self, space_key: str) -> list[str]:
        """Get sample IDs without embeddings in a space."""

    # ------------------------------------------------------------------
    # Layouts
    # ------------------------------------------------------------------

    @abstractmethod
    def list_layouts(self) -> list[Any]:
        """List all layouts."""

    @abstractmethod
    def get_layout(self, layout_key: str) -> Any | None:
        """Get layout info."""

    @abstractmethod
    def ensure_layout(
        self,
        layout_key: str,
        space_key: str,
        method: str,
        geometry: str,
        params: dict | None = None,
    ) -> Any:
        """Ensure a layout exists."""

    @abstractmethod
    def delete_layout(self, layout_key: str) -> bool:
        """Delete a layout."""

    @abstractmethod
    def add_layout_coords(
        self,
        layout_key: str,
        ids: list[str],
        coords: np.ndarray,
    ) -> None:
        """Add layout coordinates (N x 2)."""

    @abstractmethod
    def get_layout_coords(
        self,
        layout_key: str,
        ids: list[str] | None = None,
    ) -> tuple[list[str], np.ndarray]:
        """Get layout coordinates. Returns (ids, coords)."""

    @abstractmethod
    def get_lasso_candidates_aabb(
        self,
        *,
        layout_key: str,
        x_min: float,
        x_max: float,
        y_min: float,
        y_max: float,
    ) -> tuple[list[str], np.ndarray]:
        """Return candidate (id, xy) rows within an axis-aligned bounding box.

        All arguments are keyword-only.
        """

    # ------------------------------------------------------------------
    # Similarity search
    # ------------------------------------------------------------------

    @abstractmethod
    def find_similar(
        self,
        sample_id: str,
        k: int = 10,
        space_key: str | None = None,
    ) -> list[tuple[Sample, float]]:
        """Find k nearest neighbors."""

    @abstractmethod
    def find_similar_by_vector(
        self,
        vector: list[float] | np.ndarray,
        k: int = 10,
        space_key: str | None = None,
    ) -> list[tuple[Sample, float]]:
        """Find k nearest neighbors to a query vector."""

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    @abstractmethod
    def close(self) -> None:
        """Close the storage connection."""