Spaces:
Running
Running
File size: 1,725 Bytes
23680f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""EmbedAnything embedding provider - default lightweight provider."""
from __future__ import annotations
from typing import Any
import numpy as np
from hyperview.core.sample import Sample
from hyperview.embeddings.providers import (
BaseEmbeddingProvider,
ModelSpec,
register_provider,
)
__all__ = ["EmbedAnythingProvider"]
class EmbedAnythingProvider(BaseEmbeddingProvider):
    """Embedding provider backed by EmbedAnything (the default provider).

    Targets HuggingFace vision models through EmbedAnything's inference
    engine. One ``EmbeddingComputer`` is kept per ``model_id`` so that a
    model requested more than once is only initialized the first time.
    """

    def __init__(self) -> None:
        # Per-instance cache: model_id -> EmbeddingComputer.
        self._computers: dict[str, Any] = {}

    @property
    def provider_id(self) -> str:
        """Identifier string for this provider."""
        return "embed_anything"

    def _get_computer(self, model_id: str) -> Any:
        """Return the cached EmbeddingComputer for *model_id*, creating it on a miss.

        Args:
            model_id: Model identifier passed to ``EmbeddingComputer(model=...)``.

        Returns:
            The ``EmbeddingComputer`` instance stored for ``model_id``.
        """
        try:
            return self._computers[model_id]
        except KeyError:
            pass  # Cache miss: build the computer below, outside the handler.

        # Imported only on a cache miss, never at module import time
        # (presumably to defer loading the compute backend until needed).
        from hyperview.embeddings.compute import EmbeddingComputer

        instance = EmbeddingComputer(model=model_id)
        self._computers[model_id] = instance
        return instance

    def compute_embeddings(
        self,
        samples: list[Sample],
        model_spec: ModelSpec,
        batch_size: int = 32,
        show_progress: bool = True,
    ) -> np.ndarray:
        """Embed *samples* with the model named by ``model_spec.model_id``.

        Args:
            samples: Samples handed to the computer's ``compute_batch``.
            model_spec: Only its ``model_id`` is read here, to select the
                (cached) computer.
            batch_size: Forwarded unchanged to ``compute_batch``.
            show_progress: Forwarded unchanged to ``compute_batch``.

        Returns:
            The ``compute_batch`` result converted to a ``float32`` NumPy
            array (presumably one row per sample - confirm against
            ``EmbeddingComputer.compute_batch``).
        """
        vectors = self._get_computer(model_spec.model_id).compute_batch(
            samples,
            batch_size=batch_size,
            show_progress=show_progress,
        )
        return np.array(vectors, dtype=np.float32)
# Auto-register on import: this call runs when the module is first imported and
# passes the provider class to register_provider under the key "embed_anything",
# the same string that EmbedAnythingProvider.provider_id returns.
register_provider("embed_anything", EmbedAnythingProvider)
|