File size: 3,659 Bytes
23680f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Sample class representing a single data point in a dataset."""

import base64
import io
from pathlib import Path
from typing import Any

from PIL import Image
from pydantic import BaseModel, Field


class Sample(BaseModel):
    """A single sample in a HyperView dataset.

    Samples are pure metadata containers. Embeddings and layouts are stored
    separately in dedicated tables (per embedding space / per layout).

    ``width``, ``height`` and ``thumbnail_base64`` default to ``None``; the
    methods below fill them in on the instance the first time the image file
    is read.
    """

    id: str = Field(..., description="Unique identifier for the sample")
    filepath: str = Field(..., description="Path to the image file")
    label: str | None = Field(default=None, description="Label for the sample")
    metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
    thumbnail_base64: str | None = Field(default=None, description="Cached thumbnail as base64")
    width: int | None = Field(default=None, description="Image width in pixels")
    height: int | None = Field(default=None, description="Image height in pixels")

    model_config = {"arbitrary_types_allowed": True}

    @property
    def filename(self) -> str:
        """Return the final path component of ``filepath``."""
        return Path(self.filepath).name

    def load_image(self) -> Image.Image:
        """Open the image at ``filepath`` and return it.

        Pillow opens images lazily: the file is identified here, but pixel
        data is not decoded until it is needed, and the underlying file may
        stay open until then. Callers that only inspect the header should use
        the result as a context manager (``with sample.load_image() as img``)
        so the file is closed promptly.

        Raises:
            OSError: If the file is missing or cannot be identified as an
                image (propagated from ``Image.open``).
        """
        return Image.open(self.filepath)

    def get_thumbnail(self, size: tuple[int, int] = (128, 128)) -> Image.Image:
        """Return the image shrunk to fit within ``size``.

        As a side effect, ``width`` and ``height`` are set to the original
        image dimensions if either of them is still ``None``.

        Args:
            size: Maximum ``(width, height)`` passed to ``Image.thumbnail``.

        Returns:
            The loaded image, resized in place by ``Image.thumbnail``.
        """
        img = self.load_image()
        # Record the original size now: thumbnail() below resizes ``img`` in
        # place, after which ``img.size`` no longer reflects the source file.
        if self.width is None or self.height is None:
            self.width, self.height = img.size
        img.thumbnail(size, Image.Resampling.LANCZOS)
        return img

    def _encode_thumbnail(self, size: tuple[int, int] = (128, 128)) -> str:
        """Render the thumbnail as a base64-encoded JPEG string.

        Args:
            size: Maximum ``(width, height)`` of the thumbnail.

        Returns:
            The JPEG bytes (quality 85), base64-encoded and decoded as text.
        """
        thumb = self.get_thumbnail(size)
        # The JPEG writer rejects modes with alpha or non-8-bit samples
        # (RGBA, P, LA, PA, I, F, ...). Normalise everything except plain RGB
        # and greyscale to RGB so encoding does not fail on those inputs.
        if thumb.mode not in ("RGB", "L"):
            thumb = thumb.convert("RGB")
        buffer = io.BytesIO()
        thumb.save(buffer, format="JPEG", quality=85)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")

    def get_thumbnail_base64(self, size: tuple[int, int] = (128, 128)) -> str:
        """Return the thumbnail as a base64 string, preferring the cached one.

        When ``thumbnail_base64`` is already set (and non-empty) it is returned
        as-is and ``size`` is ignored. Otherwise a thumbnail is encoded on the
        fly; the result is NOT stored on the instance (see ``cache_thumbnail``).
        """
        return self.thumbnail_base64 or self._encode_thumbnail(size)

    def cache_thumbnail(self, size: tuple[int, int] = (128, 128)) -> None:
        """Encode and store the thumbnail in ``thumbnail_base64`` if it is unset.

        Does nothing when a thumbnail is already cached, so ``size`` only
        matters on the first call.
        """
        if self.thumbnail_base64 is None:
            self.thumbnail_base64 = self._encode_thumbnail(size)

    def to_api_dict(self, include_thumbnail: bool = True) -> dict[str, Any]:
        """Convert the sample to a plain dictionary for an API response.

        Args:
            include_thumbnail: When true, add a ``"thumbnail"`` key holding the
                base64 thumbnail (generated on demand if not cached).

        Returns:
            A dict with ``id``, ``filepath``, ``filename``, ``label``,
            ``metadata``, ``width``, ``height`` and, optionally, ``thumbnail``.
            ``width``/``height`` are ``None`` if the image could not be read.

        Raises:
            OSError: If ``include_thumbnail`` is true, no thumbnail is cached
                and the image cannot be opened or encoded.
        """
        # Build the thumbnail first: when it has to be generated,
        # get_thumbnail() records the dimensions as a side effect, so the
        # image file is opened once instead of twice.
        thumbnail = self.get_thumbnail_base64() if include_thumbnail else None

        # Still needed when the thumbnail came from the cache or was skipped;
        # a no-op when both dimensions are already known.
        self.ensure_dimensions()

        data: dict[str, Any] = {
            "id": self.id,
            "filepath": self.filepath,
            "filename": self.filename,
            "label": self.label,
            "metadata": self.metadata,
            "width": self.width,
            "height": self.height,
        }
        if include_thumbnail:
            data["thumbnail"] = thumbnail
        return data

    def ensure_dimensions(self) -> None:
        """Populate ``width`` and ``height`` from the image file if missing.

        This is best-effort: if the image cannot be opened, the dimensions are
        left as they were and no exception is raised.
        """
        if self.width is not None and self.height is not None:
            return
        try:
            # Only the size is needed here; the context manager closes the
            # file handle that the lazy Image.open() would otherwise keep open.
            with self.load_image() as img:
                self.width, self.height = img.size
        except Exception:
            # Deliberately broad: a missing or unreadable image must not break
            # callers that only want metadata. Dimensions stay unset.
            pass