# File size: 2,208 Bytes

import base64
import io
import json
from typing import Dict, Any
from PIL import Image
from transformers import pipeline

class EndpointHandler:
    """
    Custom inference handler wrapping a ``depth-estimation`` pipeline.

    The handler accepts an image (PIL image, raw encoded bytes, or a base64
    string as sent in a JSON body), runs the pipeline on it and returns the
    raw depth values together with a PNG rendering of the depth map. The
    whole result is serialized into a single JSON string.
    """

    def __init__(self, path=""):
        """
        Build the depth-estimation pipeline.

        Args:
            path: Model identifier or local directory handed to
                ``transformers.pipeline`` as ``model``.
        """
        # Initialize the pipeline for depth-estimation
        self.pipe = pipeline(task="depth-estimation", model=path)
        print("Depth estimation pipeline initialized successfully.")

    @staticmethod
    def _decode_base64(text: str) -> bytes:
        """Decode a base64 string (optionally a ``data:`` URI) into bytes.

        Raises:
            ValueError: If the payload is empty, a ``data:`` URI has no
                comma-separated body, or the text is not valid base64.
        """
        payload = text.strip()
        if payload.startswith("data:"):
            # Data URIs look like "data:<mime>;base64,<payload>"; only the
            # part after the first comma is image data.
            _, separator, payload = payload.partition(",")
            if not separator:
                raise ValueError("Malformed data URI: missing ',' separator.")

        # Clients frequently line-wrap base64; drop all whitespace so that
        # strict validation below only rejects genuinely invalid input.
        payload = "".join(payload.split())
        if not payload:
            raise ValueError("Image payload is empty.")

        try:
            return base64.b64decode(payload, validate=True)
        except ValueError as err:  # binascii.Error is a ValueError subclass
            raise ValueError("Image payload is not valid base64.") from err

    @staticmethod
    def _load_image(inputs: Any):
        """Turn a request payload into a PIL image.

        Strings are treated as base64 (JSON requests), PIL images are passed
        through unchanged, and anything else must be a bytes-like object
        containing an encoded image.

        Raises:
            ValueError: If a string payload is not valid base64.
            TypeError: If the payload is neither a string, a PIL image nor a
                bytes-like object.
        """
        # Strings are checked first: JSON bodies cannot carry raw bytes, so
        # the image arrives base64-encoded.
        if isinstance(inputs, str):
            inputs = EndpointHandler._decode_base64(inputs)
        elif isinstance(inputs, Image.Image):
            return inputs

        try:
            buffer = io.BytesIO(inputs)
        except TypeError as err:
            raise TypeError(
                "Unsupported input type "
                f"{type(inputs).__name__!r}; expected a PIL image, bytes, "
                "or a base64-encoded string."
            ) from err
        return Image.open(buffer)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        Run depth estimation for one request.

        Args:
            data (Dict): The request payload. The image is read from the
                "inputs" key and may be a PIL image, encoded image bytes, or
                a base64 string (with or without a ``data:`` URI prefix).
                The dictionary is not modified.

        Returns:
            Dict[str, str]: A dictionary with a single key "generated_text",
                            containing a JSON string with "raw_depth_data"
                            (nested lists) and "visual_depth_map" (PNG data
                            URI).

        Raises:
            ValueError: If a string payload is not valid base64.
            TypeError: If the payload type is not supported.
        """
        # Read (rather than pop) so the caller's dictionary stays intact.
        inputs = data.get("inputs", data)
        image = self._load_image(inputs)

        # Pass the image to the pipeline
        prediction = self.pipe(image)

        # Raw depth values: move to CPU and convert to plain nested lists so
        # they are JSON-serializable.
        raw_depth_data = prediction["predicted_depth"].cpu().tolist()

        # Visual depth map: encode the returned image as a base64 PNG.
        with io.BytesIO() as buffered:
            prediction["depth"].save(buffered, format="PNG")
            visual_map_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        results = {
            "raw_depth_data": raw_depth_data,
            "visual_depth_map": f"data:image/png;base64,{visual_map_base64}",
        }

        # Serialize the entire results dictionary into a JSON string and
        # return it in the expected single-key format.
        return {"generated_text": json.dumps(results)}