import gradio as gr
from transformers import pipeline
import torch
import numpy as np
from PIL import Image
# Load the depth-estimation pipeline (Intel DPT-Hybrid MiDaS)
depth_estimator = pipeline(task="depth-estimation", model="Intel/dpt-hybrid-midas")
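# Optional: on a GPU-backed Space the pipeline can be pinned to the GPU instead
# of running on CPU. A sketch, assuming a single CUDA device (device=0);
# uncomment to use:
# depth_estimator = pipeline(
#     task="depth-estimation",
#     model="Intel/dpt-hybrid-midas",
#     device=0 if torch.cuda.is_available() else -1,
# )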
def launch(input_image):
    try:
        # Resize the input image to a fixed size (640x480) before inference
        input_image = input_image.resize((640, 480))
        # Ensure the input image is in RGB mode
        if input_image.mode != "RGB":
            input_image = input_image.convert("RGB")
        # Run the depth-estimation pipeline
        out = depth_estimator(input_image)
        # The pipeline returns a dict; "predicted_depth" holds the raw depth tensor
        predicted_output = out["predicted_depth"]
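        # Note: the depth-estimation pipeline also returns a ready-rendered PIL
        # image under out["depth"], which could be returned directly. The manual
        # interpolation below is kept for explicit control over resizing and
        # normalization:
        # output_image = out["depth"]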
        # Resize the prediction to match the input image size (H, W)
        predicted_output_resized = torch.nn.functional.interpolate(
            predicted_output.unsqueeze(0),  # Add a dim so the tensor is 4D (N, C, H, W)
            size=input_image.size[::-1],    # PIL size is (W, H); interpolate expects (H, W)
            mode="bicubic",
            align_corners=False,
        )
        # Normalize the prediction to the 0-255 range for display
        output = predicted_output_resized.squeeze().cpu().numpy()
        formatted = (output * 255 / np.max(output)).astype("uint8")
        # Convert the depth map to a grayscale image
        output_image = Image.fromarray(formatted)
        return output_image
    except Exception as e:
        # Return None on failure; a string here would break the gr.Image output
        print(f"Error processing the image: {e}")
        return None
# Define the Gradio interface
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
)
# Launch the interface
iface.launch()
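# Optional local smoke test without the web UI (commented out; "sample.jpg" and
# "depth_map.png" are placeholder paths, not files shipped with this Space):
# if __name__ == "__main__":
#     result = launch(Image.open("sample.jpg"))
#     if result is not None:
#         result.save("depth_map.png")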