import gradio as gr
import yolov5
import os
import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image

# Download a sample image (COCO val2017) into the working directory as
# 'cats.jpg'. This runs on every import/start of the script.
torch.hub.download_url_to_file(
    'http://images.cocodataset.org/val2017/000000039769.jpg',
    'cats.jpg',
)

# The preprocessing pipeline and the depth-estimation network are both
# loaded from the same pretrained checkpoint.
DPT_CHECKPOINT = "Intel/dpt-large"
feature_extractor = DPTFeatureExtractor.from_pretrained(DPT_CHECKPOINT)
model1 = DPTForDepthEstimation.from_pretrained(DPT_CHECKPOINT)

def process_image(image):
    """Estimate a depth map for ``image`` and return it as a grayscale image.

    The image is preprocessed with the module-level ``feature_extractor``,
    passed through the module-level depth model ``model1``, and the predicted
    depth is resized back to the input resolution and rescaled to 0-255.

    Args:
        image: A PIL image; its ``size`` attribute (width, height) determines
            the resolution of the returned depth map.

    Returns:
        A PIL image built from a 2-D ``uint8`` array with the same width and
        height as ``image``, where the largest predicted depth value maps
        to 255.
    """
    # Prepare the image for the model.
    encoding = feature_extractor(image, return_tensors="pt")

    # Forward pass; gradients are not needed for inference.
    with torch.no_grad():
        outputs = model1(**encoding)
        predicted_depth = outputs.predicted_depth

    # Interpolate back to the original size. PIL reports (width, height)
    # while interpolate expects (height, width), hence the reversal.
    resized = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    # Index the batch and channel axes explicitly instead of a bare
    # squeeze(), which would also drop a spatial axis of length 1 and
    # break Image.fromarray for 1-pixel-wide or 1-pixel-tall inputs.
    output = resized[0, 0].cpu().numpy()

    peak = np.max(output)
    if peak > 0:
        # Bicubic interpolation can undershoot below zero; clip so the
        # uint8 cast cannot wrap around.
        scaled = np.clip(output * 255 / peak, 0, 255)
    else:
        # An all-zero (or non-finite) map would otherwise divide by zero
        # and feed NaN/inf into the uint8 cast.
        scaled = np.zeros_like(output)

    formatted = scaled.astype('uint8')
    return Image.fromarray(formatted)
    
# .......................................................

# YOLOv5 detector loaded from a weights file in the working directory;
# inference is pinned to the CPU.
YOLO_WEIGHTS_PATH = './best.pt'
model = yolov5.load(YOLO_WEIGHTS_PATH, device="cpu")

def predict(image):
    """Run object detection and depth estimation on a single image.

    Args:
        image: The input image; it is passed both to the module-level
            detector ``model`` and to ``process_image``.

    Returns:
        A tuple ``(detections, depth)`` of two PIL images, each resized to
        640x640: the detector's rendered annotations and the depth map
        produced by ``process_image``.
    """
    display_size = (640, 640)

    detections = model([image], size=640)
    depth_map = process_image(image)

    # render() returns one annotated array per input image; only one
    # image was submitted, so take the first.
    annotated = Image.fromarray(detections.render()[0]).resize(display_size)
    depth_resized = depth_map.resize(display_size)

    return annotated, depth_resized


# Text shown at the top of the demo page.
title = "Detecting objects for elderly and blind"
description = "\n  Try the examples at bottom to get started.\n"

# One single-element row per example image; each path is resolved against
# the current working directory.
examples = [
    [os.path.join(os.path.abspath(''), relative_path)]
    for relative_path in (
        './Optional1.jpeg',
        './option2.jpeg',
        './option3.jpeg',
        './option4.jpeg',
    )
]

# Single image input, handed to the prediction function as a PIL image.
inputs = gr.Image(
    type="pil",
    shape=(640, 640),
    label="Upload your image for detection",
)

# Two image outputs, in the order the prediction function returns them:
# the annotated detections first, then the depth map.
outputs = [
    gr.Image(type="pil", shape=(640, 640), label=output_label)
    for output_label in ("Object Detections", "Processed Image")
]

# Collect the interface settings in one place, then build and start the app.
# NOTE(review): cache_examples=True presumably makes Gradio run `predict` on
# the example images ahead of time - confirm against the installed version.
interface_options = dict(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title=title,
    description=description,
    cache_examples=True,
    theme='huggingface',
)
interface = gr.Interface(**interface_options)

# Launching happens at import time; there is no __main__ guard in this script.
interface.launch(debug=True, enable_queue=True)