Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- app.py +15 -0
- depth_estimation.py +48 -0
- detection.py +50 -0
- detection_utils.py +69 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr

from detection import detection_with_markdown
from depth_estimation import depth_estimation_with_markdown

# Assemble the two standalone demos into one tabbed application.
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [detection_with_markdown, depth_estimation_with_markdown],
        ['Object Detection', 'Depth Estimation'],
    )

if __name__ == "__main__":
    demo.launch()
|
depth_estimation.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Depth-estimation pipeline backed by Intel's hybrid MiDaS model.
depth_estimator = pipeline(
    task='depth-estimation',
    model='Intel/dpt-hybrid-midas',
)
|
| 10 |
+
|
| 11 |
+
def launch(input_image):
    """Estimate a depth map for ``input_image``.

    Parameters
    ----------
    input_image : PIL.Image.Image
        Input image, as delivered by the Gradio ``pil`` image component.

    Returns
    -------
    PIL.Image.Image
        Greyscale (uint8) depth map resized to the input's dimensions.
    """
    out = depth_estimator(input_image)

    # Resize the raw prediction back to the input resolution.
    # PIL's .size is (width, height); interpolate wants (height, width).
    prediction = torch.nn.functional.interpolate(
        out["predicted_depth"].unsqueeze(1),
        size=input_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Normalize to 0-255 for display. Guard the all-zero case, which
    # would otherwise divide by zero and put NaNs into the uint8 cast.
    output = prediction.squeeze().numpy()
    max_val = np.max(output)
    if max_val > 0:
        formatted = (output * 255 / max_val).astype("uint8")
    else:
        formatted = np.zeros_like(output, dtype="uint8")
    return Image.fromarray(formatted)
|
| 27 |
+
|
| 28 |
+
# Gradio UI for the depth estimator.
depth_interface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil', label="Input Image"),
    outputs=gr.Image(type='pil', label="Depth Estimation"),
    allow_flagging='never',
)

# Page header rendered above the interface.
markdown_content_depth_estimation = gr.Markdown(
    """
<div style='text-align: center; font-family: "Times New Roman";'>
<h1 style='color: #FF6347;'>Image Depth Estimation</h1>
<h3 style='color: #4682B4;'>Model: Intel/dpt-hybrid-midas</h3>
<h3 style='color: #32CD32;'>Made By: Md. Mahmudun Nabi</h3>
</div>
"""
)

# Combine the header and the interface into a single Blocks layout.
with gr.Blocks() as depth_estimation_with_markdown:
    markdown_content_depth_estimation.render()
    depth_interface.render()
|
detection.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
|
| 4 |
+
from detection_utils import render_results_in_image, summarize_predictions_natural_language
|
| 5 |
+
|
| 6 |
+
# DETR-based object-detection pipeline.
obj_detector = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
)
|
| 10 |
+
|
| 11 |
+
def get_pipeline_prediction(pil_image):
    """Run object detection on ``pil_image``.

    Returns a tuple of (annotated image, natural-language summary).
    """
    predictions = obj_detector(pil_image)

    # Draw the predicted boxes/labels onto the picture.
    annotated_image = render_results_in_image(pil_image, predictions)

    # Describe the same predictions as an English sentence.
    summary_text = summarize_predictions_natural_language(predictions)

    return annotated_image, summary_text
|
| 19 |
+
|
| 20 |
+
# Gradio UI for the detector: annotated image plus a text summary.
detection_interface = gr.Interface(
    fn=get_pipeline_prediction,
    inputs=gr.Image(label="Input Image", type='pil'),
    outputs=[
        gr.Image(label="Output image with predicted instances", type='pil'),
        gr.Textbox(label="Detection Summary"),
    ],
    allow_flagging='never',
)

# Page header rendered above the interface.
markdown_content_detection = gr.Markdown(
    """
<div style='text-align: center; font-family: "Times New Roman";'>
<h1 style='color: #FF6347;'>Object Detection with Summary</h1>
<h3 style='color: #4682B4;'>Model: facebook/detr-resnet-50</h3>
<h3 style='color: #32CD32;'>Made By: Md. Mahmudun Nabi</h3>
</div>
"""
)

# Combine the header and the interface into a single Blocks layout.
with gr.Blocks() as detection_with_markdown:
    markdown_content_detection.render()
    detection_interface.render()
|
detection_utils.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import requests
|
| 4 |
+
import inflect
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
def load_image_from_url(url):
    """Download the image at ``url`` and open it as a PIL image."""
    response = requests.get(url, stream=True)
    return Image.open(response.raw)
|
| 9 |
+
|
| 10 |
+
def render_results_in_image(in_pil_img, in_results):
    """Draw detection boxes and labels over ``in_pil_img``.

    ``in_results`` is the detection pipeline's output: a list of dicts,
    each with 'label', 'score' and a 'box' mapping of pixel coordinates.
    Returns the annotated picture as a new PIL image.
    """
    plt.figure(figsize=(16, 10))
    plt.imshow(in_pil_img)
    axes = plt.gca()

    for detection in in_results:
        box = detection['box']
        left, top = box['xmin'], box['ymin']
        width = box['xmax'] - box['xmin']
        height = box['ymax'] - box['ymin']

        # Outline the detected region.
        axes.add_patch(plt.Rectangle(
            (left, top),
            width,
            height,
            fill=False,
            color='green',
            linewidth=2,
        ))

        # Label + confidence at the box's top-left corner.
        axes.text(
            left,
            top,
            f"{detection['label']}: {round(detection['score']*100,1)}%",
            color='red',
        )

    plt.axis('off')

    # Export the figure into an in-memory PNG and reopen it with PIL,
    # then close the figure to avoid leaking matplotlib state.
    img_buf = io.BytesIO()
    plt.savefig(img_buf, format='png', bbox_inches='tight', pad_inches=0)
    img_buf.seek(0)
    modified_image = Image.open(img_buf)
    plt.close()
    return modified_image
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def summarize_predictions_natural_language(predictions):
    """Summarize detection ``predictions`` as one English sentence.

    Parameters
    ----------
    predictions : list of dict
        Detection pipeline output; each dict carries at least a 'label' key.

    Returns
    -------
    str
        e.g. "In this image, there are two cats, three dogs and one person."

    Fixes: items are now separated by ", " (they were previously joined
    with a bare space), and an empty prediction list no longer yields the
    malformed sentence "In this image, there are .".
    """
    engine = inflect.engine()

    # Tally detections per label, preserving first-seen order.
    counts = {}
    for prediction in predictions:
        label = prediction['label']
        counts[label] = counts.get(label, 0) + 1

    # Nothing detected: return a well-formed sentence.
    if not counts:
        return "In this image, there are no objects detected."

    # Spell out each count in words; pluralize naively with a trailing
    # 's' (matches the original behavior).
    phrases = []
    for label, count in counts.items():
        phrase = f"{engine.number_to_words(count)} {label}"
        if count > 1:
            phrase += 's'
        phrases.append(phrase)

    # Join with commas, putting "and" before the final item.
    if len(phrases) == 1:
        body = phrases[0]
    else:
        body = ", ".join(phrases[:-1]) + " and " + phrases[-1]
    return f"In this image, there are {body}."
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
matplotlib
|
| 3 |
+
requests
|
| 4 |
+
inflect
|
| 5 |
+
Pillow
|
| 6 |
+
transformers
|
| 7 |
+
numpy
|
| 8 |
+
torch
|