Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- app.py +15 -0
- depth_estimation.py +48 -0
- detection.py +50 -0
- detection_utils.py +69 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr

from detection import detection_with_markdown
from depth_estimation import depth_estimation_with_markdown

# Assemble the two standalone demos into one tabbed application.
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [detection_with_markdown, depth_estimation_with_markdown],
        ['Object Detection', 'Depth Estimation'],
    )

if __name__ == "__main__":
    demo.launch()
|
depth_estimation.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Depth-estimation pipeline backed by Intel's hybrid MiDaS model.
depth_estimator = pipeline(
    task='depth-estimation',
    model='Intel/dpt-hybrid-midas',
)
|
| 10 |
+
|
| 11 |
+
def launch(input_image):
    """Estimate a depth map for ``input_image``.

    Parameters
    ----------
    input_image : PIL.Image.Image
        Input image, as delivered by the Gradio ``pil`` image component.

    Returns
    -------
    PIL.Image.Image
        Greyscale (uint8) depth map resized to the input's dimensions.
    """
    out = depth_estimator(input_image)

    # Resize the raw prediction back to the input resolution.
    # PIL's .size is (width, height); interpolate wants (height, width).
    prediction = torch.nn.functional.interpolate(
        out["predicted_depth"].unsqueeze(1),
        size=input_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    # Normalize to 0-255 for display. Guard the all-zero case, which
    # would otherwise divide by zero and put NaNs into the uint8 cast.
    output = prediction.squeeze().numpy()
    max_val = np.max(output)
    if max_val > 0:
        formatted = (output * 255 / max_val).astype("uint8")
    else:
        formatted = np.zeros_like(output, dtype="uint8")
    return Image.fromarray(formatted)
|
| 27 |
+
|
| 28 |
+
# Gradio UI for the depth estimator.
depth_interface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil', label="Input Image"),
    outputs=gr.Image(type='pil', label="Depth Estimation"),
    allow_flagging='never',
)

# Page header rendered above the interface.
markdown_content_depth_estimation = gr.Markdown(
    """
<div style='text-align: center; font-family: "Times New Roman";'>
<h1 style='color: #FF6347;'>Image Depth Estimation</h1>
<h3 style='color: #4682B4;'>Model: Intel/dpt-hybrid-midas</h3>
<h3 style='color: #32CD32;'>Made By: Md. Mahmudun Nabi</h3>
</div>
"""
)

# Combine the header and the interface into a single Blocks layout.
with gr.Blocks() as depth_estimation_with_markdown:
    markdown_content_depth_estimation.render()
    depth_interface.render()
|
detection.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
|
| 4 |
+
from detection_utils import render_results_in_image, summarize_predictions_natural_language
|
| 5 |
+
|
| 6 |
+
# DETR-based object-detection pipeline.
obj_detector = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
)
|
| 10 |
+
|
| 11 |
+
def get_pipeline_prediction(pil_image):
    """Run object detection on ``pil_image``.

    Returns a tuple of (annotated image, natural-language summary).
    """
    predictions = obj_detector(pil_image)

    # Draw the predicted boxes/labels onto the picture.
    annotated_image = render_results_in_image(pil_image, predictions)

    # Describe the same predictions as an English sentence.
    summary_text = summarize_predictions_natural_language(predictions)

    return annotated_image, summary_text
|
| 19 |
+
|
| 20 |
+
# Gradio UI for the detector: annotated image plus a text summary.
detection_interface = gr.Interface(
    fn=get_pipeline_prediction,
    inputs=gr.Image(label="Input Image", type='pil'),
    outputs=[
        gr.Image(label="Output image with predicted instances", type='pil'),
        gr.Textbox(label="Detection Summary"),
    ],
    allow_flagging='never',
)

# Page header rendered above the interface.
markdown_content_detection = gr.Markdown(
    """
<div style='text-align: center; font-family: "Times New Roman";'>
<h1 style='color: #FF6347;'>Object Detection with Summary</h1>
<h3 style='color: #4682B4;'>Model: facebook/detr-resnet-50</h3>
<h3 style='color: #32CD32;'>Made By: Md. Mahmudun Nabi</h3>
</div>
"""
)

# Combine the header and the interface into a single Blocks layout.
with gr.Blocks() as detection_with_markdown:
    markdown_content_detection.render()
    detection_interface.render()
|
detection_utils.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import requests
|
| 4 |
+
import inflect
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
def load_image_from_url(url):
    """Download the image at ``url`` and open it as a PIL image."""
    response = requests.get(url, stream=True)
    return Image.open(response.raw)
|
| 9 |
+
|
| 10 |
+
def render_results_in_image(in_pil_img, in_results):
    """Draw detection boxes and labels over ``in_pil_img``.

    ``in_results`` is the detection pipeline's output: a list of dicts,
    each with 'label', 'score' and a 'box' mapping of pixel coordinates.
    Returns the annotated picture as a new PIL image.
    """
    plt.figure(figsize=(16, 10))
    plt.imshow(in_pil_img)
    axes = plt.gca()

    for detection in in_results:
        box = detection['box']
        left, top = box['xmin'], box['ymin']
        width = box['xmax'] - box['xmin']
        height = box['ymax'] - box['ymin']

        # Outline the detected region.
        axes.add_patch(plt.Rectangle(
            (left, top),
            width,
            height,
            fill=False,
            color='green',
            linewidth=2,
        ))

        # Label + confidence at the box's top-left corner.
        axes.text(
            left,
            top,
            f"{detection['label']}: {round(detection['score']*100,1)}%",
            color='red',
        )

    plt.axis('off')

    # Export the figure into an in-memory PNG and reopen it with PIL,
    # then close the figure to avoid leaking matplotlib state.
    img_buf = io.BytesIO()
    plt.savefig(img_buf, format='png', bbox_inches='tight', pad_inches=0)
    img_buf.seek(0)
    modified_image = Image.open(img_buf)
    plt.close()
    return modified_image
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def summarize_predictions_natural_language(predictions):
    """Summarize detection ``predictions`` as one English sentence.

    Parameters
    ----------
    predictions : list of dict
        Detection pipeline output; each dict carries at least a 'label' key.

    Returns
    -------
    str
        e.g. "In this image, there are two cats, three dogs and one person."

    Fixes: items are now separated by ", " (they were previously joined
    with a bare space), and an empty prediction list no longer yields the
    malformed sentence "In this image, there are .".
    """
    engine = inflect.engine()

    # Tally detections per label, preserving first-seen order.
    counts = {}
    for prediction in predictions:
        label = prediction['label']
        counts[label] = counts.get(label, 0) + 1

    # Nothing detected: return a well-formed sentence.
    if not counts:
        return "In this image, there are no objects detected."

    # Spell out each count in words; pluralize naively with a trailing
    # 's' (matches the original behavior).
    phrases = []
    for label, count in counts.items():
        phrase = f"{engine.number_to_words(count)} {label}"
        if count > 1:
            phrase += 's'
        phrases.append(phrase)

    # Join with commas, putting "and" before the final item.
    if len(phrases) == 1:
        body = phrases[0]
    else:
        body = ", ".join(phrases[:-1]) + " and " + phrases[-1]
    return f"In this image, there are {body}."
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
matplotlib
|
| 3 |
+
requests
|
| 4 |
+
inflect
|
| 5 |
+
Pillow
|
| 6 |
+
transformers
|
| 7 |
+
numpy
|
| 8 |
+
torch
|