Spaces:
Sleeping
Sleeping
Commit
·
de59cd4
1
Parent(s):
6f64e41
Decrease Img number
Browse files
app.py
CHANGED
|
@@ -26,9 +26,9 @@ import groundingdino.datasets.transforms as T
|
|
| 26 |
|
| 27 |
from huggingface_hub import hf_hub_download
|
| 28 |
|
| 29 |
-
picture_height =
|
| 30 |
-
picture_width =
|
| 31 |
-
picture_fov =
|
| 32 |
|
| 33 |
# Use this command for evaluate the GLIP-T model
|
| 34 |
config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py"
|
|
@@ -98,21 +98,18 @@ model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
|
|
| 98 |
def run_grounding(input_image):
|
| 99 |
pil_img = Image.fromarray(input_image)
|
| 100 |
init_image = pil_img.convert("RGB")
|
| 101 |
-
|
| 102 |
-
grounding_caption = "traffic sign"
|
| 103 |
box_threshold = 0.25
|
| 104 |
text_threshold = 0.25
|
| 105 |
|
| 106 |
_, image_tensor = image_transform_grounding(init_image)
|
| 107 |
image_pil: Image = image_transform_grounding_for_vis(init_image)
|
| 108 |
|
| 109 |
-
# run grounidng
|
| 110 |
boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
|
| 111 |
device='cpu')
|
| 112 |
annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
return annotated_frame
|
| 116 |
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|
|
@@ -128,7 +125,7 @@ if __name__ == "__main__":
|
|
| 128 |
with gr.Row():
|
| 129 |
with gr.Column():
|
| 130 |
gallery = gr.Gallery(label="Detection Results").style(
|
| 131 |
-
|
| 132 |
|
| 133 |
run_button.click(fn=detection, inputs=[
|
| 134 |
input_image], outputs=[gallery])
|
|
|
|
| 26 |
|
| 27 |
from huggingface_hub import hf_hub_download
|
| 28 |
|
| 29 |
+
picture_height = 360
|
| 30 |
+
picture_width = 540
|
| 31 |
+
picture_fov = 70
|
| 32 |
|
| 33 |
# Use this command for evaluate the GLIP-T model
|
| 34 |
config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py"
|
|
|
|
| 98 |
def run_grounding(input_image):
|
| 99 |
pil_img = Image.fromarray(input_image)
|
| 100 |
init_image = pil_img.convert("RGB")
|
| 101 |
+
grounding_caption = "traffic sign, car"
|
|
|
|
| 102 |
box_threshold = 0.25
|
| 103 |
text_threshold = 0.25
|
| 104 |
|
| 105 |
_, image_tensor = image_transform_grounding(init_image)
|
| 106 |
image_pil: Image = image_transform_grounding_for_vis(init_image)
|
| 107 |
|
|
|
|
| 108 |
boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
|
| 109 |
device='cpu')
|
| 110 |
annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
|
| 111 |
+
image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
|
| 112 |
+
return image_with_box
|
|
|
|
| 113 |
|
| 114 |
|
| 115 |
if __name__ == "__main__":
|
|
|
|
| 125 |
with gr.Row():
|
| 126 |
with gr.Column():
|
| 127 |
gallery = gr.Gallery(label="Detection Results").style(
|
| 128 |
+
grid=(1,4), preview=True, object_fit="none")
|
| 129 |
|
| 130 |
run_button.click(fn=detection, inputs=[
|
| 131 |
input_image], outputs=[gallery])
|