File size: 7,103 Bytes
8e851a8
 
 
dcb538a
8e851a8
1ac44f6
8e851a8
dcb538a
8e851a8
 
 
 
 
 
 
 
 
 
 
 
 
68d1028
8e851a8
62ff382
68d1028
8e851a8
 
66b3d7b
8e851a8
 
64b01d4
3d4b1ac
8e851a8
 
2481812
1402b62
 
8e851a8
26efb89
 
 
731fbc6
 
26efb89
731fbc6
 
 
 
 
 
 
26efb89
 
 
 
 
 
 
 
 
 
370adbe
26efb89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ac44f6
8e851a8
7d342be
 
 
 
d44f300
8e851a8
 
 
 
 
c7e2f5d
6d2fbd1
c7e2f5d
e2afd26
8e851a8
 
 
 
0ae9e18
8e851a8
e2afd26
8e851a8
 
 
 
1ac44f6
4643a08
26efb89
4643a08
 
 
 
 
 
 
 
 
 
 
 
 
10f2d1a
4643a08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649e06f
62ff382
 
8e851a8
731fbc6
 
 
8e1b8c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ce3f83
8e1b8c4
 
 
 
 
99d4997
4efca65
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# Bootstrap: detectron2 is not on PyPI, so install it from GitHub if missing.
try:
    import detectron2
except ImportError:
    # Only catch ImportError — the original bare `except:` would also hide
    # KeyboardInterrupt/SystemExit and unrelated failures behind a reinstall.
    import os
    os.system('pip install git+https://github.com/facebookresearch/detectron2.git')
import spaces
import cv2
import torch
from matplotlib.pyplot import axis
import gradio as gr
import requests
import numpy as np
from torch import nn
import requests

import torch

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import ColorMode
import tqdm


# Weights of the phase-2 detection model on the Hugging Face Hub, pinned to a
# specific revision so the demo stays reproducible.
# NOTE(review): detectron2's checkpointer is expected to fetch this URL on
# first use — confirm.
model_path = "https://huggingface.co/asalhi85/Smartathon-Detectron2/resolve/9f4d573340b033e651d4937906f23850f9b6bc57/phase2_detectron_model.pth"

# Detection config: Faster R-CNN X-101 32x8d FPN backbone (local yaml) with
# 11 classes — the "None" placeholder plus the 10 categories listed below.
cfg = get_cfg()
cfg.merge_from_file("./faster_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 11
cfg.MODEL.WEIGHTS = model_path

# Register human-readable class names so the visualizers can label boxes.
my_metadata = MetadataCatalog.get("dbmdz_coco_all")
my_metadata.thing_classes = ["None", "BAD_BILLBOARD","BROKEN_SIGNAGE","CLUTTER_SIDEWALK","CONSTRUCTION_ROAD","FADED_SIGNAGE","GARBAGE","GRAFFITI","POTHOLES","SAND_ON_ROAD","UNKEPT_FACADE"]





    
def predict_frame(frame,_):
    """Run detection on a single frame and return the annotated image.

    The second argument is ignored (kept for the two-argument callback
    signature this function is wired to).
    """
    # Fixed confidence cut-off for the per-frame path.
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    detections = DefaultPredictor(cfg)(frame)["instances"].to("cpu")
    # Reverse the channel order before drawing (presumably BGR -> RGB,
    # matching the other inference paths in this file).
    drawer = Visualizer(frame[:, :, ::-1], my_metadata, scale=1.2, instance_mode=ColorMode.IMAGE)
    return drawer.draw_instance_predictions(detections).get_image()

@spaces.GPU
def opt_process_vid(video_path):
    """Annotate a video with detections, trading accuracy for speed.

    To keep the output at the source frame rate without running the model on
    every frame, predictions are refreshed every ``skipped_outputs`` frames
    and the overlay is redrawn every ``skipped_viz`` frames; in between, the
    most recently rendered frame is written again.

    Args:
        video_path: path of the input video file.

    Returns:
        Path of the written ``./output.mp4`` file.
    """
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    predictor = DefaultPredictor(cfg)
    v = VideoVisualizer(my_metadata, ColorMode.IMAGE)

    cap = cv2.VideoCapture(video_path)
    frame_size = (int(cap.get(3)), int(cap.get(4)))  # (width, height)
    fps = int(cap.get(5))
    output_path = './output.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    skipped_viz = 5        # redraw the overlay every 5th frame
    skipped_outputs = 15   # rerun the detector every 15th frame

    visualization = None   # set on frame 0 (both modulo conditions hit)
    for i in tqdm.tqdm(range(num_frames)):
        ret, frame = cap.read()
        if not ret:
            break
        if i % skipped_outputs == 0:
            # Refresh prediction results; reused until the next refresh.
            outputs = predictor(frame)
        if i % skipped_viz == 0:
            # Swap channels for drawing, then swap back: the code draws on
            # the RGB copy and VideoWriter receives BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))
            visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)
        # On skipped frames this duplicates the last rendered frame so the
        # output keeps the input's frame count and fps.
        video_writer.write(visualization)

    # Release resources
    cap.release()
    video_writer.release()
    torch.cuda.empty_cache()

    return output_path
    

@spaces.GPU
def inference(image_url, image, min_score):
  """Run detection on an image and return the annotated result.

  Args:
      image_url: optional direct URL of an image; takes priority when set.
      image: filepath of an uploaded image (used when no URL is given).
      min_score: minimum detection confidence for drawn boxes.

  Returns:
      The annotated image as an array (RGB, per Visualizer output).
  """
  # Bug fix: the original nested everything below inside the `else:` branch
  # of the CUDA check, so on CPU-only machines the function set the device
  # and returned None without ever running inference.
  cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  print(cfg.MODEL.DEVICE)

  if image_url:
      r = requests.get(image_url)
      if r:
          im = np.frombuffer(r.content, dtype="uint8")
          im = cv2.imdecode(im, cv2.IMREAD_COLOR)
      # NOTE(review): if the request fails, `im` is unbound and the
      # predictor call below raises — consider an explicit error message.
  else:
      # cv2.imread yields BGR — the channel order the model expects.
      im = cv2.imread(image)

  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = min_score
  predictor = DefaultPredictor(cfg)

  outputs = predictor(im)

  # Visualizer draws on the channel-reversed (RGB) copy of the image.
  v = Visualizer(im[:, :, ::-1], my_metadata, scale=1.2, instance_mode=ColorMode.IMAGE)
  out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

  return out.get_image()

@spaces.GPU
def process_vid(video_path):
  """Annotate every frame of a video with detections (slow, exhaustive path).

  Unlike ``opt_process_vid``, this runs the detector on every single frame.

  Args:
      video_path: path of the input video file.

  Returns:
      Path of the written ``./output.mp4`` file.
  """
  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
  cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  predictor = DefaultPredictor(cfg)
  v = VideoVisualizer(my_metadata, ColorMode.IMAGE)

  cap = cv2.VideoCapture(video_path)
  frame_width = int(cap.get(3))
  frame_height = int(cap.get(4))
  frame_size = (frame_width, frame_height)
  fps = int(cap.get(5))
  output_path = './output.mp4'
  # Bug fix: the original used 'MJPG', which commonly fails to encode into an
  # .mp4 container; 'mp4v' matches the container (and opt_process_vid).
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
  video_writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

  def run_on_video(video, max_frames):
    """Yield an annotated BGR frame for each frame read from `video`."""
    read_frames = 0
    while True:
      has_frame, frame = video.read()
      if not has_frame:
          break

      # Get prediction results for this frame (predictor sees the raw frame).
      outputs = predictor(frame)

      # Swap channels before drawing, then swap back so the VideoWriter
      # receives BGR (same round-trip as the other video path).
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))
      yield cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)

      read_frames += 1
      # Bug fix: was `> max_frames`, which allowed one extra frame.
      if read_frames >= max_frames:
          break

  num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

  # Enumerate the frames of the video and write them out.
  # Bug fix: the original also wrote a debug PNG ('POSE detectron2.png') for
  # every single frame — leftover debugging overhead, removed.
  for visualization in tqdm.tqdm(run_on_video(cap, num_frames), total=num_frames):
      video_writer.write(visualization)

  # Release resources
  cap.release()
  video_writer.release()
  return output_path


# Demo metadata.
# NOTE(review): title/description/article are defined but never passed to the
# interfaces below — confirm whether they should be wired into
# gr.TabbedInterface / gr.Interface.
title = "Smartathon Phase2 Demo - Baseer"
description = "This demo introduces an interactive playground for our trained Detectron2 model."
article = '<p>Detectron model is available from our repository <a href="https://github.com/asalhi/Smartathon-Baseer">here</a>.</p>'




# Build the input/output components shared by the two interfaces.
# NOTE(review): gr.Row()/gr.Column() are used here outside a gr.Blocks
# context — the layout containers have no effect on the gr.Interface pages;
# confirm this is intentional (the components themselves still work).
with gr.Row():
    with gr.Column():
        #gr.HTML("""<h5 style="color:navy;">3- Or insert direct url of an image.</h5>""")
        input_url = gr.Textbox(label="Image URL", placeholder="")
        #gr.HTML("""<h5 style="color:navy;">2- Or upload an image by clicking on the canvas.<br></h5>""")
        input_image = gr.Image(type="filepath", image_mode="RGB", sources="upload", label="Input Image")
        input_video = gr.Video(format="mp4",sources="upload", label="Input video" )
        gr.HTML("""<h5 style="color:navy;">4- You can use this slider to control boxes min score: </h5>""")
        sliderr = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, label="Minimum score")
    output_image = gr.Image(type="pil", label="Output")
    output_video = gr.Video(format="mp4", label="Output")


# Image tab: URL or uploaded image plus a confidence slider.
img_interface = gr.Interface(
    fn=inference,
   inputs=[input_url,input_image,sliderr], outputs=[output_image], api_name="find"
)
# Video tab: uses the optimized (frame-skipping) video pipeline.
video_interface =  gr.Interface(
    fn=opt_process_vid,
   inputs=[input_video], outputs=[output_video], api_name="vid"
)
demo = gr.TabbedInterface([img_interface, video_interface], ["Image Upload", "Video Upload"])

demo.launch()