File size: 7,103 Bytes
8e851a8
 
 
dcb538a
8e851a8
1ac44f6
8e851a8
dcb538a
8e851a8
 
 
 
 
 
 
 
 
 
 
 
 
68d1028
8e851a8
62ff382
68d1028
8e851a8
 
66b3d7b
8e851a8
 
64b01d4
3d4b1ac
8e851a8
 
2481812
1402b62
 
8e851a8
26efb89
 
 
731fbc6
 
26efb89
731fbc6
 
 
 
 
 
 
26efb89
 
 
 
 
 
 
 
 
 
370adbe
26efb89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ac44f6
8e851a8
7d342be
 
 
 
d44f300
8e851a8
 
 
 
 
c7e2f5d
6d2fbd1
c7e2f5d
e2afd26
8e851a8
 
 
 
0ae9e18
8e851a8
e2afd26
8e851a8
 
 
 
1ac44f6
4643a08
26efb89
4643a08
 
 
 
 
 
 
 
 
 
 
 
 
10f2d1a
4643a08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649e06f
62ff382
 
8e851a8
731fbc6
 
 
8e1b8c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ce3f83
8e1b8c4
 
 
 
 
99d4997
4efca65
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# Bootstrap: detectron2 is not on PyPI, so install it from GitHub if missing.
try:
    import detectron2
except ImportError:
    # Only catch ImportError — the original bare `except:` would also hide
    # KeyboardInterrupt/SystemExit and unrelated failures behind a reinstall.
    import os
    os.system('pip install git+https://github.com/facebookresearch/detectron2.git')
import spaces
import cv2
import torch
from matplotlib.pyplot import axis
import gradio as gr
import requests
import numpy as np
from torch import nn
import requests

import torch

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import ColorMode
import tqdm


# Weights of the phase-2 detection model on the Hugging Face Hub, pinned to a
# specific revision so the demo stays reproducible.
# NOTE(review): detectron2's checkpointer is expected to fetch this URL on
# first use — confirm.
model_path = "https://huggingface.co/asalhi85/Smartathon-Detectron2/resolve/9f4d573340b033e651d4937906f23850f9b6bc57/phase2_detectron_model.pth"

# Detection config: Faster R-CNN X-101 32x8d FPN backbone (local yaml) with
# 11 classes — the "None" placeholder plus the 10 categories listed below.
cfg = get_cfg()
cfg.merge_from_file("./faster_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 11
cfg.MODEL.WEIGHTS = model_path

# Register human-readable class names so the visualizers can label boxes.
my_metadata = MetadataCatalog.get("dbmdz_coco_all")
my_metadata.thing_classes = ["None", "BAD_BILLBOARD","BROKEN_SIGNAGE","CLUTTER_SIDEWALK","CONSTRUCTION_ROAD","FADED_SIGNAGE","GARBAGE","GRAFFITI","POTHOLES","SAND_ON_ROAD","UNKEPT_FACADE"]





    
def predict_frame(frame,_):
    """Run detection on a single frame and return the annotated image.

    The second argument is ignored (kept for the two-argument callback
    signature this function is wired to).
    """
    # Fixed confidence cut-off for the per-frame path.
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    detections = DefaultPredictor(cfg)(frame)["instances"].to("cpu")
    # Reverse the channel order before drawing (presumably BGR -> RGB,
    # matching the other inference paths in this file).
    drawer = Visualizer(frame[:, :, ::-1], my_metadata, scale=1.2, instance_mode=ColorMode.IMAGE)
    return drawer.draw_instance_predictions(detections).get_image()

@spaces.GPU
def opt_process_vid(video_path):
    """Annotate a video with detections, trading accuracy for speed.

    To keep the output at the source frame rate without running the model on
    every frame, predictions are refreshed every ``skipped_outputs`` frames
    and the overlay is redrawn every ``skipped_viz`` frames; in between, the
    most recently rendered frame is written again.

    Args:
        video_path: path of the input video file.

    Returns:
        Path of the written ``./output.mp4`` file.
    """
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    predictor = DefaultPredictor(cfg)
    v = VideoVisualizer(my_metadata, ColorMode.IMAGE)

    cap = cv2.VideoCapture(video_path)
    frame_size = (int(cap.get(3)), int(cap.get(4)))  # (width, height)
    fps = int(cap.get(5))
    output_path = './output.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    skipped_viz = 5        # redraw the overlay every 5th frame
    skipped_outputs = 15   # rerun the detector every 15th frame

    visualization = None   # set on frame 0 (both modulo conditions hit)
    for i in tqdm.tqdm(range(num_frames)):
        ret, frame = cap.read()
        if not ret:
            break
        if i % skipped_outputs == 0:
            # Refresh prediction results; reused until the next refresh.
            outputs = predictor(frame)
        if i % skipped_viz == 0:
            # Swap channels for drawing, then swap back: the code draws on
            # the RGB copy and VideoWriter receives BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))
            visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)
        # On skipped frames this duplicates the last rendered frame so the
        # output keeps the input's frame count and fps.
        video_writer.write(visualization)

    # Release resources
    cap.release()
    video_writer.release()
    torch.cuda.empty_cache()

    return output_path
    

@spaces.GPU
def inference(image_url, image, min_score):
  """Run detection on an image and return the annotated result.

  Args:
      image_url: optional direct URL of an image; takes priority when set.
      image: filepath of an uploaded image (used when no URL is given).
      min_score: minimum detection confidence for drawn boxes.

  Returns:
      The annotated image as an array (RGB, per Visualizer output).
  """
  # Bug fix: the original nested everything below inside the `else:` branch
  # of the CUDA check, so on CPU-only machines the function set the device
  # and returned None without ever running inference.
  cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  print(cfg.MODEL.DEVICE)

  if image_url:
      r = requests.get(image_url)
      if r:
          im = np.frombuffer(r.content, dtype="uint8")
          im = cv2.imdecode(im, cv2.IMREAD_COLOR)
      # NOTE(review): if the request fails, `im` is unbound and the
      # predictor call below raises — consider an explicit error message.
  else:
      # cv2.imread yields BGR — the channel order the model expects.
      im = cv2.imread(image)

  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = min_score
  predictor = DefaultPredictor(cfg)

  outputs = predictor(im)

  # Visualizer draws on the channel-reversed (RGB) copy of the image.
  v = Visualizer(im[:, :, ::-1], my_metadata, scale=1.2, instance_mode=ColorMode.IMAGE)
  out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

  return out.get_image()

@spaces.GPU
def process_vid(video_path):
  """Annotate every frame of a video with detections (slow, exhaustive path).

  Unlike ``opt_process_vid``, this runs the detector on every single frame.

  Args:
      video_path: path of the input video file.

  Returns:
      Path of the written ``./output.mp4`` file.
  """
  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
  cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  predictor = DefaultPredictor(cfg)
  v = VideoVisualizer(my_metadata, ColorMode.IMAGE)

  cap = cv2.VideoCapture(video_path)
  frame_width = int(cap.get(3))
  frame_height = int(cap.get(4))
  frame_size = (frame_width, frame_height)
  fps = int(cap.get(5))
  output_path = './output.mp4'
  # Bug fix: the original used 'MJPG', which commonly fails to encode into an
  # .mp4 container; 'mp4v' matches the container (and opt_process_vid).
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
  video_writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

  def run_on_video(video, max_frames):
    """Yield an annotated BGR frame for each frame read from `video`."""
    read_frames = 0
    while True:
      has_frame, frame = video.read()
      if not has_frame:
          break

      # Get prediction results for this frame (predictor sees the raw frame).
      outputs = predictor(frame)

      # Swap channels before drawing, then swap back so the VideoWriter
      # receives BGR (same round-trip as the other video path).
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))
      yield cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)

      read_frames += 1
      # Bug fix: was `> max_frames`, which allowed one extra frame.
      if read_frames >= max_frames:
          break

  num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

  # Enumerate the frames of the video and write them out.
  # Bug fix: the original also wrote a debug PNG ('POSE detectron2.png') for
  # every single frame — leftover debugging overhead, removed.
  for visualization in tqdm.tqdm(run_on_video(cap, num_frames), total=num_frames):
      video_writer.write(visualization)

  # Release resources
  cap.release()
  video_writer.release()
  return output_path


# Demo metadata.
# NOTE(review): title/description/article are defined but never passed to the
# interfaces below — confirm whether they should be wired into
# gr.TabbedInterface / gr.Interface.
title = "Smartathon Phase2 Demo - Baseer"
description = "This demo introduces an interactive playground for our trained Detectron2 model."
article = '<p>Detectron model is available from our repository <a href="https://github.com/asalhi/Smartathon-Baseer">here</a>.</p>'




# Build the input/output components shared by the two interfaces.
# NOTE(review): gr.Row()/gr.Column() are used here outside a gr.Blocks
# context — the layout containers have no effect on the gr.Interface pages;
# confirm this is intentional (the components themselves still work).
with gr.Row():
    with gr.Column():
        #gr.HTML("""<h5 style="color:navy;">3- Or insert direct url of an image.</h5>""")
        input_url = gr.Textbox(label="Image URL", placeholder="")
        #gr.HTML("""<h5 style="color:navy;">2- Or upload an image by clicking on the canvas.<br></h5>""")
        input_image = gr.Image(type="filepath", image_mode="RGB", sources="upload", label="Input Image")
        input_video = gr.Video(format="mp4",sources="upload", label="Input video" )
        gr.HTML("""<h5 style="color:navy;">4- You can use this slider to control boxes min score: </h5>""")
        sliderr = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, label="Minimum score")
    output_image = gr.Image(type="pil", label="Output")
    output_video = gr.Video(format="mp4", label="Output")


# Image tab: URL or uploaded image plus a confidence slider.
img_interface = gr.Interface(
    fn=inference,
   inputs=[input_url,input_image,sliderr], outputs=[output_image], api_name="find"
)
# Video tab: uses the optimized (frame-skipping) video pipeline.
video_interface =  gr.Interface(
    fn=opt_process_vid,
   inputs=[input_video], outputs=[output_video], api_name="vid"
)
demo = gr.TabbedInterface([img_interface, video_interface], ["Image Upload", "Video Upload"])

demo.launch()