# Baseer - Smartathon Phase 2 demo: a trained Detectron2 model served through Gradio.
try:
    import detectron2
except ImportError:
    import os
    os.system('pip install git+https://github.com/facebookresearch/detectron2.git')

import spaces
import cv2
import torch
import gradio as gr
import requests
import numpy as np
import tqdm

from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.data import MetadataCatalog

# Trained weights hosted on the Hugging Face Hub; Detectron2's checkpoint
# loader can fetch weights directly from a URL.
model_path = "https://huggingface.co/asalhi85/Smartathon-Detectron2/resolve/9f4d573340b033e651d4937906f23850f9b6bc57/phase2_detectron_model.pth"

cfg = get_cfg()
cfg.merge_from_file("./faster_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 11
cfg.MODEL.WEIGHTS = model_path

# Register the label names so the visualizers can annotate boxes.
my_metadata = MetadataCatalog.get("dbmdz_coco_all")
my_metadata.thing_classes = [
    "None", "BAD_BILLBOARD", "BROKEN_SIGNAGE", "CLUTTER_SIDEWALK",
    "CONSTRUCTION_ROAD", "FADED_SIGNAGE", "GARBAGE", "GRAFFITI",
    "POTHOLES", "SAND_ON_ROAD", "UNKEPT_FACADE",
]


def predict_frame(frame, _):
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    predictor = DefaultPredictor(cfg)
    outputs = predictor(frame)
    # The Visualizer expects RGB, so reverse OpenCV's BGR channel order.
    v = Visualizer(frame[:, :, ::-1], my_metadata, scale=1.2, instance_mode=ColorMode.IMAGE)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    return out.get_image()


@spaces.GPU
def opt_process_vid(video_path):
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    predictor = DefaultPredictor(cfg)
    v = VideoVisualizer(my_metadata, ColorMode.IMAGE)

    cap = cv2.VideoCapture(video_path)
    frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    output_path = './output.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # To keep the demo responsive, run the detector only on every 15th frame
    # and write a visualized frame only every 5th frame, reusing the most
    # recent predictions in between.
    skipped_viz = 5
    skipped_outputs = 15

    # Process the video.
    outputs = None
    for i in tqdm.tqdm(range(num_frames)):
        ret, frame = cap.read()
        if not ret:
            break
        if i % skipped_outputs == 0:
            # Get prediction results for this frame.
            outputs = predictor(frame)
        if i % skipped_viz == 0 and outputs is not None:
            # Draw a visualization of the predictions (the visualizer expects RGB).
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))
            visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)
            video_writer.write(visualization)

    # Release resources.
    cap.release()
    video_writer.release()
    torch.cuda.empty_cache()
    return output_path
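
# A possible optimization (my suggestion, not part of the original app): the
# handlers above and below each construct a fresh DefaultPredictor, which
# reloads the checkpoint on every request. Building the predictor once and
# reusing it would avoid that cost. A minimal sketch:
#
#   _predictor = None
#   def get_predictor():
#       global _predictor
#       if _predictor is None:
#           _predictor = DefaultPredictor(cfg)
#       return _predictor
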
@spaces.GPU
def inference(image_url, image, min_score):
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    print(cfg.MODEL.DEVICE)
    if image_url:
        r = requests.get(image_url)
        if r:
            im = np.frombuffer(r.content, dtype="uint8")
            im = cv2.imdecode(im, cv2.IMREAD_COLOR)
    else:
        im = cv2.imread(image)
    # The model expects BGR input, which is what cv2.imread/imdecode return.
    # im = image[:, :, ::-1]
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = min_score
    predictor = DefaultPredictor(cfg)
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1], my_metadata, scale=1.2, instance_mode=ColorMode.IMAGE)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    return out.get_image()


@spaces.GPU
def process_vid(video_path):
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    predictor = DefaultPredictor(cfg)
    v = VideoVisualizer(my_metadata, ColorMode.IMAGE)

    cap = cv2.VideoCapture(video_path)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_size = (frame_width, frame_height)
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    output_path = './output.mp4'
    # Use an MPEG-4 codec that matches the .mp4 container (MJPG does not).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    def runOnVideo(video, maxFrames):
        """Runs the predictor on every frame in the video (unless maxFrames
        is given) and yields each frame with the predictions drawn."""
        readFrames = 0
        while True:
            hasFrame, frame = video.read()
            if not hasFrame:
                break

            # Get prediction results for this frame.
            outputs = predictor(frame)

            # Convert OpenCV's BGR frame to RGB for the visualizer.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Draw a visualization of the predictions using the video visualizer.
            visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))

            # Convert the RGB visualization back to OpenCV's BGR format.
            visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)

            yield visualization
            readFrames += 1
            if readFrames > maxFrames:
                break

    # Create a cut-off for debugging.
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Enumerate the frames of the video.
    for visualization in tqdm.tqdm(runOnVideo(cap, num_frames), total=num_frames):
        # Write a test image (debug only; overwritten on every frame).
        cv2.imwrite('POSE detectron2.png', visualization)

        # Write to the video file.
        video_writer.write(visualization)

    # Release resources.
    cap.release()
    video_writer.release()
    return output_path


title = "Smartathon Phase2 Demo - Baseer"
description = "This demo introduces an interactive playground for our trained Detectron2 model."
article = "The Detectron model is available from our repository here."
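
# A quick way to exercise the pipeline outside Gradio (a hedged sketch; the
# sample paths below are assumptions, not files shipped with this repo):
#
#   result = inference(image_url="", image="./sample.jpg", min_score=0.4)
#   cv2.imwrite("./sample_out.jpg", result[:, :, ::-1])  # result is RGB
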
with gr.Row():
    with gr.Column():
        # gr.HTML("""3- Or insert a direct URL of an image.""")
        input_url = gr.Textbox(label="Image URL", placeholder="")
        # gr.HTML("""2- Or upload an image by clicking on the canvas.""")
        input_image = gr.Image(type="filepath", image_mode="RGB", sources="upload", label="Input Image")
        input_video = gr.Video(format="mp4", sources="upload", label="Input video")
        gr.HTML("""4- You can use this slider to control boxes min score:""")
        sliderr = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, label="Minimum score")
        output_image = gr.Image(type="pil", label="Output")
        output_video = gr.Video(format="mp4", label="Output")

img_interface = gr.Interface(
    fn=inference,
    inputs=[input_url, input_image, sliderr],
    outputs=[output_image],
    api_name="find",
)

video_interface = gr.Interface(
    fn=opt_process_vid,
    inputs=[input_video],
    outputs=[output_video],
    api_name="vid",
)

demo = gr.TabbedInterface([img_interface, video_interface], ["Image Upload", "Video Upload"])
demo.launch()