Spaces:
Sleeping
Sleeping
| ### 1. Imports and class names setup ### | |
| import gradio as gr | |
| import os | |
| import requests | |
| import torch | |
| import numpy as np | |
| from roboflow import Roboflow | |
| import cv2 | |
# Roboflow hosted detector used by `detect` and `show_preds_video`.
# SECURITY NOTE(review): this API key was committed in plain text. Rotate it and
# provide the new one through the ROBOFLOW_API_KEY environment variable (e.g. a
# Space secret). The literal below is kept only as a backward-compatible
# fallback so the app still starts when the variable is not set.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "gjZE3lykkitagkxHplyJ"))
project = rf.workspace().project("hard-hat-sample-gqvqs")
model = project.version(2).model
| from model import create_effnetb2_model | |
| from timeit import default_timer as timer | |
| from typing import Tuple, Dict | |
# Remote sample assets fetched at start-up (see the download loop below).
file_urls = ["https://www.dropbox.com/s/7sjfwncffg8xej2/video_7.mp4?dl=1"]
def download_file(url, save_name):
    """Download ``url`` to ``save_name`` unless that file already exists.

    Args:
        url: Address of the file to fetch.
        save_name: Local path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never stored as if it were the requested file.
    """
    if os.path.exists(save_name):
        # Already present from an earlier run; skip the network round-trip.
        return
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # The context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(save_name, "wb") as out_file:
        out_file.write(response.content)
# Fetch every sample asset: URLs containing "mp4" are stored under one fixed
# video name, anything else is stored as a numbered image.
for i, url in enumerate(file_urls):
    download_file(url, "video.mp4" if "mp4" in url else f"image_{i}.jpg")

# Example rows for the video tab: one row holding the downloaded video.
video_path = [["video.mp4"]]
# Class labels; `predict` pairs class_names[i] with output index i of the model.
class_names = ["hat", "nohat"]

### 2. Model and transforms preparation ###

# Build the EffNetB2 model together with its preprocessing transforms
# (two output classes, i.e. len(class_names)).
effnetb2, effnetb2_transforms = create_effnetb2_model(num_classes=2)

# Restore the saved weights, mapping every tensor onto the CPU.
effnetb2.load_state_dict(
    torch.load(f="hat_model.pth", map_location=torch.device("cpu"))
)
def detect(imagepath):
    """Run the Roboflow detector on an image file and draw the detected boxes.

    Args:
        imagepath: Path of the image on disk (the Gradio input for this
            function is declared with ``type="filepath"``).

    Returns:
        The image as an RGB numpy array with one rectangle per detection.

    Raises:
        ValueError: If OpenCV cannot read ``imagepath``.
    """
    result = model.predict(imagepath, confidence=40, overlap=30).json()
    img = cv2.imread(imagepath)
    if img is None:
        raise ValueError(f"Could not read image: {imagepath!r}")

    for pred in result.get("predictions", []):
        # Roboflow reports the box centre (x, y) plus its width/height, so the
        # corners lie half a width / half a height away from the centre.
        # (The original treated x/y as the top-left corner and added x to the
        # height when computing the bottom edge.)
        half_w = pred["width"] / 2
        half_h = pred["height"] / 2
        # cv2.rectangle requires integer pixel coordinates.
        top_left = (int(pred["x"] - half_w), int(pred["y"] - half_h))
        bottom_right = (int(pred["x"] + half_w), int(pred["y"] + half_h))
        cv2.rectangle(
            img,
            top_left,
            bottom_right,
            color=(0, 0, 255),  # red in OpenCV's BGR channel order
            thickness=2,
            lineType=cv2.LINE_AA,
        )

    # cv2.imread yields BGR; convert so the Gradio numpy Image output shows
    # true colours, matching what show_preds_video already does.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
### 3. Predict function ###

def predict(img) -> Tuple[Dict, float]:
    """Classify ``img`` with the EffNetB2 model and time the call.

    Returns:
        A ``(scores, seconds)`` pair: a dict mapping each entry of
        ``class_names`` to its softmax probability (the shape Gradio's Label
        output consumes), and the elapsed time rounded to five decimals.
    """
    started = timer()

    # Apply the model's transforms, then add a leading batch dimension.
    batch = effnetb2_transforms(img).unsqueeze(0)

    # Evaluation mode + inference mode for the forward pass.
    effnetb2.eval()
    with torch.inference_mode():
        logits = effnetb2(batch)
        probs = torch.softmax(logits, dim=1)

    scores = {label: float(probs[0][idx]) for idx, label in enumerate(class_names)}
    elapsed = round(timer() - started, 5)
    return scores, elapsed
def show_preds_video(video_path):
    """Yield the frames of a video, each with its detected boxes drawn on it.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Yields:
        One RGB numpy array per successfully read frame, annotated with a
        rectangle for every detection returned by the Roboflow model.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure. Without this break the loop
                # would spin forever because the capture stays "opened".
                break

            frame_copy = frame.copy()
            result = model.predict(frame, confidence=40, overlap=30).json()
            for pred in result.get("predictions", []):
                # Roboflow reports the box centre (x, y) plus width/height;
                # derive the corners from the centre. (The original treated
                # x/y as the top-left corner and added x to the height.)
                half_w = pred["width"] / 2
                half_h = pred["height"] / 2
                # cv2.rectangle requires integer pixel coordinates.
                top_left = (int(pred["x"] - half_w), int(pred["y"] - half_h))
                bottom_right = (int(pred["x"] + half_w), int(pred["y"] + half_h))
                cv2.rectangle(
                    frame_copy,  # the original drew on an undefined name `img`
                    top_left,
                    bottom_right,
                    color=(0, 0, 255),  # red in OpenCV's BGR channel order
                    thickness=2,
                    lineType=cv2.LINE_AA,
                )
            # OpenCV frames are BGR; Gradio's numpy Image output expects RGB.
            yield cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
    finally:
        # Release the capture even if the consumer stops iterating early.
        cap.release()
### 4. Gradio app ###

# Text shown on the interface pages.
title = "Construction Hat Detector"
description = "An EfficientNetB2 feature extractor computer vision model to classify images as Hat or Nohat."
article = "(https://www.learnpytorch.io/)."

# Example images from an "examples/" directory are currently disabled:
#example_list = [["examples/" + example] for example in os.listdir("examples")]

# Detect tab: image file path in, annotated numpy image out.
inputs_image = [gr.components.Image(type="filepath", label="Input Image")]
outputs_image = [gr.components.Image(type="numpy", label="Output Image")]

# Video tab: video file path in, annotated frames out as numpy images.
inputs_video = [gr.components.Video(type="filepath", label="Input Video")]
outputs_video = [gr.components.Image(type="numpy", label="Output Image")]
# Tab 1: classification of a PIL image via `predict`, showing the class
# probabilities and the prediction time.
app1 = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=2, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    title=title,
    description=description,
    article=article,
)

# Tab 2: box detection on a single image via `detect`.
app2 = gr.Interface(
    fn=detect,
    inputs=inputs_image,
    outputs=outputs_image,
    title=title,
)

# Tab 3: frame-by-frame detection on a video via `show_preds_video`, with the
# downloaded sample video offered as an (uncached) example.
app3 = gr.Interface(
    fn=show_preds_video,
    inputs=inputs_video,
    outputs=outputs_video,
    examples=video_path,
    cache_examples=False,
)

# Combine the three interfaces into one tabbed demo and start serving it.
demo = gr.TabbedInterface(
    [app1, app2, app3],
    ["Classify", "Detect", "Video Interface"],
)
demo.launch()