Pamudu committed
Commit bdb8207 · verified · 1 Parent(s): 72b1fa2

Upload 8 files

Files changed (8):
  1. app.py +126 -157
  2. available_models.json +38 -1
  3. requirements.txt +1 -1
  4. yolov10l.pt +3 -0
  5. yolov10m.pt +3 -0
  6. yolov10n.pt +3 -0
  7. yolov10s.pt +3 -0
  8. yolov10x.pt +3 -0
app.py CHANGED
@@ -1,5 +1,3 @@
-#### PROCESS VIDEO ####
-
 import cv2
 import time
 import os
@@ -66,41 +64,140 @@ def get_examples(directory):
     paths = [os.path.join(directory, item) for item in item_names]
     return paths
 
+
+#############################
+##### PROCESS FUNCTIONS #####
+#############################
+
+def initialize_model(model_name: str):
+    if 'yolov5' in model_name:
+        model_name = model_name + "u" # for ultralytics naming convention
+    torch_model_name = f'{model_name}.pt'
+    model = YOLO(torch_model_name)
+    return model
+
+
+def rearrange_detections_and_confidences(class_confidences, class_detections, class_dict, total_frames=1):
+    class_confidences_ = {class_dict[class_id]: sum(confidences) / len(confidences) for class_id, confidences in class_confidences.items()}
+    class_detections_ = {class_dict[class_id]: n_detections/total_frames for class_id, n_detections in class_detections.items()}
+
+    class_confidences_ = dict(sorted(class_confidences_.items(), key=lambda x: x[0]))
+    class_detections_ = dict(sorted(class_detections_.items(), key=lambda x: x[0]))
+
+    df_class_confidences = pd.DataFrame(list(class_confidences_.items()), columns=['object', 'conf score'])
+    df_class_detections = pd.DataFrame(list(class_detections_.items()), columns=['object', 'detections'])
+
+    return df_class_confidences, df_class_detections
+
+
+def process_detections(results, class_detections={}, class_confidences={}):
+    for detection in results[0].boxes:
+        class_id = int(detection.cls)
+        confidence = float(detection.conf)
+
+        if class_id in class_detections:
+            class_detections[class_id] += 1
+            class_confidences[class_id].append(confidence)
+        else:
+            class_detections[class_id] = 1
+            class_confidences[class_id] = [confidence]
+
+    return class_detections, class_confidences
+
+def add_fps_to_frame(frame, fps):
+    # Display FPS on the top-left corner
+    frame[:30, :150] = (0, 0, 0)
+    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
+    return frame
+
+def add_model_name_to_frame(frame, model_name):
+    # Display model name in the bottom-right corner
+    h, w, _ = frame.shape
+    frame[h-30:h, w-120:w] = (0, 0, 0)
+    cv2.putText(frame, model_name, (w-110, h-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
+    return frame
+
+def calculate_fps(t_prev, t_new):
+    return 1 / (t_new - t_prev)
+
+def generate_bar_plot(data, x, y, title, tooltip, y_lim):
+    return gr.BarPlot(
+        data, x=x, y=y,
+        title=title,
+        tooltip=tooltip,
+        y_lim=y_lim,
+    )
+
+
+#############################
+####### PROCESS IMAGE #######
+#############################
+
+def process_image(np_image: np.ndarray, model_name: str, conf: float = 0.25, iou: float = 0.5, img_size: int = 640, device: str = 'cpu'):
+    print(time.ctime())
+    if (np_image is None) or (np_image.size == 0):
+        return None, None, None, None
+
+    model = initialize_model(model_name)
+
+    class_dict = model.names
+    t_start = time.time()
+
+    results = model(np_image, imgsz=img_size, conf=conf, iou=iou, half=False, device=device, verbose=False)
+
+    class_detections, class_confidences = process_detections(results)
+
+    # add annotations
+    annotated_image = results[0].plot()
+    annotated_image = add_model_name_to_frame(annotated_image, model_name)
+    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
+
+    # save
+    save_dir = r'tmp_images'
+    os.makedirs(save_dir, exist_ok=True)
+    output_path = f'{save_dir}/{model_name}_{len(os.listdir(save_dir))+1}.jpg'
+    cv2.imwrite(output_path, annotated_image)
+
+    processing_time = time.time() - t_start
+    processing_time = f'{round(processing_time*1000, 2)} ms/frame'
+
+    # rearrange
+    df_class_confidences, df_class_detections = rearrange_detections_and_confidences(class_confidences, class_detections, class_dict)
+
+    confidence_barplot = generate_bar_plot(df_class_confidences, "object", "conf score", "Distribution of Class Confidences", ["object", "conf score"], [0, 1])
+    detection_barplot = generate_bar_plot(df_class_detections, "object", "detections", "Distribution of Class Detections", ["object", "detections"], [0, 20])
+
+    return output_path, processing_time, confidence_barplot, detection_barplot
+
+
 #############################
 ####### PROCESS VIDEO #######
 #############################
-def process_video(video_path, model_name, frame_limit, conf:float=0.25, iou:float=0.5, img_size:int=640, device:str='cpu'):
+
+def process_video(video_path, model_name, frame_limit, conf: float = 0.25, iou: float = 0.5, img_size: int = 640, device: str = 'cpu'):
     print(time.ctime())
     if (video_path is None) or len(video_path) < 2:
         return None, None, None, None, None
-
-    if 'yolov5' in model_name:
-        model_name = model_name + "u" # for ultralytics naming convention
 
-    torch_model_name = f'{model_name}.pt'
-    print(f'Model Name : {torch_model_name}')
-    model = YOLO(torch_model_name)
+    model = initialize_model(model_name)
 
-    # Initialize variables for FPS calculation
-    t_prev = t_start = time.time()
+    t_prev = t_start = time.time()
     fps_sum = 0.0
     frame_count = 0
     font = cv2.FONT_HERSHEY_SIMPLEX
     class_dict = model.names
-
-    cap = cv2.VideoCapture(video_path)
-    total_frames = min(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) , frame_limit)
 
-    # Prepare video writer for the output
+    # setup video saving
+    cap = cv2.VideoCapture(video_path)
+    total_frames = min(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), frame_limit)
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     fps = int(cap.get(cv2.CAP_PROP_FPS))
-
+
     save_dir = r'tmp_videos'
     os.makedirs(save_dir, exist_ok=True)
     output_path = f'{save_dir}/{os.path.basename(video_path).split(".")[0]}__{model_name}_{random.randint(1000, 9999)}.mp4'
-    print(f'Output video path : {output_path}')
-    video_out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
+    video_out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_width, frame_height))
 
     # Initialize detection and confidence tracking
     class_detections = {}
@@ -114,164 +211,36 @@ def process_video(video_path, model_name, frame_limit, conf:float=0.25, iou:float=0.5, img_size:int=640, device:str='cpu'):
         if frame_count >= frame_limit:
             break
 
-        # Run YOLOv8 inference on the frame
-        results = model(frame, imgsz=img_size, conf=conf, iou=iou, half=False, device='cpu', verbose=False)
-        annotated_frame = results[0].plot()
+        results = model(frame, imgsz=img_size, conf=conf, iou=iou, half=False, device=device, verbose=False)
 
-        # Calculate FPS
         t_new = time.time()
-        fps = 1 / (t_new - t_prev)
+        fps = calculate_fps(t_prev, t_new)
         t_prev = t_new
 
-        # Update the FPS sum and frame count
         fps_sum += fps
        frame_count += 1
 
-        # Track detections and confidence scores
-        for detection in results[0].boxes:
-            class_id = int(detection.cls)
-            confidence = float(detection.conf)
+        class_detections, class_confidences = process_detections(results, class_detections, class_confidences)
 
-            if class_id in class_detections:
-                class_detections[class_id] += 1
-                class_confidences[class_id].append(confidence)
-            else:
-                class_detections[class_id] = 1
-                class_confidences[class_id] = [confidence]
-
-        # Display FPS on the top-left corner
-        annotated_frame[:30, :150] = (0, 0, 0)
-        fps_text = f"FPS: {fps:.2f}"
-        cv2.putText(annotated_frame, fps_text, (10, 20), font, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
-
-        # Display model name in the bottom-right corner
-        h, w, _ = annotated_frame.shape
-        annotated_frame[h-30:h, w-120:w] = (0, 0, 0)
-        cv2.putText(annotated_frame, model_name, (w-110, h-10), font, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
+        annotated_frame = results[0].plot()
+        annotated_frame = add_fps_to_frame(annotated_frame, fps)
+        annotated_frame = add_model_name_to_frame(annotated_frame, model_name)
 
         video_out.write(annotated_frame)
 
-    # Calculate and print the average FPS for the current video
     avg_fps = fps_sum / frame_count
-
-    # Calculate average confidence for each class
-    class_confidences = {class_dict[class_id]: sum(confidences) / len(confidences) for class_id, confidences in class_confidences.items()}
-    class_detections = {class_dict[class_id]: n_detections / total_frames for class_id, n_detections in class_detections.items()}
-
-    # sort detections to have uniform order
-    class_confidences = dict(sorted(class_confidences.items(), key=lambda x: x[0]))
-    class_detections = dict(sorted(class_detections.items(), key=lambda x: x[0]))
-    print(class_confidences, class_detections)
-
     avg_frame_processing_time = (time.time() - t_start) / total_frames
-
-    df_class_confidences = pd.DataFrame(list(class_confidences.items()), columns=['object', 'conf score'])
-    df_class_detections = pd.DataFrame(list(class_detections.items()), columns=['object', 'detections'])
-
-    # create bar plot for class confidence
-    confidence_barplot = gr.BarPlot(
-        df_class_confidences, x="object", y="conf score",
-        title="Distribution of Class Confidences",
-        tooltip=["object", "conf score"],
-        y_lim=[0, 1],
-    )
 
-    # create bar plot for class confidence
-    detection_barplot = gr.BarPlot(
-        df_class_detections, x="object", y="detections",
-        title="Distribution of Class Detections",
-        tooltip=["object", "detections"],
-        y_lim=[0, 10],
-    )
+    # rearrange
+    df_class_confidences, df_class_detections = rearrange_detections_and_confidences(class_confidences, class_detections, class_dict, total_frames)
+
+    confidence_barplot = generate_bar_plot(df_class_confidences, "object", "conf score", "Distribution of Class Confidences", ["object", "conf score"], [0, 1])
+    detection_barplot = generate_bar_plot(df_class_detections, "object", "detections", "Distribution of Class Detections", ["object", "detections"], [0, 10])
 
     avg_frame_processing_time = f'{round(avg_frame_processing_time*1000, 2)} ms/frame'
 
     return output_path, round(avg_fps, 4), avg_frame_processing_time, confidence_barplot, detection_barplot
 
-#############################
-####### PROCESS IMAGE #######
-#############################
-def process_image(np_image:np.ndarray, model_name:str, conf:float=0.25, iou:float=0.5, img_size:int=640, device:str='cpu'):
-    print(time.ctime())
-    if (np_image is None) or (np_image.size == 0):
-        return None, None, None, None
-
-    if 'yolov5' in model_name:
-        model_name = model_name + "u" # for ultralytics naming convention
-    torch_model_name = f'{model_name}.pt'
-    print(f'Model Name : {torch_model_name}')
-    model = YOLO(torch_model_name)
-
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    class_dict = model.names
-    t_start = time.time()
-
-    # np_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
-    # Run YOLOv8 inference on the image
-    results = model(np_image, imgsz=img_size, conf=conf, iou=iou, half=False, device=device, verbose=False)
-    annotated_image = results[0].plot()
-
-    # Initialize detection and confidence tracking
-    class_detections = {}
-    class_confidences = {}
-
-    # Track detections and confidence scores
-    for detection in results[0].boxes:
-        class_id = int(detection.cls)
-        confidence = float(detection.conf)
-
-        if class_id in class_detections:
-            class_detections[class_id] += 1
-            class_confidences[class_id].append(confidence)
-        else:
-            class_detections[class_id] = 1
-            class_confidences[class_id] = [confidence]
-
-    # Display model name in the bottom-right corner
-    h, w, _ = annotated_image.shape
-    annotated_image[h-30:h, w-120:w] = (0, 0, 0)
-    cv2.putText(annotated_image, model_name, (w-110, h-10), font, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
-
-    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
-    save_dir = r'tmp_images'
-    os.makedirs(save_dir, exist_ok=True)
-    output_path = f'{save_dir}/{model_name}_{len(os.listdir(save_dir))+1}.jpg'
-    print(f'Output image path : {output_path}')
-    cv2.imwrite(output_path, annotated_image)
-
-    # Calculate average confidence for each class
-    class_confidences = {class_dict[class_id]: sum(confidences) / len(confidences) for class_id, confidences in class_confidences.items()}
-    class_detections = {class_dict[class_id]: n_detections for class_id, n_detections in class_detections.items()}
-
-    # sort detections to have uniform order
-    class_confidences = dict(sorted(class_confidences.items(), key=lambda x: x[0]))
-    class_detections = dict(sorted(class_detections.items(), key=lambda x: x[0]))
-    print(class_confidences, class_detections)
-
-    processing_time = time.time() - t_start
-    processing_time = f'{round(processing_time*1000, 2)} ms/frame'
-
-    df_class_confidences = pd.DataFrame(list(class_confidences.items()), columns=['object', 'conf score'])
-    df_class_detections = pd.DataFrame(list(class_detections.items()), columns=['object', 'detections'])
-
-    # Create bar plot for class confidence
-    confidence_barplot = gr.BarPlot(
-        df_class_confidences, x="object", y="conf score",
-        title="Distribution of Class Confidences",
-        tooltip=["object", "conf score"],
-        y_lim=[0, 1],
-    )
-
-    # Create bar plot for class detections
-    detection_barplot = gr.BarPlot(
-        df_class_detections, x="object", y="detections",
-        title="Distribution of Class Detections",
-        tooltip=["object", "detections"],
-        y_lim=[0, 20],
-    )
-
-    return output_path, processing_time, confidence_barplot, detection_barplot
-
 
 #############################
 ######## GRADIO APP ########
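
Note on the refactor above: the new shared helper process_detections declares mutable default arguments (class_detections={}, class_confidences={}). In Python these dicts are created once at function definition and reused across calls, so process_image, which relies on the defaults, would accumulate counts from earlier images within the same process. A minimal sketch of the usual None-default idiom that avoids this; it is an editorial illustration, not part of this commit:

    # Illustrative sketch only (not in this commit): None defaults avoid
    # Python's shared-mutable-default pitfall in process_detections.
    def process_detections(results, class_detections=None, class_confidences=None):
        class_detections = {} if class_detections is None else class_detections
        class_confidences = {} if class_confidences is None else class_confidences
        for detection in results[0].boxes:
            class_id = int(detection.cls)
            confidence = float(detection.conf)
            class_detections[class_id] = class_detections.get(class_id, 0) + 1
            class_confidences.setdefault(class_id, []).append(confidence)
        return class_detections, class_confidences

process_video is unaffected because it always passes its own dicts explicitly.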
available_models.json CHANGED
@@ -83,5 +83,42 @@
83
  "params_M": 58.1,
84
  "FLOPs_B": 192.5
85
  }
86
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
 
83
  "params_M": 58.1,
84
  "FLOPs_B": 192.5
85
  }
86
+ ],
87
+ "yolov10": [
88
+ {
89
+ "model": "yolov10n",
90
+ "size": 640,
91
+ "mAPval_50-95": 38.5,
92
+ "params_M": 2.3,
93
+ "FLOPs_B": 6.7
94
+ },
95
+ {
96
+ "model": "yolov10s",
97
+ "size": 640,
98
+ "mAPval_50-95": 46.3,
99
+ "params_M": 7.2,
100
+ "FLOPs_B": 21.6
101
+ },
102
+ {
103
+ "model": "yolov10m",
104
+ "size": 640,
105
+ "mAPval_50-95": 51.1,
106
+ "params_M": 15.4,
107
+ "FLOPs_B": 59.1
108
+ },
109
+ {
110
+ "model": "yolov10l",
111
+ "size": 640,
112
+ "mAPval_50-95": 53.2,
113
+ "params_M": 24.4,
114
+ "FLOPs_B": 120.3
115
+ },
116
+ {
117
+ "model": "yolov10x",
118
+ "size": 640,
119
+ "mAPval_50-95": 54.4,
120
+ "params_M": 29.5,
121
+ "FLOPs_B": 160.4
122
+ }
123
+ ]
124
  }
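
The new "yolov10" block follows the same schema as the existing families (model, size, mAPval_50-95, params_M, FLOPs_B). A minimal sketch of reading it back under that assumption; how app.py actually consumes this file is not shown in this diff:

    import json

    # Load the model registry and flatten it to the names the app can offer.
    with open('available_models.json') as f:
        families = json.load(f)

    model_names = [entry['model'] for family in families.values() for entry in family]
    # [..., 'yolov10n', 'yolov10s', 'yolov10m', 'yolov10l', 'yolov10x']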
requirements.txt CHANGED
@@ -13,7 +13,7 @@ scipy>=1.4.1
 torch>=1.7.0
 torchvision>=0.8.1
 tqdm>=4.64.0
-ultralytics==8.2.10
+ultralytics==8.2.48

 # Logging -------------------------------------
 # tensorboard>=2.4.1
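
The ultralytics pin moves from 8.2.10 to 8.2.48, presumably so the yolov10*.pt checkpoints added in this commit load through the same YOLO(...) entry point (YOLOv10 support landed in later 8.2.x releases). A quick smoke test, assuming the weights sit next to the script:

    from ultralytics import YOLO

    # Load one of the newly added checkpoints through the same entry point
    # app.py uses (initialize_model) and inspect its class map.
    model = YOLO('yolov10n.pt')
    print(model.names)  # {0: 'person', 1: 'bicycle', ...} for COCO-trained weights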
yolov10l.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0685e1e597f73aba8e56bc9651b6bb939c5b2f201dff5de0b9ef2a6207ae301
+size 52425230

yolov10m.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dc78f7a88591cec1e8716b8f5c7e3aefa9206684f025d202be34439ccb329a0
+size 33643667

yolov10n.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11287ed0735678e7ba1ac2a9b3098c049155b3fde123992e724c1264bcc16b6f
+size 5860383

yolov10s.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65fa8332c38a7189597a268897bce6c1026f9499711b9761fb0c2c639b91d4d6
+size 16623111

yolov10x.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:605982e0b2c2ff070351afc8df3ecb38c54cfd7f133813acb366f454eb79b9a6
+size 64395854
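
The five yolov10*.pt entries above are Git LFS pointer files: the repo stores only the spec version, sha256 oid, and byte size, while the actual weights live in LFS storage (fetched with git lfs pull after cloning). A small sketch for verifying a fetched checkpoint against the oid recorded in its pointer:

    import hashlib

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so multi-MB checkpoints don't need to fit in memory.
        digest = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()

    # oid from the yolov10n.pt pointer above
    assert sha256_of('yolov10n.pt') == '11287ed0735678e7ba1ac2a9b3098c049155b3fde123992e724c1264bcc16b6f'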