Pamudu committed
Commit bdb8207 · verified · 1 Parent(s): 72b1fa2

Upload 8 files

Files changed (8):
  1. app.py +126 -157
  2. available_models.json +38 -1
  3. requirements.txt +1 -1
  4. yolov10l.pt +3 -0
  5. yolov10m.pt +3 -0
  6. yolov10n.pt +3 -0
  7. yolov10s.pt +3 -0
  8. yolov10x.pt +3 -0
app.py CHANGED
@@ -1,5 +1,3 @@
-#### PROCESS VIDEO ####
-
 import cv2
 import time
 import os
@@ -66,41 +64,140 @@ def get_examples(directory):
     paths = [os.path.join(directory, item) for item in item_names]
     return paths
 
+
+#############################
+##### PROCESS FUNCTIONS #####
+#############################
+
+def initialize_model(model_name: str):
+    if 'yolov5' in model_name:
+        model_name = model_name + "u" # for ultralytics naming convention
+    torch_model_name = f'{model_name}.pt'
+    model = YOLO(torch_model_name)
+    return model
+
+
+def rearrange_detections_and_confidences(class_confidences, class_detections, class_dict, total_frames=1):
+    class_confidences_ = {class_dict[class_id]: sum(confidences) / len(confidences) for class_id, confidences in class_confidences.items()}
+    class_detections_ = {class_dict[class_id]: n_detections/total_frames for class_id, n_detections in class_detections.items()}
+
+    class_confidences_ = dict(sorted(class_confidences_.items(), key=lambda x: x[0]))
+    class_detections_ = dict(sorted(class_detections_.items(), key=lambda x: x[0]))
+
+    df_class_confidences = pd.DataFrame(list(class_confidences_.items()), columns=['object', 'conf score'])
+    df_class_detections = pd.DataFrame(list(class_detections_.items()), columns=['object', 'detections'])
+
+    return df_class_confidences, df_class_detections
+
+
+def process_detections(results, class_detections={}, class_confidences={}):
+    for detection in results[0].boxes:
+        class_id = int(detection.cls)
+        confidence = float(detection.conf)
+
+        if class_id in class_detections:
+            class_detections[class_id] += 1
+            class_confidences[class_id].append(confidence)
+        else:
+            class_detections[class_id] = 1
+            class_confidences[class_id] = [confidence]
+
+    return class_detections, class_confidences
+
+def add_fps_to_frame(frame, fps):
+    # Display FPS on the top-left corner
+    frame[:30, :150] = (0, 0, 0)
+    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
+    return frame
+
+def add_model_name_to_frame(frame, model_name):
+    # Display model name in the bottom-right corner
+    h, w, _ = frame.shape
+    frame[h-30:h, w-120:w] = (0, 0, 0)
+    cv2.putText(frame, model_name, (w-110, h-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
+    return frame
+
+def calculate_fps(t_prev, t_new):
+    return 1 / (t_new - t_prev)
+
+def generate_bar_plot(data, x, y, title, tooltip, y_lim):
+    return gr.BarPlot(
+        data, x=x, y=y,
+        title=title,
+        tooltip=tooltip,
+        y_lim=y_lim,
+    )
+
+
+#############################
+####### PROCESS IMAGE #######
+#############################
+
+def process_image(np_image: np.ndarray, model_name: str, conf: float = 0.25, iou: float = 0.5, img_size: int = 640, device: str = 'cpu'):
+    print(time.ctime())
+    if (np_image is None) or (np_image.size == 0):
+        return None, None, None, None
+
+    model = initialize_model(model_name)
+
+    class_dict = model.names
+    t_start = time.time()
+
+    results = model(np_image, imgsz=img_size, conf=conf, iou=iou, half=False, device=device, verbose=False)
+
+    class_detections, class_confidences = process_detections(results)
+
+    # add annotations
+    annotated_image = results[0].plot()
+    annotated_image = add_model_name_to_frame(annotated_image, model_name)
+    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
+
+    # save
+    save_dir = r'tmp_images'
+    os.makedirs(save_dir, exist_ok=True)
+    output_path = f'{save_dir}/{model_name}_{len(os.listdir(save_dir))+1}.jpg'
+    cv2.imwrite(output_path, annotated_image)
+
+    processing_time = time.time() - t_start
+    processing_time = f'{round(processing_time*1000, 2)} ms/frame'
+
+    # rearrange
+    df_class_confidences, df_class_detections = rearrange_detections_and_confidences(class_confidences, class_detections, class_dict)
+
+    confidence_barplot = generate_bar_plot(df_class_confidences, "object", "conf score", "Distribution of Class Confidences", ["object", "conf score"], [0, 1])
+    detection_barplot = generate_bar_plot(df_class_detections, "object", "detections", "Distribution of Class Detections", ["object", "detections"], [0, 20])
+
+    return output_path, processing_time, confidence_barplot, detection_barplot
+
+
 #############################
 ####### PROCESS VIDEO #######
 #############################
-def process_video(video_path, model_name, frame_limit, conf:float=0.25, iou:float=0.5, img_size:int=640, device:str='cpu'):
+
+def process_video(video_path, model_name, frame_limit, conf: float = 0.25, iou: float = 0.5, img_size: int = 640, device: str = 'cpu'):
     print(time.ctime())
     if (video_path is None) or len(video_path) < 2:
         return None, None, None, None, None
-
-    if 'yolov5' in model_name:
-        model_name = model_name + "u" # for ultralytics naming convention
 
-    torch_model_name = f'{model_name}.pt'
-    print(f'Model Name : {torch_model_name}')
-    model = YOLO(torch_model_name)
+    model = initialize_model(model_name)
 
-    # Initialize variables for FPS calculation
-    t_prev = t_start = time.time()
+    t_prev = t_start = time.time()
     fps_sum = 0.0
     frame_count = 0
     font = cv2.FONT_HERSHEY_SIMPLEX
     class_dict = model.names
-
-    cap = cv2.VideoCapture(video_path)
-    total_frames = min(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) , frame_limit)
 
-    # Prepare video writer for the output
+    # setup video saving
+    cap = cv2.VideoCapture(video_path)
+    total_frames = min(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), frame_limit)
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     fps = int(cap.get(cv2.CAP_PROP_FPS))
-
+
     save_dir = r'tmp_videos'
     os.makedirs(save_dir, exist_ok=True)
     output_path = f'{save_dir}/{os.path.basename(video_path).split(".")[0]}__{model_name}_{random.randint(1000, 9999)}.mp4'
-    print(f'Output video path : {output_path}')
-    video_out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
+    video_out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (frame_width, frame_height))
 
     # Initialize detection and confidence tracking
     class_detections = {}
@@ -114,164 +211,36 @@ def process_video(video_path, model_name, frame_limit, conf:float=0.25, iou:float=0.5, img_size:int=640, device:str='cpu'):
         if frame_count >= frame_limit:
             break
 
-        # Run YOLOv8 inference on the frame
-        results = model(frame, imgsz=img_size, conf=conf, iou=iou, half=False, device='cpu', verbose=False)
-        annotated_frame = results[0].plot()
+        results = model(frame, imgsz=img_size, conf=conf, iou=iou, half=False, device=device, verbose=False)
 
-        # Calculate FPS
         t_new = time.time()
-        fps = 1 / (t_new - t_prev)
+        fps = calculate_fps(t_prev, t_new)
         t_prev = t_new
 
-        # Update the FPS sum and frame count
         fps_sum += fps
        frame_count += 1
 
-        # Track detections and confidence scores
-        for detection in results[0].boxes:
-            class_id = int(detection.cls)
-            confidence = float(detection.conf)
+        class_detections, class_confidences = process_detections(results, class_detections, class_confidences)
 
-            if class_id in class_detections:
-                class_detections[class_id] += 1
-                class_confidences[class_id].append(confidence)
-            else:
-                class_detections[class_id] = 1
-                class_confidences[class_id] = [confidence]
-
-        # Display FPS on the top-left corner
-        annotated_frame[:30, :150] = (0, 0, 0)
-        fps_text = f"FPS: {fps:.2f}"
-        cv2.putText(annotated_frame, fps_text, (10, 20), font, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
-
-        # Display model name in the bottom-right corner
-        h, w, _ = annotated_frame.shape
-        annotated_frame[h-30:h, w-120:w] = (0, 0, 0)
-        cv2.putText(annotated_frame, model_name, (w-110, h-10), font, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
+        annotated_frame = results[0].plot()
+        annotated_frame = add_fps_to_frame(annotated_frame, fps)
+        annotated_frame = add_model_name_to_frame(annotated_frame, model_name)
 
         video_out.write(annotated_frame)
 
-    # Calculate and print the average FPS for the current video
     avg_fps = fps_sum / frame_count
-
-    # Calculate average confidence for each class
-    class_confidences = {class_dict[class_id]: sum(confidences) / len(confidences) for class_id, confidences in class_confidences.items()}
-    class_detections = {class_dict[class_id]: n_detections / total_frames for class_id, n_detections in class_detections.items()}
-
-    # sort detections to have uniform order
-    class_confidences = dict(sorted(class_confidences.items(), key=lambda x: x[0]))
-    class_detections = dict(sorted(class_detections.items(), key=lambda x: x[0]))
-    print(class_confidences, class_detections)
-
     avg_frame_processing_time = (time.time() - t_start) / total_frames
-
-    df_class_confidences = pd.DataFrame(list(class_confidences.items()), columns=['object', 'conf score'])
-    df_class_detections = pd.DataFrame(list(class_detections.items()), columns=['object', 'detections'])
-
-    # create bar plot for class confidence
-    confidence_barplot = gr.BarPlot(
-        df_class_confidences, x="object", y="conf score",
-        title="Distribution of Class Confidences",
-        tooltip=["object", "conf score"],
-        y_lim=[0, 1],
-    )
 
-    # create bar plot for class confidence
-    detection_barplot = gr.BarPlot(
-        df_class_detections, x="object", y="detections",
-        title="Distribution of Class Detections",
-        tooltip=["object", "detections"],
-        y_lim=[0, 10],
-    )
+    # rearrange
+    df_class_confidences, df_class_detections = rearrange_detections_and_confidences(class_confidences, class_detections, class_dict, total_frames)
+
+    confidence_barplot = generate_bar_plot(df_class_confidences, "object", "conf score", "Distribution of Class Confidences", ["object", "conf score"], [0, 1])
+    detection_barplot = generate_bar_plot(df_class_detections, "object", "detections", "Distribution of Class Detections", ["object", "detections"], [0, 10])
 
     avg_frame_processing_time = f'{round(avg_frame_processing_time*1000, 2)} ms/frame'
 
     return output_path, round(avg_fps, 4), avg_frame_processing_time, confidence_barplot, detection_barplot
 
-#############################
-####### PROCESS IMAGE #######
-#############################
-def process_image(np_image:np.ndarray, model_name:str, conf:float=0.25, iou:float=0.5, img_size:int=640, device:str='cpu'):
-    print(time.ctime())
-    if (np_image is None) or (np_image.size == 0):
-        return None, None, None, None
-
-    if 'yolov5' in model_name:
-        model_name = model_name + "u" # for ultralytics naming convention
-    torch_model_name = f'{model_name}.pt'
-    print(f'Model Name : {torch_model_name}')
-    model = YOLO(torch_model_name)
-
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    class_dict = model.names
-    t_start = time.time()
-
-    # np_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
-    # Run YOLOv8 inference on the image
-    results = model(np_image, imgsz=img_size, conf=conf, iou=iou, half=False, device=device, verbose=False)
-    annotated_image = results[0].plot()
-
-    # Initialize detection and confidence tracking
-    class_detections = {}
-    class_confidences = {}
-
-    # Track detections and confidence scores
-    for detection in results[0].boxes:
-        class_id = int(detection.cls)
-        confidence = float(detection.conf)
-
-        if class_id in class_detections:
-            class_detections[class_id] += 1
-            class_confidences[class_id].append(confidence)
-        else:
-            class_detections[class_id] = 1
-            class_confidences[class_id] = [confidence]
-
-    # Display model name in the bottom-right corner
-    h, w, _ = annotated_image.shape
-    annotated_image[h-30:h, w-120:w] = (0, 0, 0)
-    cv2.putText(annotated_image, model_name, (w-110, h-10), font, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
-
-    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
-    save_dir = r'tmp_images'
-    os.makedirs(save_dir, exist_ok=True)
-    output_path = f'{save_dir}/{model_name}_{len(os.listdir(save_dir))+1}.jpg'
-    print(f'Output image path : {output_path}')
-    cv2.imwrite(output_path, annotated_image)
-
-    # Calculate average confidence for each class
-    class_confidences = {class_dict[class_id]: sum(confidences) / len(confidences) for class_id, confidences in class_confidences.items()}
-    class_detections = {class_dict[class_id]: n_detections for class_id, n_detections in class_detections.items()}
-
-    # sort detections to have uniform order
-    class_confidences = dict(sorted(class_confidences.items(), key=lambda x: x[0]))
-    class_detections = dict(sorted(class_detections.items(), key=lambda x: x[0]))
-    print(class_confidences, class_detections)
-
-    processing_time = time.time() - t_start
-    processing_time = f'{round(processing_time*1000, 2)} ms/frame'
-
-    df_class_confidences = pd.DataFrame(list(class_confidences.items()), columns=['object', 'conf score'])
-    df_class_detections = pd.DataFrame(list(class_detections.items()), columns=['object', 'detections'])
-
-    # Create bar plot for class confidence
-    confidence_barplot = gr.BarPlot(
-        df_class_confidences, x="object", y="conf score",
-        title="Distribution of Class Confidences",
-        tooltip=["object", "conf score"],
-        y_lim=[0, 1],
-    )
-
-    # Create bar plot for class detections
-    detection_barplot = gr.BarPlot(
-        df_class_detections, x="object", y="detections",
-        title="Distribution of Class Detections",
-        tooltip=["object", "detections"],
-        y_lim=[0, 20],
-    )
-
-    return output_path, processing_time, confidence_barplot, detection_barplot
-
 
 #############################
 ######## GRADIO APP ########
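
Note on the refactor above: the new shared helper process_detections declares mutable default arguments (class_detections={}, class_confidences={}). In Python these dicts are created once at function definition and reused across calls, so process_image, which relies on the defaults, would accumulate counts from earlier images within the same process. A minimal sketch of the usual None-default idiom that avoids this; it is an editorial illustration, not part of this commit:

    # Illustrative sketch only (not in this commit): None defaults avoid
    # Python's shared-mutable-default pitfall in process_detections.
    def process_detections(results, class_detections=None, class_confidences=None):
        class_detections = {} if class_detections is None else class_detections
        class_confidences = {} if class_confidences is None else class_confidences
        for detection in results[0].boxes:
            class_id = int(detection.cls)
            confidence = float(detection.conf)
            class_detections[class_id] = class_detections.get(class_id, 0) + 1
            class_confidences.setdefault(class_id, []).append(confidence)
        return class_detections, class_confidences

process_video is unaffected because it always passes its own dicts explicitly.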
available_models.json CHANGED
@@ -83,5 +83,42 @@
83
  "params_M": 58.1,
84
  "FLOPs_B": 192.5
85
  }
86
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
 
83
  "params_M": 58.1,
84
  "FLOPs_B": 192.5
85
  }
86
+ ],
87
+ "yolov10": [
88
+ {
89
+ "model": "yolov10n",
90
+ "size": 640,
91
+ "mAPval_50-95": 38.5,
92
+ "params_M": 2.3,
93
+ "FLOPs_B": 6.7
94
+ },
95
+ {
96
+ "model": "yolov10s",
97
+ "size": 640,
98
+ "mAPval_50-95": 46.3,
99
+ "params_M": 7.2,
100
+ "FLOPs_B": 21.6
101
+ },
102
+ {
103
+ "model": "yolov10m",
104
+ "size": 640,
105
+ "mAPval_50-95": 51.1,
106
+ "params_M": 15.4,
107
+ "FLOPs_B": 59.1
108
+ },
109
+ {
110
+ "model": "yolov10l",
111
+ "size": 640,
112
+ "mAPval_50-95": 53.2,
113
+ "params_M": 24.4,
114
+ "FLOPs_B": 120.3
115
+ },
116
+ {
117
+ "model": "yolov10x",
118
+ "size": 640,
119
+ "mAPval_50-95": 54.4,
120
+ "params_M": 29.5,
121
+ "FLOPs_B": 160.4
122
+ }
123
+ ]
124
  }
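
The new "yolov10" block follows the same schema as the existing families (model, size, mAPval_50-95, params_M, FLOPs_B). A minimal sketch of reading it back under that assumption; how app.py actually consumes this file is not shown in this diff:

    import json

    # Load the model registry and flatten it to the names the app can offer.
    with open('available_models.json') as f:
        families = json.load(f)

    model_names = [entry['model'] for family in families.values() for entry in family]
    # [..., 'yolov10n', 'yolov10s', 'yolov10m', 'yolov10l', 'yolov10x']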
requirements.txt CHANGED
@@ -13,7 +13,7 @@ scipy>=1.4.1
 torch>=1.7.0
 torchvision>=0.8.1
 tqdm>=4.64.0
-ultralytics==8.2.10
+ultralytics==8.2.48

 # Logging -------------------------------------
 # tensorboard>=2.4.1
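
The ultralytics pin moves from 8.2.10 to 8.2.48, presumably so the yolov10*.pt checkpoints added in this commit load through the same YOLO(...) entry point (YOLOv10 support landed in later 8.2.x releases). A quick smoke test, assuming the weights sit next to the script:

    from ultralytics import YOLO

    # Load one of the newly added checkpoints through the same entry point
    # app.py uses (initialize_model) and inspect its class map.
    model = YOLO('yolov10n.pt')
    print(model.names)  # {0: 'person', 1: 'bicycle', ...} for COCO-trained weights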
yolov10l.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0685e1e597f73aba8e56bc9651b6bb939c5b2f201dff5de0b9ef2a6207ae301
+size 52425230

yolov10m.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dc78f7a88591cec1e8716b8f5c7e3aefa9206684f025d202be34439ccb329a0
+size 33643667

yolov10n.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11287ed0735678e7ba1ac2a9b3098c049155b3fde123992e724c1264bcc16b6f
+size 5860383

yolov10s.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65fa8332c38a7189597a268897bce6c1026f9499711b9761fb0c2c639b91d4d6
+size 16623111

yolov10x.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:605982e0b2c2ff070351afc8df3ecb38c54cfd7f133813acb366f454eb79b9a6
+size 64395854
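
The five yolov10*.pt entries above are Git LFS pointer files: the repo stores only the spec version, sha256 oid, and byte size, while the actual weights live in LFS storage (fetched with git lfs pull after cloning). A small sketch for verifying a fetched checkpoint against the oid recorded in its pointer:

    import hashlib

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so multi-MB checkpoints don't need to fit in memory.
        digest = hashlib.sha256()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()

    # oid from the yolov10n.pt pointer above
    assert sha256_of('yolov10n.pt') == '11287ed0735678e7ba1ac2a9b3098c049155b3fde123992e724c1264bcc16b6f'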