Arizal Firdaus Bagus Pratama committed on
Commit
370c4af
·
verified ·
1 Parent(s): 196408b

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +131 -0
  2. requirements.txt +7 -0
  3. sort.py +330 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import torch
4
+ from transformers import AutoImageProcessor, AutoModelForObjectDetection
5
+ import cv2
6
+ from PIL import Image
7
+ import numpy as np
8
+ import gradio as gr
9
+ import os
10
+
11
+ # Import the Sort class from the local 'sort.py' file
12
+ from sort import Sort
13
+
14
+ # --- LOAD MODELS AND TRACKER ONCE (PENTING!) ---
15
+ # This part runs only once when the app starts, so we don't reload the model for every user.
16
+ print("Loading model and processor...")
17
+ model_checkpoint = "facebook/detr-resnet-50"
18
+ image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
19
+ model = AutoModelForObjectDetection.from_pretrained(
20
+ model_checkpoint,
21
+ trust_remote_code=True
22
+ )
23
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
+ model.to(device)
25
+ print("Model loaded successfully.")
26
+ # ---------------------------------------------------
27
+
28
+ def iou(boxA, boxB):
29
+ # Standard IoU calculation
30
+ xA = max(boxA[0], boxB[0])
31
+ yA = max(boxA[1], boxB[1])
32
+ xB = min(boxA[2], boxB[2])
33
+ yB = min(boxA[3], boxB[3])
34
+ interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
35
+ boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
36
+ boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
37
+ iou_score = interArea / float(boxAArea + boxBArea - interArea)
38
+ return iou_score
39
+
40
+ # --- THE MAIN PROCESSING FUNCTION ---
41
+ def process_video(input_video_path):
42
+ # Initialize tracker and counters for each new video
43
+ tracker = Sort(min_hits=1, iou_threshold=0.3)
44
+ total_counts = {'person': 0, 'bicycle': 0, 'car': 0, 'motorcycle': 0}
45
+ counted_ids = set()
46
+
47
+ # Define the output path for the processed video
48
+ output_video_path = "output.mp4"
49
+
50
+ cap = cv2.VideoCapture(input_video_path)
51
+ if not cap.isOpened():
52
+ raise gr.Error(f"Could not open video file.")
53
+
54
+ frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
55
+ frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
56
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
57
+
58
+ # Use 'mp4v' codec which is widely compatible
59
+ out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
60
+
61
+ while True:
62
+ ret, frame = cap.read()
63
+ if not ret:
64
+ break
65
+
66
+ # --- (Logic from our notebook goes here) ---
67
+ pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
68
+ inputs = image_processor(images=pil_image, return_tensors="pt").to(device)
69
+ with torch.no_grad():
70
+ outputs = model(**inputs)
71
+ target_sizes = torch.tensor([pil_image.size[::-1]])
72
+ results = image_processor.post_process_object_detection(outputs, threshold=0.6, target_sizes=target_sizes)[0]
73
+
74
+ detections_for_sort = []
75
+ original_detections = []
76
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
77
+ label_name = model.config.id2label[label.item()]
78
+ if label_name in total_counts:
79
+ box_list = box.tolist()
80
+ detections_for_sort.append([box_list[0], box_list[1], box_list[2], box_list[3], score.item()])
81
+ original_detections.append({'box': box_list, 'label': label_name})
82
+
83
+ tracked_objects_raw = []
84
+ if len(detections_for_sort) > 0:
85
+ tracked_objects_raw = tracker.update(np.array(detections_for_sort))
86
+
87
+ for obj in tracked_objects_raw:
88
+ x1, y1, x2, y2, obj_id = [int(val) for val in obj]
89
+ best_iou = 0
90
+ best_label = None
91
+ for det in original_detections:
92
+ iou_score = iou([x1, y1, x2, y2], det['box'])
93
+ if iou_score > best_iou:
94
+ best_iou = iou_score
95
+ best_label = det['label']
96
+
97
+ if best_label and obj_id not in counted_ids:
98
+ total_counts[best_label] += 1
99
+ counted_ids.add(obj_id)
100
+
101
+ if best_label:
102
+ cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
103
+ cv2.putText(frame, f'{best_label} ID: {obj_id}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
104
+
105
+ y_offset = 30
106
+ for obj_name, count in total_counts.items():
107
+ text = f'Total {obj_name.capitalize()}: {count}'
108
+ cv2.putText(frame, text, (15, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 5)
109
+ cv2.putText(frame, text, (15, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
110
+ y_offset += 30
111
+
112
+ out.write(frame)
113
+
114
+ cap.release()
115
+ out.release()
116
+
117
+ # Return the path to the processed video
118
+ return output_video_path
119
+
120
+ # --- GRADIO INTERFACE ---
121
+ title = "Real-Time Object Tracking & Counting with DETR and SORT"
122
+ description = "Upload a video to see object detection and tracking in action. This demo uses Facebook's DETR model for detection and the SORT algorithm to assign unique IDs and count objects. For the full code, visit the associated GitHub repo."
123
+
124
+ gr.Interface(
125
+ fn=process_video,
126
+ inputs=gr.Video(label="Input Video"),
127
+ outputs=gr.Video(label="Processed Video"),
128
+ title=title,
129
+ description=description,
130
+ examples=[['5402016-hd_1920_1080_30fps.mp4']] # Tambahkan video contoh Anda ke repo
131
+ ).launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ opencv-python-headless
4
+ filterpy
5
+ scikit-image
6
+ gradio
7
+ timm
sort.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SORT: A Simple, Online and Realtime Tracker
3
+ Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ """
18
+ from __future__ import print_function
19
+
20
+ import os
21
+ import numpy as np
22
+ import matplotlib
23
+ matplotlib.use('TkAgg')
24
+ import matplotlib.pyplot as plt
25
+ import matplotlib.patches as patches
26
+ from skimage import io
27
+
28
+ import glob
29
+ import time
30
+ import argparse
31
+ from filterpy.kalman import KalmanFilter
32
+
33
+ np.random.seed(0)
34
+
35
+
36
+ def linear_assignment(cost_matrix):
37
+ try:
38
+ import lap
39
+ _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
40
+ return np.array([[y[i],i] for i in x if i >= 0]) #
41
+ except ImportError:
42
+ from scipy.optimize import linear_sum_assignment
43
+ x, y = linear_sum_assignment(cost_matrix)
44
+ return np.array(list(zip(x, y)))
45
+
46
+
47
+ def iou_batch(bb_test, bb_gt):
48
+ """
49
+ From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
50
+ """
51
+ bb_gt = np.expand_dims(bb_gt, 0)
52
+ bb_test = np.expand_dims(bb_test, 1)
53
+
54
+ xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
55
+ yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
56
+ xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
57
+ yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
58
+ w = np.maximum(0., xx2 - xx1)
59
+ h = np.maximum(0., yy2 - yy1)
60
+ wh = w * h
61
+ o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
62
+ + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh)
63
+ return(o)
64
+
65
+
66
+ def convert_bbox_to_z(bbox):
67
+ """
68
+ Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
69
+ [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
70
+ the aspect ratio
71
+ """
72
+ w = bbox[2] - bbox[0]
73
+ h = bbox[3] - bbox[1]
74
+ x = bbox[0] + w/2.
75
+ y = bbox[1] + h/2.
76
+ s = w * h #scale is just area
77
+ r = w / float(h)
78
+ return np.array([x, y, s, r]).reshape((4, 1))
79
+
80
+
81
+ def convert_x_to_bbox(x,score=None):
82
+ """
83
+ Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
84
+ [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
85
+ """
86
+ w = np.sqrt(x[2] * x[3])
87
+ h = x[2] / w
88
+ if(score==None):
89
+ return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
90
+ else:
91
+ return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
92
+
93
+
94
+ class KalmanBoxTracker(object):
95
+ """
96
+ This class represents the internal state of individual tracked objects observed as bbox.
97
+ """
98
+ count = 0
99
+ def __init__(self,bbox):
100
+ """
101
+ Initialises a tracker using initial bounding box.
102
+ """
103
+ #define constant velocity model
104
+ self.kf = KalmanFilter(dim_x=7, dim_z=4)
105
+ self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
106
+ self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
107
+
108
+ self.kf.R[2:,2:] *= 10.
109
+ self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
110
+ self.kf.P *= 10.
111
+ self.kf.Q[-1,-1] *= 0.01
112
+ self.kf.Q[4:,4:] *= 0.01
113
+
114
+ self.kf.x[:4] = convert_bbox_to_z(bbox)
115
+ self.time_since_update = 0
116
+ self.id = KalmanBoxTracker.count
117
+ KalmanBoxTracker.count += 1
118
+ self.history = []
119
+ self.hits = 0
120
+ self.hit_streak = 0
121
+ self.age = 0
122
+
123
+ def update(self,bbox):
124
+ """
125
+ Updates the state vector with observed bbox.
126
+ """
127
+ self.time_since_update = 0
128
+ self.history = []
129
+ self.hits += 1
130
+ self.hit_streak += 1
131
+ self.kf.update(convert_bbox_to_z(bbox))
132
+
133
+ def predict(self):
134
+ """
135
+ Advances the state vector and returns the predicted bounding box estimate.
136
+ """
137
+ if((self.kf.x[6]+self.kf.x[2])<=0):
138
+ self.kf.x[6] *= 0.0
139
+ self.kf.predict()
140
+ self.age += 1
141
+ if(self.time_since_update>0):
142
+ self.hit_streak = 0
143
+ self.time_since_update += 1
144
+ self.history.append(convert_x_to_bbox(self.kf.x))
145
+ return self.history[-1]
146
+
147
+ def get_state(self):
148
+ """
149
+ Returns the current bounding box estimate.
150
+ """
151
+ return convert_x_to_bbox(self.kf.x)
152
+
153
+
154
+ def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
155
+ """
156
+ Assigns detections to tracked object (both represented as bounding boxes)
157
+
158
+ Returns 3 lists of matches, unmatched_detections and unmatched_trackers
159
+ """
160
+ if(len(trackers)==0):
161
+ return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
162
+
163
+ iou_matrix = iou_batch(detections, trackers)
164
+
165
+ if min(iou_matrix.shape) > 0:
166
+ a = (iou_matrix > iou_threshold).astype(np.int32)
167
+ if a.sum(1).max() == 1 and a.sum(0).max() == 1:
168
+ matched_indices = np.stack(np.where(a), axis=1)
169
+ else:
170
+ matched_indices = linear_assignment(-iou_matrix)
171
+ else:
172
+ matched_indices = np.empty(shape=(0,2))
173
+
174
+ unmatched_detections = []
175
+ for d, det in enumerate(detections):
176
+ if(d not in matched_indices[:,0]):
177
+ unmatched_detections.append(d)
178
+ unmatched_trackers = []
179
+ for t, trk in enumerate(trackers):
180
+ if(t not in matched_indices[:,1]):
181
+ unmatched_trackers.append(t)
182
+
183
+ #filter out matched with low IOU
184
+ matches = []
185
+ for m in matched_indices:
186
+ if(iou_matrix[m[0], m[1]]<iou_threshold):
187
+ unmatched_detections.append(m[0])
188
+ unmatched_trackers.append(m[1])
189
+ else:
190
+ matches.append(m.reshape(1,2))
191
+ if(len(matches)==0):
192
+ matches = np.empty((0,2),dtype=int)
193
+ else:
194
+ matches = np.concatenate(matches,axis=0)
195
+
196
+ return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
197
+
198
+
199
+ class Sort(object):
200
+ def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
201
+ """
202
+ Sets key parameters for SORT
203
+ """
204
+ self.max_age = max_age
205
+ self.min_hits = min_hits
206
+ self.iou_threshold = iou_threshold
207
+ self.trackers = []
208
+ self.frame_count = 0
209
+
210
+ def update(self, dets=np.empty((0, 5))):
211
+ """
212
+ Params:
213
+ dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
214
+ Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
215
+ Returns the a similar array, where the last column is the object ID.
216
+
217
+ NOTE: The number of objects returned may differ from the number of detections provided.
218
+ """
219
+ self.frame_count += 1
220
+ # get predicted locations from existing trackers.
221
+ trks = np.zeros((len(self.trackers), 5))
222
+ to_del = []
223
+ ret = []
224
+ for t, trk in enumerate(trks):
225
+ pos = self.trackers[t].predict()[0]
226
+ trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
227
+ if np.any(np.isnan(pos)):
228
+ to_del.append(t)
229
+ trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
230
+ for t in reversed(to_del):
231
+ self.trackers.pop(t)
232
+ matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks, self.iou_threshold)
233
+
234
+ # update matched trackers with assigned detections
235
+ for m in matched:
236
+ self.trackers[m[1]].update(dets[m[0], :])
237
+
238
+ # create and initialise new trackers for unmatched detections
239
+ for i in unmatched_dets:
240
+ trk = KalmanBoxTracker(dets[i,:])
241
+ self.trackers.append(trk)
242
+ i = len(self.trackers)
243
+ for trk in reversed(self.trackers):
244
+ d = trk.get_state()[0]
245
+ if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
246
+ ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
247
+ i -= 1
248
+ # remove dead tracklet
249
+ if(trk.time_since_update > self.max_age):
250
+ self.trackers.pop(i)
251
+ if(len(ret)>0):
252
+ return np.concatenate(ret)
253
+ return np.empty((0,5))
254
+
255
+ def parse_args():
256
+ """Parse input arguments."""
257
+ parser = argparse.ArgumentParser(description='SORT demo')
258
+ parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
259
+ parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data')
260
+ parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train')
261
+ parser.add_argument("--max_age",
262
+ help="Maximum number of frames to keep alive a track without associated detections.",
263
+ type=int, default=1)
264
+ parser.add_argument("--min_hits",
265
+ help="Minimum number of associated detections before track is initialised.",
266
+ type=int, default=3)
267
+ parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3)
268
+ args = parser.parse_args()
269
+ return args
270
+
271
+ if __name__ == '__main__':
272
+ # all train
273
+ args = parse_args()
274
+ display = args.display
275
+ phase = args.phase
276
+ total_time = 0.0
277
+ total_frames = 0
278
+ colours = np.random.rand(32, 3) #used only for display
279
+ if(display):
280
+ if not os.path.exists('mot_benchmark'):
281
+ print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
282
+ exit()
283
+ plt.ion()
284
+ fig = plt.figure()
285
+ ax1 = fig.add_subplot(111, aspect='equal')
286
+
287
+ if not os.path.exists('output'):
288
+ os.makedirs('output')
289
+ pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt')
290
+ for seq_dets_fn in glob.glob(pattern):
291
+ mot_tracker = Sort(max_age=args.max_age,
292
+ min_hits=args.min_hits,
293
+ iou_threshold=args.iou_threshold) #create instance of the SORT tracker
294
+ seq_dets = np.loadtxt(seq_dets_fn, delimiter=',')
295
+ seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0]
296
+
297
+ with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file:
298
+ print("Processing %s."%(seq))
299
+ for frame in range(int(seq_dets[:,0].max())):
300
+ frame += 1 #detection and frame numbers begin at 1
301
+ dets = seq_dets[seq_dets[:, 0]==frame, 2:7]
302
+ dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
303
+ total_frames += 1
304
+
305
+ if(display):
306
+ fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame))
307
+ im =io.imread(fn)
308
+ ax1.imshow(im)
309
+ plt.title(seq + ' Tracked Targets')
310
+
311
+ start_time = time.time()
312
+ trackers = mot_tracker.update(dets)
313
+ cycle_time = time.time() - start_time
314
+ total_time += cycle_time
315
+
316
+ for d in trackers:
317
+ print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
318
+ if(display):
319
+ d = d.astype(np.int32)
320
+ ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
321
+
322
+ if(display):
323
+ fig.canvas.flush_events()
324
+ plt.draw()
325
+ ax1.cla()
326
+
327
+ print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))
328
+
329
+ if(display):
330
+ print("Note: to get real runtime results run without the option: --display")