ayushsaun committed on
Commit
4fcaf7f
·
1 Parent(s): 9b02776

Rollback to original version

Browse files
Files changed (2) hide show
  1. __pycache__/inference.cpython-312.pyc +0 -0
  2. inference.py +187 -251
__pycache__/inference.cpython-312.pyc ADDED
Binary file (19.8 kB). View file
 
inference.py CHANGED
@@ -1,288 +1,224 @@
 
1
  import cv2
2
  import joblib
3
- import os
4
  import numpy as np
 
5
 
6
 
7
- class CameraMotionCompensator:
8
- def __init__(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  self.prev_frame = None
10
  self.prev_kp = None
11
  self.prev_desc = None
12
- self.orb = cv2.ORB_create(nfeatures=1000)
13
- self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
14
-
15
- def estimate_motion(self, frame):
16
  if frame is None:
17
  return np.eye(2, 3, dtype=np.float32)
18
-
19
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
20
  kp, desc = self.orb.detectAndCompute(gray, None)
21
-
22
- if self.prev_frame is None or desc is None or self.prev_desc is None or len(desc) < 4 or len(self.prev_desc) < 4:
23
  self.prev_frame = gray
24
  self.prev_kp = kp
25
  self.prev_desc = desc
26
  return np.eye(2, 3, dtype=np.float32)
27
-
 
 
 
28
  matches = self.matcher.match(self.prev_desc, desc)
 
29
  if len(matches) < 4:
30
  return np.eye(2, 3, dtype=np.float32)
31
-
32
- matches = sorted(matches, key=lambda x: x.distance)[:50]
33
- src = np.float32([self.prev_kp[m.queryIdx].pt for m in matches]).reshape(-1,1,2)
34
- dst = np.float32([kp[m.trainIdx].pt for m in matches]).reshape(-1,1,2)
35
-
36
- M,_ = cv2.estimateAffinePartial2D(src, dst)
37
- if M is None:
38
- M = np.eye(2,3,dtype=np.float32)
39
-
 
 
 
40
  self.prev_frame = gray
41
  self.prev_kp = kp
42
  self.prev_desc = desc
43
- return M
44
-
45
-
46
- class ImprovedSlidingWindowTracker:
47
- def __init__(self, scale_factor=2.0, overlap=0.3):
48
- self.scale_factor = scale_factor
49
- self.overlap = overlap
50
- self.sift = cv2.SIFT_create(nfeatures=2000)
51
- self.scale_levels = 3
52
- self.scale_step = 1.2
53
- index_params = dict(algorithm=1, trees=5)
54
- search_params = dict(checks=50)
55
- self.flann = cv2.FlannBasedMatcher(index_params, search_params)
56
-
57
- def generate_multiscale_windows(self, img_shape, prev_bbox, transform_matrix):
58
- x,y,w,h = map(int, prev_bbox)
59
- center = np.array([[x+w/2,y+h/2,1]],dtype=np.float32).T
60
- center = np.dot(transform_matrix, center)
61
- cx,cy = int(center[0]), int(center[1])
62
-
63
- windows=[]
64
- for s in np.linspace(1/self.scale_step, self.scale_step, self.scale_levels):
65
- ww=int(w*self.scale_factor*s)
66
- hh=int(h*self.scale_factor*s)
67
- step_x=max(1,int(ww*(1-self.overlap)//2))
68
- step_y=max(1,int(hh*(1-self.overlap)//2))
69
- for dy in range(-step_y,step_y+1,step_y):
70
- for dx in range(-step_x,step_x+1,step_x):
71
- wx=max(0,min(cx-ww//2+dx,img_shape[1]-ww))
72
- wy=max(0,min(cy-hh//2+dy,img_shape[0]-hh))
73
- if ww>10 and hh>10:
74
- windows.append((wx,wy,ww,hh))
75
- return windows
76
-
77
- def score_window(self, gray, window, template, template_desc):
78
- try:
79
- x,y,w,h = map(int,window)
80
- if y+h > gray.shape[0] or x+w > gray.shape[1]:
81
- return 0
82
- roi = gray[y:y+h,x:x+w]
83
- if roi.shape[0]<20 or roi.shape[1]<20:
84
- return 0
85
- roi = cv2.resize(roi,(template.shape[1],template.shape[0]))
86
- _,desc = self.sift.detectAndCompute(roi,None)
87
- if desc is None or template_desc is None or len(desc)==0:
88
- return 0
89
- matches = self.flann.knnMatch(template_desc,desc,k=2)
90
- good = [m for match_pair in matches if len(match_pair)==2 for m,n in [match_pair] if m.distance < 0.7*n.distance]
91
- if not good:
92
- return 0
93
- return len(good)*(1-np.mean([m.distance for m in good])/512)
94
- except:
95
- return 0
96
-
97
-
98
- class ObjectTrackerInference:
99
- def __init__(self, model_dir):
100
- print(f"Loading models from {model_dir}...")
101
- self.position_model = joblib.load(os.path.join(model_dir,'position_model.joblib'))
102
- self.size_model = joblib.load(os.path.join(model_dir,'size_model.joblib'))
103
- self.position_scaler = joblib.load(os.path.join(model_dir,'position_scaler.joblib'))
104
- self.size_scaler = joblib.load(os.path.join(model_dir,'size_scaler.joblib'))
105
- print("Models loaded successfully!")
106
 
107
- self.window_tracker = ImprovedSlidingWindowTracker()
108
- self.motion = CameraMotionCompensator()
109
- self.template = None
110
- self.template_desc = None
111
- self.prev_bbox = None
112
- self.template_update_counter = 0
113
-
114
- def local_binary_pattern(self, image):
115
- r=1;n=8
116
- out=np.zeros(image.shape)
117
- for i in range(r,image.shape[0]-r):
118
- for j in range(r,image.shape[1]-r):
119
- c=image[i,j];v=0
120
- for k in range(n):
121
- a=2*np.pi*k/n
122
- x=j+r*np.cos(a);y=i-r*np.sin(a)
123
- x1,x2=int(np.floor(x)),int(np.ceil(x))
124
- y1,y2=int(np.floor(y)),int(np.ceil(y))
125
- val=(image[y1,x1]+image[y1,x2]+image[y2,x1]+image[y2,x2])/4
126
- v|=(val>c)<<k
127
- out[i,j]=v
128
- return out
129
-
130
- def extract_features(self, frame, prev_bbox, M):
131
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
132
- windows = self.window_tracker.generate_multiscale_windows(frame.shape, prev_bbox, M)
133
-
134
- if self.template is None:
135
- x,y,w,h = map(int,prev_bbox)
136
- x = max(0, min(x, gray.shape[1]-w))
137
- y = max(0, min(y, gray.shape[0]-h))
138
- self.template = gray[y:y+h,x:x+w].copy()
139
- _,self.template_desc = self.window_tracker.sift.detectAndCompute(self.template,None)
140
-
141
- best_score = -1
142
- best_window = prev_bbox
143
- for w in windows:
144
- s = self.window_tracker.score_window(gray,w,self.template,self.template_desc)
145
- if s > best_score:
146
- best_score = s
147
- best_window = w
148
-
149
- x,y,w,h = map(int,best_window)
150
- x = max(0, min(x, gray.shape[1]-10))
151
- y = max(0, min(y, gray.shape[0]-10))
152
- w = min(w, gray.shape[1]-x)
153
- h = min(h, gray.shape[0]-y)
154
-
155
- roi = cv2.resize(gray[y:y+h,x:x+w],(64,64))
156
- hog = cv2.HOGDescriptor((64,64),(16,16),(8,8),(8,8),9).compute(roi).flatten()[:64]
157
- lbp = self.local_binary_pattern(roi)
158
-
159
- feat = list(hog)+[
160
- np.mean(lbp),np.std(lbp),
161
- *np.percentile(lbp,[25,50,75]),
162
- M[0,0],M[1,1],M[0,2],M[1,2],
163
- x,y,w,h
164
- ]
165
-
166
- return np.array(feat).reshape(1,-1),(x,y,w,h),windows
167
-
168
- def calculate_iou(self,a,b):
169
- x1,y1,w1,h1=a
170
- x2,y2,w2,h2=b
171
- xl=max(x1,x2);yt=max(y1,y2)
172
- xr=min(x1+w1,x2+w2);yb=min(y1+h1,y2+h2)
173
- if xr<xl or yb<yt:
174
- return 0
175
- inter=(xr-xl)*(yb-yt)
176
- return inter/(w1*h1+w2*h2-inter)
177
-
178
- def track_video(self, video_path, init_bbox, output_path='tracked_output.mp4', fps=30):
179
- print(f"Opening video: {video_path}")
180
 
181
- try:
182
- cap=cv2.VideoCapture(video_path)
183
- if not cap.isOpened():
184
- raise ValueError(f"Cannot open video: {video_path}")
185
-
186
- w,h=int(cap.get(3)),int(cap.get(4))
187
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
188
- print(f"Video: {w}x{h}, {total_frames} frames")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
191
- output_path = os.path.abspath(output_path)
192
- out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
193
-
194
-
195
- if not out.isOpened():
196
- raise RuntimeError("VideoWriter failed to open")
 
 
 
 
197
 
198
- # Reset state
199
- self.motion.prev_frame = None
200
- self.template = None
201
- self.template_desc = None
202
- self.prev_bbox = None
203
- self.template_update_counter = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
- cur=init_bbox
206
- frame_idx=0
207
-
208
- print("Starting tracking...")
209
- while True:
210
- ret,frame=cap.read()
211
- if not ret:
212
- break
213
-
214
- M=self.motion.estimate_motion(frame)
215
- feats,search_bbox,windows=self.extract_features(frame,cur,M)
216
-
217
- pos=self.position_model.predict(self.position_scaler.transform(feats))
218
- size=self.size_model.predict(self.size_scaler.transform(feats))
219
- x = int(pos[0,0])
220
- y = int(pos[0,1])
221
- w1 = int(size[0,0])
222
- h1 = int(size[0,1])
223
-
224
- x = max(0, min(x, w - 1))
225
- y = max(0, min(y, h - 1))
226
- w1 = max(10, min(w1, w - x))
227
- h1 = max(10, min(h1, h - y))
228
-
229
- pred = [x, y, w1, h1]
230
-
231
- self.template_update_counter+=1
232
- if self.template_update_counter>=5 and self.prev_bbox is not None:
233
- if self.calculate_iou(self.prev_bbox,pred)>0.6:
234
- g=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
235
- x,y,w1,h1=pred
236
- self.template=g[y:y+h1,x:x+w1].copy()
237
- _,self.template_desc=self.window_tracker.sift.detectAndCompute(self.template,None)
238
- self.template_update_counter=0
239
-
240
- # Draw yellow search windows
241
- for wx,wy,ww,wh in windows:
242
- cv2.rectangle(frame,(wx,wy),(wx+ww,wy+wh),(0,255,255),1)
243
-
244
- # Draw green motion arrows
245
- hh,ww=frame.shape[:2]
246
- for yy in range(0,hh,32):
247
- for xx in range(0,ww,32):
248
- sp=np.array([xx,yy,1])
249
- ep=np.dot(M,sp)
250
- if abs(ep[0]-xx)>1 or abs(ep[1]-yy)>1:
251
- cv2.arrowedLine(frame,(xx,yy),(int(ep[0]),int(ep[1])),(0,255,0),1,tipLength=0.2)
252
-
253
- # Draw tracked bounding box
254
- x,y,w1,h1=pred
255
- x = max(0, min(x, w - 1))
256
- y = max(0, min(y, h - 1))
257
- w1 = max(1, min(w1, w - x))
258
- h1 = max(1, min(h1, h - y))
259
- cv2.rectangle(frame,(x,y),(x+w1,y+h1),(0,255,0),2)
260
- cv2.putText(frame,f'Frame: {frame_idx}',(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2)
261
-
262
- out.write(frame)
263
- self.prev_bbox=pred
264
- cur=pred
265
- frame_idx+=1
266
 
267
- if frame_idx % 30 == 0:
268
- print(f"Processed {frame_idx}/{total_frames} frames")
 
269
 
270
- cap.release()
271
- out.release()
 
 
 
 
 
 
272
 
273
- print(f"Tracking complete! Saved to: {output_path}")
274
- return output_path
275
 
276
- except Exception as e:
277
- print(f"Error during tracking: {str(e)}")
278
- raise
 
 
 
 
 
279
 
280
 
281
  def main():
282
- tracker=ObjectTrackerInference('models')
283
- result = tracker.track_video('input_video.mp4',[100,100,50,50],'tracked_output.mp4')
284
- print(f"Output: {result}")
 
 
 
 
 
285
 
286
 
287
- if __name__=="__main__":
288
  main()
 
1
+ import os
2
  import cv2
3
  import joblib
 
4
  import numpy as np
5
+ from pathlib import Path
6
 
7
 
8
class ObjectTrackerInference:
    """Regression-based single-object tracker.

    Predicts each frame's bounding box with pre-trained scikit-learn style
    position/size regressors (loaded via joblib) over hand-crafted
    HOG + LBP + camera-motion features, and compensates camera motion with
    ORB feature matching between consecutive frames.
    """

    def __init__(self, model_dir='models'):
        """Load the four pre-trained model artifacts from *model_dir*.

        Expects `position_model.joblib`, `size_model.joblib`,
        `position_scaler.joblib` and `size_scaler.joblib` to exist in the
        directory; raises whatever `joblib.load` raises if they do not.
        """
        self.model_dir = model_dir

        print("Loading pre-trained models...")
        self.position_model = joblib.load(os.path.join(model_dir, 'position_model.joblib'))
        self.size_model = joblib.load(os.path.join(model_dir, 'size_model.joblib'))
        self.position_scaler = joblib.load(os.path.join(model_dir, 'position_scaler.joblib'))
        self.size_scaler = joblib.load(os.path.join(model_dir, 'size_scaler.joblib'))
        print("Models loaded successfully!")

        # SIFT is created here but only estimate/extract paths below use cv2
        # detectors; ORB + Hamming brute-force matcher drive camera-motion
        # estimation (crossCheck keeps only mutual best matches).
        self.sift = cv2.SIFT_create(nfeatures=2000)

        self.orb = cv2.ORB_create(nfeatures=1000)
        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        # Per-video matching state for estimate_camera_motion (reset by
        # track_video at the start of each run).
        self.prev_frame = None
        self.prev_kp = None
        self.prev_desc = None
27
+ def estimate_camera_motion(self, frame):
 
 
28
  if frame is None:
29
  return np.eye(2, 3, dtype=np.float32)
30
+
31
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
32
  kp, desc = self.orb.detectAndCompute(gray, None)
33
+
34
+ if self.prev_frame is None:
35
  self.prev_frame = gray
36
  self.prev_kp = kp
37
  self.prev_desc = desc
38
  return np.eye(2, 3, dtype=np.float32)
39
+
40
+ if desc is None or self.prev_desc is None or len(desc) < 4 or len(self.prev_desc) < 4:
41
+ return np.eye(2, 3, dtype=np.float32)
42
+
43
  matches = self.matcher.match(self.prev_desc, desc)
44
+
45
  if len(matches) < 4:
46
  return np.eye(2, 3, dtype=np.float32)
47
+
48
+ matches = sorted(matches, key=lambda x: x.distance)
49
+ good_matches = matches[:min(len(matches), 50)]
50
+
51
+ src_pts = np.float32([self.prev_kp[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
52
+ dst_pts = np.float32([kp[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
53
+
54
+ transform_matrix, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts)
55
+
56
+ if transform_matrix is None:
57
+ transform_matrix = np.eye(2, 3, dtype=np.float32)
58
+
59
  self.prev_frame = gray
60
  self.prev_kp = kp
61
  self.prev_desc = desc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ return transform_matrix
64
+
65
+ def local_binary_pattern(self, image, n_points=8, radius=1):
66
+ rows, cols = image.shape
67
+ output = np.zeros((rows, cols))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ for i in range(radius, rows-radius):
70
+ for j in range(radius, cols-radius):
71
+ center = image[i, j]
72
+ pattern = 0
73
+
74
+ for k in range(n_points):
75
+ angle = 2 * np.pi * k / n_points
76
+ x = j + radius * np.cos(angle)
77
+ y = i - radius * np.sin(angle)
78
+ x1, x2 = int(np.floor(x)), int(np.ceil(x))
79
+ y1, y2 = int(np.floor(y)), int(np.ceil(y))
80
+
81
+ f11 = image[y1, x1]
82
+ f12 = image[y1, x2]
83
+ f21 = image[y2, x1]
84
+ f22 = image[y2, x2]
85
+
86
+ x_weight = x - x1
87
+ y_weight = y - y1
88
+
89
+ pixel_value = (f11 * (1-x_weight) * (1-y_weight) +
90
+ f21 * (1-x_weight) * y_weight +
91
+ f12 * x_weight * (1-y_weight) +
92
+ f22 * x_weight * y_weight)
93
+
94
+ pattern |= (pixel_value > center) << k
95
+
96
+ output[i, j] = pattern
97
+
98
+ return output
99
+
100
+ def extract_features(self, frame, bbox, transform_matrix=None):
101
+ if frame is None:
102
+ return None
103
 
104
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
105
+ x, y, w, h = map(int, bbox)
106
+
107
+ x = max(0, min(x, gray.shape[1] - w))
108
+ y = max(0, min(y, gray.shape[0] - h))
109
+ w = min(w, gray.shape[1] - x)
110
+ h = min(h, gray.shape[0] - y)
111
+
112
+ roi = gray[y:y+h, x:x+w]
113
+ if roi.size == 0:
114
+ roi = gray
115
 
116
+ roi = cv2.resize(roi, (64, 64))
117
+
118
+ features = []
119
+
120
+ hog = cv2.HOGDescriptor((64,64), (16,16), (8,8), (8,8), 9)
121
+ hog_features = hog.compute(roi)
122
+ features.extend(hog_features.flatten()[:64])
123
+
124
+ lbp = self.local_binary_pattern(roi, n_points=8, radius=1)
125
+ features.extend([
126
+ np.mean(lbp),
127
+ np.std(lbp),
128
+ *np.percentile(lbp, [25, 50, 75])
129
+ ])
130
+
131
+ if transform_matrix is not None:
132
+ features.extend([
133
+ transform_matrix[0,0],
134
+ transform_matrix[1,1],
135
+ transform_matrix[0,2],
136
+ transform_matrix[1,2]
137
+ ])
138
+ else:
139
+ features.extend([1, 1, 0, 0])
140
 
141
+ features.extend([x, y, w, h])
142
+
143
+ return np.array(features).reshape(1, -1)
144
+
145
+ def predict_bbox(self, features):
146
+ features_position = self.position_scaler.transform(features)
147
+ features_size = self.size_scaler.transform(features)
148
+
149
+ position_pred = self.position_model.predict(features_position)
150
+ size_pred = self.size_model.predict(features_size)
151
+
152
+ bbox = np.hstack([position_pred, size_pred])[0]
153
+
154
+ return bbox
155
+
156
+ def track_video(self, video_path, initial_bbox, output_path='output_tracked.mp4', fps=30):
157
+ print(f"Processing video: {video_path}")
158
+
159
+ cap = cv2.VideoCapture(video_path)
160
+ if not cap.isOpened():
161
+ raise ValueError(f"Could not open video: {video_path}")
162
+
163
+ frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
164
+ frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
165
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
166
+
167
+ print(f"Video: {frame_width}x{frame_height}, {total_frames} frames")
168
+
169
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
170
+ out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
171
+
172
+ self.prev_frame = None
173
+ self.prev_kp = None
174
+ self.prev_desc = None
175
+
176
+ current_bbox = initial_bbox
177
+ frame_idx = 0
178
+
179
+ print("Tracking object...")
180
+
181
+ while True:
182
+ ret, frame = cap.read()
183
+ if not ret:
184
+ break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
+ transform_matrix = self.estimate_camera_motion(frame)
187
+
188
+ features = self.extract_features(frame, current_bbox, transform_matrix)
189
 
190
+ if features is not None:
191
+ predicted_bbox = self.predict_bbox(features)
192
+ current_bbox = predicted_bbox
193
+
194
+ x, y, w, h = map(int, current_bbox)
195
+ cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
196
+ cv2.putText(frame, f'Frame: {frame_idx}', (10, 30),
197
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
198
 
199
+ out.write(frame)
200
+ frame_idx += 1
201
 
202
+ if frame_idx % 30 == 0:
203
+ print(f"Processed {frame_idx}/{total_frames} frames")
204
+
205
+ cap.release()
206
+ out.release()
207
+
208
+ print(f"Tracking complete! Video saved to: {output_path}")
209
+ return output_path
210
 
211
 
212
def main():
    """Demo driver: track a hard-coded box through `input_video.mp4`."""
    tracker = ObjectTrackerInference(model_dir='models')

    result = tracker.track_video(
        'input_video.mp4',        # input clip
        [100, 100, 50, 50],       # initial [x, y, w, h] box
        'tracked_output.mp4',     # annotated output
    )
    print(f"Done! Output: {result}")
221
 
222
 
223
# Script entry point: run the demo tracking pipeline when executed directly.
if __name__ == "__main__":
    main()