dylanplummer committed on
Commit
2621d57
·
1 Parent(s): ed4eebf

speed optim

Browse files
Files changed (1) hide show
  1. app.py +19 -26
app.py CHANGED
@@ -76,8 +76,8 @@ def preprocess_image(img, img_size):
76
 
77
  def run_inference(batch_X):
78
  global ort_sess
79
- batch_X = torch.cat(batch_X)
80
- return ort_sess.run(None, {'video': batch_X.numpy()})
81
 
82
 
83
  def sigmoid(x):
@@ -208,17 +208,11 @@ def count_phases(phase_sin, phase_cos, threshold=0.5):
208
  count: Number of phase transitions
209
  phase_indices: Indices where transitions occur
210
  """
211
- phase_indices = []
212
- count = 0
213
- for i in range(1, len(phase_sin)):
214
- # Check if the sine and cosine phases cross each other
215
- if (phase_sin[i-1] < threshold and phase_sin[i] >= threshold) or \
216
- (phase_sin[i-1] >= threshold and phase_sin[i] < threshold):
217
- # Check if the cosine phase crosses the threshold
218
- if (phase_cos[i-1] < threshold and phase_cos[i] >= threshold) or \
219
- (phase_cos[i-1] >= threshold and phase_cos[i] < threshold):
220
- phase_indices.append(i)
221
- count += 1
222
  return count, phase_indices
223
 
224
 
@@ -263,7 +257,7 @@ def inference(in_video, use_60fps,
263
  frame = all_frames[-1] # padding will be with last frame
264
  break
265
 
266
- frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
267
  # add square padding with opencv
268
  #frame = square_pad_opencv(frame)
269
  # frame_center_x = frame.shape[1] // 2
@@ -274,7 +268,7 @@ def inference(in_video, use_60fps,
274
  # crop_x = frame_center_x - IMG_SIZE // 2
275
  # crop_y = frame_center_y - IMG_SIZE // 2
276
  # frame = frame[crop_y:crop_y+IMG_SIZE, crop_x:crop_x+IMG_SIZE]
277
- frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
278
  all_frames.append(frame)
279
 
280
  cap.release()
@@ -294,21 +288,20 @@ def inference(in_video, use_60fps,
294
  batch_list = []
295
  idx_list = []
296
  inference_futures = []
297
- with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
298
  for i in progress.tqdm(range(0, length + stride_length - stride_pad, stride_length)):
299
  batch = all_frames[i:i + seq_len]
300
- Xlist = []
301
- preprocess_tasks = [(idx, executor.submit(preprocess_image, img, IMG_SIZE)) for idx, img in enumerate(batch)]
302
- for idx, future in sorted(preprocess_tasks, key=lambda x: x[0]):
303
- Xlist.append(future.result())
304
 
305
- if len(Xlist) < seq_len:
306
- for _ in range(seq_len - len(Xlist)):
307
- Xlist.append(Xlist[-1])
 
 
 
308
 
309
- X = torch.cat(Xlist)
310
- X *= 255
311
- batch_list.append(X.unsqueeze(0))
312
  idx_list.append(i)
313
 
314
  if len(batch_list) == batch_size:
 
76
 
77
  def run_inference(batch_X):
78
  global ort_sess
79
+ batch_X = np.concatenate(batch_X, axis=0)
80
+ return ort_sess.run(None, {'video': batch_X})
81
 
82
 
83
  def sigmoid(x):
 
208
  count: Number of phase transitions
209
  phase_indices: Indices where transitions occur
210
  """
211
+ sin_crosses = (phase_sin[:-1] < threshold) != (phase_sin[1:] < threshold)
212
+ cos_crosses = (phase_cos[:-1] < threshold) != (phase_cos[1:] < threshold)
213
+ both_cross = sin_crosses & cos_crosses
214
+ phase_indices = (np.where(both_cross)[0] + 1).tolist()
215
+ count = len(phase_indices)
 
 
 
 
 
 
216
  return count, phase_indices
217
 
218
 
 
257
  frame = all_frames[-1] # padding will be with last frame
258
  break
259
 
260
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
261
  # add square padding with opencv
262
  #frame = square_pad_opencv(frame)
263
  # frame_center_x = frame.shape[1] // 2
 
268
  # crop_x = frame_center_x - IMG_SIZE // 2
269
  # crop_y = frame_center_y - IMG_SIZE // 2
270
  # frame = frame[crop_y:crop_y+IMG_SIZE, crop_x:crop_x+IMG_SIZE]
271
+ frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR)
272
  all_frames.append(frame)
273
 
274
  cap.release()
 
288
  batch_list = []
289
  idx_list = []
290
  inference_futures = []
291
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
292
  for i in progress.tqdm(range(0, length + stride_length - stride_pad, stride_length)):
293
  batch = all_frames[i:i + seq_len]
294
+ if len(batch) < seq_len:
295
+ batch = batch + [batch[-1]] * (seq_len - len(batch))
 
 
296
 
297
+ # Vectorized preprocessing: stack, transpose HWC->CHW, convert to float32
298
+ # (replaces per-frame PIL conversion + torchvision ToTensor + X*=255 undo)
299
+ X = np.ascontiguousarray(
300
+ np.stack(batch).transpose(0, 3, 1, 2),
301
+ dtype=np.float32
302
+ )
303
 
304
+ batch_list.append(X[np.newaxis]) # add batch dim: (1, seq_len, 3, H, W)
 
 
305
  idx_list.append(i)
306
 
307
  if len(batch_list) == batch_size: