dylanplummer committed on
Commit
1431cde
·
1 Parent(s): 392e794

optimize inference

Browse files
Files changed (1) hide show
  1. app.py +100 -103
app.py CHANGED
@@ -4,10 +4,10 @@ from PIL import Image
4
  import os
5
  import cv2
6
  import math
7
- import spaces
8
  import matplotlib
9
  matplotlib.use('Agg')
10
  import matplotlib.pyplot as plt
 
11
  from scipy.signal import medfilt, find_peaks
12
  from functools import partial
13
  from passlib.hash import pbkdf2_sha256
@@ -26,15 +26,20 @@ from hls_download import download_clips
26
 
27
  plt.style.use('dark_background')
28
 
 
 
29
  onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx", repo_type="model", token=os.environ['DATASET_SECRET'])
30
- # model_xml = hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.xml", repo_type="model", token=os.environ['DATASET_SECRET'])
31
- # hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.mapping", repo_type="model", token=os.environ['DATASET_SECRET'])
32
- #model_xml = "model_ir/model.xml"
 
 
 
 
 
 
 
33
 
34
- # ie = Core()
35
- # model_ir = ie.read_model(model=model_xml)
36
- # config = {"PERFORMANCE_HINT": "LATENCY"}
37
- # compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU", config=config)
38
 
39
 
40
  class SquarePad:
@@ -46,52 +51,72 @@ class SquarePad:
46
  vp = int((max_wh - h) / 2)
47
  padding = (hp, vp, hp, vp)
48
  return F.pad(image, padding, 0, 'constant')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def sigmoid(x):
51
  return 1 / (1 + np.exp(-x))
52
 
53
 
54
- @spaces.GPU()
55
  def inference(stream_url, start_time, end_time, count_only_api, api_key,
56
  img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
57
  miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
58
  api_call=False,
59
  progress=gr.Progress()):
60
- progress(0, desc="Starting...")
61
- x = download_clips(stream_url, os.getcwd(), start_time, end_time)
62
- # check if GPU is available
63
- if torch.cuda.is_available():
64
- providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
65
- "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
66
- sess_options = ort.SessionOptions()
67
- ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
68
- else:
69
- ort_sess = ort.InferenceSession(onnx_file)
70
- #api = HfApi(token=os.environ['DATASET_SECRET'])
71
- #out_file = str(uuid.uuid1())
72
  has_access = False
73
  if api_call:
74
  has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
75
  if not has_access:
76
  return "Invalid API Key"
77
 
78
- cap = cv2.VideoCapture(x)
79
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
80
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
81
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
82
  period_length_overlaps = np.zeros(length + seq_len)
83
  fps = int(cap.get(cv2.CAP_PROP_FPS))
84
  seconds = length / fps
85
  all_frames = []
86
  frame_i = 1
 
87
  while cap.isOpened():
88
  ret, frame = cap.read()
89
  if ret is False:
90
  frame = all_frames[-1] # padding will be with last frame
91
  break
92
  frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
93
- img = Image.fromarray(frame)
94
- all_frames.append(img)
 
 
 
 
 
 
 
95
  frame_i += 1
96
  cap.release()
97
 
@@ -106,47 +131,45 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
106
  all_frames.append(all_frames[-1])
107
  batch_list = []
108
  idx_list = []
109
- for i in tqdm(range(0, length + stride_length - stride_pad, stride_length)):
110
- batch = all_frames[i:i + seq_len]
111
- Xlist = []
112
- print('Preprocessing...')
113
- for img in batch:
114
- transforms_list = []
115
- # if center_crop:
116
- # if width > height:
117
- # transforms_list.append(transforms.Resize((int(width / (height / img_size)), img_size)))
118
- # else:
119
- # transforms_list.append(transforms.Resize((img_size, int(height / (width / img_size)))))
120
- # transforms_list.append(transforms.CenterCrop((img_size, img_size)))
121
- # else:
122
- transforms_list.append(SquarePad())
123
- transforms_list.append(transforms.Resize((img_size, img_size), interpolation=Image.BICUBIC))
124
 
125
-
126
- transforms_list += [
127
- transforms.ToTensor()]
128
- #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]
129
- preprocess = transforms.Compose(transforms_list)
130
- frameTensor = preprocess(img).unsqueeze(0)
131
- Xlist.append(frameTensor)
132
-
133
- if len(Xlist) < seq_len:
134
- for _ in range(seq_len - len(Xlist)):
135
- Xlist.append(Xlist[-1])
 
 
 
 
 
 
 
 
 
 
136
 
137
- X = torch.cat(Xlist)
138
- X *= 255
139
- batch_list.append(X.unsqueeze(0))
140
- idx_list.append(i)
141
- print('Running inference...')
142
- if len(batch_list) == batch_size:
143
- batch_X = torch.cat(batch_list)
144
- outputs = ort_sess.run(None, {'video': batch_X.numpy()})
145
- y1pred = outputs[0]
146
- y2pred = outputs[1]
147
- y3pred = outputs[2]
148
- y4pred = outputs[3]
149
- for y1, y2, y3, y4, idx in zip(y1pred, y2pred, y3pred, y4pred, idx_list):
150
  periodLength = y1.squeeze()
151
  periodicity = y2.squeeze()
152
  marks = y3.squeeze()
@@ -157,30 +180,6 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
157
  event_type_logits[idx:idx+seq_len] += event_type
158
  period_length_overlaps[idx:idx+seq_len] += 1
159
  event_type_logit_overlaps[idx:idx+seq_len] += 1
160
- batch_list = []
161
- idx_list = []
162
- progress(i / (length + stride_length - stride_pad), desc="Processing...")
163
- if len(batch_list) != 0: # still some leftover frames
164
- while len(batch_list) != batch_size:
165
- batch_list.append(batch_list[-1])
166
- idx_list.append(idx_list[-1])
167
- batch_X = torch.cat(batch_list)
168
- outputs = ort_sess.run(None, {'video': batch_X.numpy()})
169
- y1pred = outputs[0]
170
- y2pred = outputs[1]
171
- y3pred = outputs[2]
172
- y4pred = outputs[3]
173
- for y1, y2, y3, y4, idx in zip(y1pred, y2pred, y3pred, y4pred, idx_list):
174
- periodLength = y1.squeeze()
175
- periodicity = y2.squeeze()
176
- marks = y3.squeeze()
177
- event_type = y4.squeeze()
178
- period_lengths[idx:idx+seq_len] += periodLength
179
- periodicities[idx:idx+seq_len] += periodicity
180
- full_marks[idx:idx+seq_len] += marks
181
- event_type_logits[idx:idx+seq_len] += event_type
182
- period_length_overlaps[idx:idx+seq_len] += 1
183
- event_type_logit_overlaps[idx:idx+seq_len] += 1
184
 
185
  periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
186
  periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
@@ -196,7 +195,6 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
196
  periodLength = medfilt(periodLength, 5)
197
  periodicity = sigmoid(periodicity)
198
  full_marks = sigmoid(full_marks)
199
- #full_marks_mask = np.int32(full_marks > marks_threshold)
200
  pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
201
  full_marks_mask = np.zeros(len(full_marks))
202
  full_marks_mask[pred_marks_peaks] = 1
@@ -328,24 +326,15 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
328
  labels={'x': 'event type', 'y': 'probability'},
329
  range_y=[0, 1])
330
 
331
- return x, count_msg, fig, hist, bar
332
 
333
 
334
- DESCRIPTION = '# NextJump 🦘'
335
- DESCRIPTION += '\n## AI Counting for Competitive Jump Rope'
336
- DESCRIPTION += '\nDemo created by [Dylan Plummer](https://dylan-plummer.github.io/). Check out the [NextJump iOS app](https://apps.apple.com/us/app/nextjump-jump-rope-counter/id6451026115).'
337
-
338
-
339
- with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
340
- gr.Markdown(DESCRIPTION)
341
- # in_video = gr.PlayableVideo(label="Input Video", elem_id='input-video', format='mp4',
342
- # width=400, height=400, interactive=True, container=True,
343
- # max_length=150)
344
  with gr.Row():
345
- in_stream_url = gr.Textbox(label="Stream URL", elem_id='stream-url', visible=True)
346
  with gr.Column():
347
- in_stream_start = gr.Textbox(label="Start Time", elem_id='stream-start', visible=True)
348
  with gr.Column():
 
349
  in_stream_end = gr.Textbox(label="End Time", elem_id='stream-end', visible=True)
350
  with gr.Column(min_width=480):
351
  out_video = gr.PlayableVideo(label="Video Clip", elem_id='output-video', format='mp4', width=400, height=400)
@@ -376,7 +365,15 @@ with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
376
  run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end], outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
377
  api_inference = partial(inference, api_call=True)
378
  api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, count_only, api_token], outputs=[period_length], api_name='inference')
 
 
 
 
 
 
 
379
 
380
 
381
  if __name__ == "__main__":
 
382
  demo.queue(api_open=True, max_size=15).launch(share=False)
 
4
  import os
5
  import cv2
6
  import math
 
7
  import matplotlib
8
  matplotlib.use('Agg')
9
  import matplotlib.pyplot as plt
10
+ import concurrent.futures
11
  from scipy.signal import medfilt, find_peaks
12
  from functools import partial
13
  from passlib.hash import pbkdf2_sha256
 
26
 
27
  plt.style.use('dark_background')
28
 
29
+ IMG_SIZE = 256
30
+
31
  onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx", repo_type="model", token=os.environ['DATASET_SECRET'])
32
+ if torch.cuda.is_available():
33
+ providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
34
+ "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
35
+ sess_options = ort.SessionOptions()
36
+ ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
37
+ else:
38
+ ort_sess = ort.InferenceSession(onnx_file)
39
+
40
+ # warmup inference
41
+ ort_sess.run(None, {'video': np.zeros((4, 64, 3, IMG_SIZE, IMG_SIZE), dtype=np.float32)})
42
 
 
 
 
 
43
 
44
 
45
  class SquarePad:
 
51
  vp = int((max_wh - h) / 2)
52
  padding = (hp, vp, hp, vp)
53
  return F.pad(image, padding, 0, 'constant')
54
+
55
+ def square_pad_opencv(image):
56
+ h, w = image.shape[:2]
57
+ max_wh = max(w, h)
58
+ hp = int((max_wh - w) / 2)
59
+ vp = int((max_wh - h) / 2)
60
+ return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, value=[0, 0, 0])
61
+
62
+
63
+ def preprocess_image(img, img_size):
64
+ #img = square_pad_opencv(img)
65
+ #img = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_CUBIC)
66
+ img = Image.fromarray(img)
67
+ transforms_list = []
68
+ transforms_list.append(transforms.ToTensor())
69
+ preprocess = transforms.Compose(transforms_list)
70
+ return preprocess(img).unsqueeze(0)
71
+
72
+ def run_inference(batch_X):
73
+ batch_X = torch.cat(batch_X)
74
+ return ort_sess.run(None, {'video': batch_X.numpy()})
75
+
76
 
77
  def sigmoid(x):
78
  return 1 / (1 + np.exp(-x))
79
 
80
 
 
81
  def inference(stream_url, start_time, end_time, count_only_api, api_key,
82
  img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
83
  miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
84
  api_call=False,
85
  progress=gr.Progress()):
86
+ progress(0, desc="Downloading clip...")
87
+ in_video = download_clips(stream_url, os.getcwd(), start_time, end_time)
88
+ progress(0, desc="Running inference...")
 
 
 
 
 
 
 
 
 
89
  has_access = False
90
  if api_call:
91
  has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
92
  if not has_access:
93
  return "Invalid API Key"
94
 
95
+ cap = cv2.VideoCapture(in_video)
96
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
97
+ frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
98
+ frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
99
  period_length_overlaps = np.zeros(length + seq_len)
100
  fps = int(cap.get(cv2.CAP_PROP_FPS))
101
  seconds = length / fps
102
  all_frames = []
103
  frame_i = 1
104
+ resize_size = max(frame_width, frame_height)
105
  while cap.isOpened():
106
  ret, frame = cap.read()
107
  if ret is False:
108
  frame = all_frames[-1] # padding will be with last frame
109
  break
110
  frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
111
+ # add square padding with opencv
112
+ #frame = square_pad_opencv(frame)
113
+ frame = cv2.resize(frame, (resize_size, resize_size), interpolation=cv2.INTER_CUBIC)
114
+ frame_center_x = frame.shape[1] // 2
115
+ frame_center_y = frame.shape[0] // 2
116
+ crop_x = frame_center_x - img_size // 2
117
+ crop_y = frame_center_y - img_size // 2
118
+ frame = frame[crop_y:crop_y+img_size, crop_x:crop_x+img_size]
119
+ all_frames.append(frame)
120
  frame_i += 1
121
  cap.release()
122
 
 
131
  all_frames.append(all_frames[-1])
132
  batch_list = []
133
  idx_list = []
134
+ inference_futures = []
135
+ with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
136
+ for i in tqdm(range(0, length + stride_length - stride_pad, stride_length)):
137
+ batch = all_frames[i:i + seq_len]
138
+ Xlist = []
139
+ preprocess_tasks = [(idx, executor.submit(preprocess_image, img, img_size)) for idx, img in enumerate(batch)]
140
+ for idx, future in sorted(preprocess_tasks, key=lambda x: x[0]):
141
+ Xlist.append(future.result())
 
 
 
 
 
 
 
142
 
143
+ if len(Xlist) < seq_len:
144
+ for _ in range(seq_len - len(Xlist)):
145
+ Xlist.append(Xlist[-1])
146
+
147
+ X = torch.cat(Xlist)
148
+ X *= 255
149
+ batch_list.append(X.unsqueeze(0))
150
+ idx_list.append(i)
151
+
152
+ if len(batch_list) == batch_size:
153
+ future = executor.submit(run_inference, batch_list)
154
+ inference_futures.append((batch_list, idx_list, future))
155
+ batch_list = []
156
+ idx_list = []
157
+ # Process any remaining batches
158
+ if batch_list:
159
+ while len(batch_list) != batch_size:
160
+ batch_list.append(batch_list[-1])
161
+ idx_list.append(idx_list[-1])
162
+ future = executor.submit(run_inference, batch_list)
163
+ inference_futures.append((batch_list, idx_list, future))
164
 
165
+ # Collect and process the inference results
166
+ for batch_list, idx_list, future in inference_futures:
167
+ outputs = future.result()
168
+ y1_out = outputs[0]
169
+ y2_out = outputs[1]
170
+ y3_out = outputs[2]
171
+ y4_out = outputs[3]
172
+ for y1, y2, y3, y4, idx in zip(y1_out, y2_out, y3_out, y4_out, idx_list):
 
 
 
 
 
173
  periodLength = y1.squeeze()
174
  periodicity = y2.squeeze()
175
  marks = y3.squeeze()
 
180
  event_type_logits[idx:idx+seq_len] += event_type
181
  period_length_overlaps[idx:idx+seq_len] += 1
182
  event_type_logit_overlaps[idx:idx+seq_len] += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
185
  periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
 
195
  periodLength = medfilt(periodLength, 5)
196
  periodicity = sigmoid(periodicity)
197
  full_marks = sigmoid(full_marks)
 
198
  pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
199
  full_marks_mask = np.zeros(len(full_marks))
200
  full_marks_mask[pred_marks_peaks] = 1
 
326
  labels={'x': 'event type', 'y': 'probability'},
327
  range_y=[0, 1])
328
 
329
+ return in_video, count_msg, fig, hist, bar
330
 
331
 
332
+ with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
333
  with gr.Row():
 
334
  with gr.Column():
335
+ in_stream_url = gr.Textbox(label="Stream URL", elem_id='stream-url', visible=True)
336
  with gr.Column():
337
+ in_stream_start = gr.Textbox(label="Start Time", elem_id='stream-start', visible=True)
338
  in_stream_end = gr.Textbox(label="End Time", elem_id='stream-end', visible=True)
339
  with gr.Column(min_width=480):
340
  out_video = gr.PlayableVideo(label="Video Clip", elem_id='output-video', format='mp4', width=400, height=400)
 
365
  run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end], outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
366
  api_inference = partial(inference, api_call=True)
367
  api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, count_only, api_token], outputs=[period_length], api_name='inference')
368
+ examples = [
369
+ ['https://hiemdall-dev2.azurewebsites.net/api/playlist/rec_rd2FAyUo/vod', '00:43:10', '00:43:40'],
370
+ ]
371
+ gr.Examples(examples,
372
+ inputs=[in_stream_url, in_stream_start, in_stream_end],
373
+ outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist],
374
+ fn=demo_inference, cache_examples=os.getenv('SYSTEM') == 'spaces')
375
 
376
 
377
  if __name__ == "__main__":
378
+
379
  demo.queue(api_open=True, max_size=15).launch(share=False)