dylanplummer commited on
Commit
0340538
·
1 Parent(s): 35202d4

add warmup

Browse files
Files changed (2) hide show
  1. app.py +373 -368
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,369 +1,374 @@
1
- import gradio as gr
2
- import numpy as np
3
- from PIL import Image
4
- import os
5
- import cv2
6
- import math
7
- import matplotlib
8
- matplotlib.use('Agg')
9
- import matplotlib.pyplot as plt
10
- from scipy.signal import medfilt, find_peaks
11
- from functools import partial
12
- from passlib.hash import pbkdf2_sha256
13
- from tqdm import tqdm
14
- import pandas as pd
15
- import plotly.express as px
16
- import onnxruntime as ort
17
- import torch
18
- from torchvision import transforms
19
- import torchvision.transforms.functional as F
20
-
21
- from huggingface_hub import hf_hub_download
22
- from huggingface_hub import HfApi
23
-
24
- from hls_download import download_clips
25
-
26
- plt.style.use('dark_background')
27
-
28
- onnx_file = hf_hub_download(repo_id='dylanplummer/ropenet', filename='nextjump.onnx', repo_type='model', token=os.environ['DATASET_SECRET'])
29
- #onnx_file = hf_hub_download(repo_id='dylanplummer/ropenet', filename='nextjump_fp16.onnx', repo_type='model', token=os.environ['DATASET_SECRET'])
30
- # model_xml = hf_hub_download(repo_id='dylanplummer/ropenet', filename='model.xml', repo_type='model', token=os.environ['DATASET_SECRET'])
31
- # hf_hub_download(repo_id='dylanplummer/ropenet', filename='model.mapping', repo_type='model', token=os.environ['DATASET_SECRET'])
32
- #model_xml = 'model_ir/model.xml'
33
-
34
- # ie = Core()
35
- # model_ir = ie.read_model(model=model_xml)
36
- # config = {'PERFORMANCE_HINT': 'LATENCY'}
37
- # compiled_model_ir = ie.compile_model(model=model_ir, device_name='CPU', config=config)
38
-
39
-
40
- class SquarePad:
41
- # https://discuss.pytorch.org/t/how-to-resize-and-pad-in-a-torchvision-transforms-compose/71850/9
42
- def __call__(self, image):
43
- w, h = image.size
44
- max_wh = max(w, h)
45
- hp = int((max_wh - w) / 2)
46
- vp = int((max_wh - h) / 2)
47
- padding = (hp, vp, hp, vp)
48
- return F.pad(image, padding, 0, 'constant')
49
-
50
- def sigmoid(x):
51
- return 1 / (1 + np.exp(-x))
52
-
53
-
54
- def create_transform(img_size):
55
- return transforms.Compose([
56
- SquarePad(),
57
- transforms.Resize((img_size, img_size), interpolation=Image.BICUBIC),
58
- transforms.ToTensor(),
59
- ])
60
-
61
-
62
- def inference(stream_url, start_time, end_time, count_only_api, api_key,
63
- img_size=288, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
64
- miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
65
- api_call=False,
66
- progress=gr.Progress()):
67
- progress(0, desc='Starting...')
68
- x = download_clips(stream_url, os.getcwd(), start_time, end_time)
69
- # check if GPU is available
70
- if torch.cuda.is_available():
71
- providers = ['TensorrtExecutionProvider', ('CUDAExecutionProvider', {'device_id': torch.cuda.current_device(),
72
- 'user_compute_stream': str(torch.cuda.current_stream().cuda_stream)})]
73
- sess_options = ort.SessionOptions()
74
- sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
75
- ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
76
- else:
77
- ort_sess = ort.InferenceSession(onnx_file)
78
- #api = HfApi(token=os.environ['DATASET_SECRET'])
79
- #out_file = str(uuid.uuid1())
80
- has_access = False
81
- if api_call:
82
- has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
83
- if not has_access:
84
- return 'Invalid API Key'
85
-
86
- cap = cv2.VideoCapture(x)
87
- length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
88
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
89
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
90
- period_length_overlaps = np.zeros(length + seq_len)
91
- fps = int(cap.get(cv2.CAP_PROP_FPS))
92
- seconds = length / fps
93
- all_frames = []
94
- frame_i = 1
95
- while cap.isOpened():
96
- ret, frame = cap.read()
97
- if ret is False:
98
- frame = all_frames[-1] # padding will be with last frame
99
- break
100
- frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
101
- img = Image.fromarray(frame)
102
- all_frames.append(img)
103
- frame_i += 1
104
- cap.release()
105
-
106
- length = len(all_frames)
107
- period_lengths = np.zeros(len(all_frames) + seq_len + stride_length)
108
- periodicities = np.zeros(len(all_frames) + seq_len + stride_length)
109
- full_marks = np.zeros(len(all_frames) + seq_len + stride_length)
110
- event_type_logits = np.zeros((len(all_frames) + seq_len + stride_length, 7))
111
- period_length_overlaps = np.zeros(len(all_frames) + seq_len + stride_length)
112
- event_type_logit_overlaps = np.zeros((len(all_frames) + seq_len + stride_length, 7))
113
- for _ in range(seq_len + stride_length): # pad full sequence
114
- all_frames.append(all_frames[-1])
115
- batch_list = []
116
- idx_list = []
117
- preprocess = create_transform(img_size)
118
- for i in tqdm(range(0, length + stride_length - stride_pad, stride_length)):
119
- batch = all_frames[i:i + seq_len]
120
- Xlist = []
121
- print('Preprocessing...')
122
- for img in batch:
123
- frameTensor = preprocess(img).unsqueeze(0)
124
- Xlist.append(frameTensor)
125
-
126
- if len(Xlist) < seq_len:
127
- for _ in range(seq_len - len(Xlist)):
128
- Xlist.append(Xlist[-1])
129
-
130
- X = torch.cat(Xlist)
131
- X *= 255
132
- batch_list.append(X.unsqueeze(0))
133
- idx_list.append(i)
134
- print('Running inference...')
135
- if len(batch_list) == batch_size:
136
- batch_X = torch.cat(batch_list)
137
- outputs = ort_sess.run(None, {'video': batch_X.numpy()})
138
- y1pred = outputs[0]
139
- y2pred = outputs[1]
140
- y3pred = outputs[2]
141
- y4pred = outputs[3]
142
- for y1, y2, y3, y4, idx in zip(y1pred, y2pred, y3pred, y4pred, idx_list):
143
- periodLength = y1.squeeze()
144
- periodicity = y2.squeeze()
145
- marks = y3.squeeze()
146
- event_type = y4.squeeze()
147
- period_lengths[idx:idx+seq_len] += periodLength
148
- periodicities[idx:idx+seq_len] += periodicity
149
- full_marks[idx:idx+seq_len] += marks
150
- event_type_logits[idx:idx+seq_len] += event_type
151
- period_length_overlaps[idx:idx+seq_len] += 1
152
- event_type_logit_overlaps[idx:idx+seq_len] += 1
153
- batch_list = []
154
- idx_list = []
155
- progress(i / (length + stride_length - stride_pad), desc='Processing...')
156
- if len(batch_list) != 0: # still some leftover frames
157
- while len(batch_list) != batch_size:
158
- batch_list.append(batch_list[-1])
159
- idx_list.append(idx_list[-1])
160
- batch_X = torch.cat(batch_list)
161
- outputs = ort_sess.run(None, {'video': batch_X.numpy()})
162
- y1pred = outputs[0]
163
- y2pred = outputs[1]
164
- y3pred = outputs[2]
165
- y4pred = outputs[3]
166
- for y1, y2, y3, y4, idx in zip(y1pred, y2pred, y3pred, y4pred, idx_list):
167
- periodLength = y1.squeeze()
168
- periodicity = y2.squeeze()
169
- marks = y3.squeeze()
170
- event_type = y4.squeeze()
171
- period_lengths[idx:idx+seq_len] += periodLength
172
- periodicities[idx:idx+seq_len] += periodicity
173
- full_marks[idx:idx+seq_len] += marks
174
- event_type_logits[idx:idx+seq_len] += event_type
175
- period_length_overlaps[idx:idx+seq_len] += 1
176
- event_type_logit_overlaps[idx:idx+seq_len] += 1
177
-
178
- periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
179
- periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
180
- full_marks = np.divide(full_marks, period_length_overlaps, where=period_length_overlaps!=0)[:length]
181
- per_frame_event_type_logits = np.divide(event_type_logits, event_type_logit_overlaps, where=event_type_logit_overlaps!=0)[:length]
182
- event_type_logits = np.mean(per_frame_event_type_logits, axis=0)
183
- # softmax of event type logits
184
- event_type_probs = np.exp(event_type_logits) / np.sum(np.exp(event_type_logits))
185
- per_frame_event_types = np.argmax(per_frame_event_type_logits, axis=1)
186
-
187
- if median_pred_filter:
188
- periodicity = medfilt(periodicity, 5)
189
- periodLength = medfilt(periodLength, 5)
190
- periodicity = sigmoid(periodicity)
191
- full_marks = sigmoid(full_marks)
192
- #full_marks_mask = np.int32(full_marks > marks_threshold)
193
- pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
194
- full_marks_mask = np.zeros(len(full_marks))
195
- full_marks_mask[pred_marks_peaks] = 1
196
- periodicity_mask = np.int32(periodicity > miss_threshold)
197
- numofReps = 0
198
- count = []
199
- for i in range(len(periodLength)):
200
- if periodLength[i] < 2 or periodicity_mask[i] == 0:
201
- numofReps += 0
202
- elif full_marks_mask[i]: # high confidence mark detected
203
- if math.modf(numofReps)[0] < 0.2: # probably false positive/late detection
204
- numofReps = float(int(numofReps))
205
- else:
206
- numofReps = float(int(numofReps) + 1.01) # round up
207
- else:
208
- numofReps += max(0, periodicity_mask[i]/(periodLength[i]))
209
- count.append(round(float(numofReps), 2))
210
- count_pred = count[-1]
211
- marks_count_pred = 0
212
- for i in range(len(full_marks) - 1):
213
- # if a jump was counted, and periodicity is high, and the next frame was not counted (to avoid double counting)
214
- if full_marks_mask[i] > 0 and periodicity_mask[i] > 0 and full_marks_mask[i + 1] == 0:
215
- marks_count_pred += 1
216
- if not both_feet:
217
- count_pred = count_pred / 2
218
- marks_count_pred = marks_count_pred / 2
219
- count = np.array(count) / 2
220
- try:
221
- confidence = (np.mean(periodicity[periodicity > miss_threshold]) - miss_threshold) / (1 - miss_threshold)
222
- except ZeroDivisionError:
223
- confidence = 0
224
- self_err = abs(count_pred - marks_count_pred)
225
- try:
226
- self_pct_err = self_err / count_pred
227
- except ZeroDivisionError:
228
- self_pct_err = 0
229
- total_confidence = confidence * (1 - self_pct_err)
230
-
231
- if both_feet:
232
- count_msg = f'## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}'
233
- else:
234
- count_msg = f'## Predicted Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}'
235
-
236
- if api_call:
237
- if count_only_api:
238
- return f'{count_pred:.2f} (conf: {total_confidence:.2f})'
239
- else:
240
- return np.array2string(periodLength, formatter={'float_kind':lambda x: '%.2f' % x}).replace('\n', ''), \
241
- np.array2string(periodicity, formatter={'float_kind':lambda x: '%.2f' % x}).replace('\n', ''), \
242
- np.array2string(full_marks, formatter={'float_kind':lambda x: '%.2f' % x}).replace('\n', ''), \
243
- f'reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}', \
244
- f'single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}'
245
-
246
-
247
- jumps_per_second = np.clip(1 / ((periodLength / fps) + 0.01), 0, 10)
248
- jumping_speed = np.copy(jumps_per_second)
249
- misses = periodicity < miss_threshold
250
- jumps_per_second[misses] = 0
251
- frame_type = np.array(['miss' if miss else 'frame' for miss in misses])
252
- frame_type[full_marks > marks_threshold] = 'jump'
253
- per_frame_event_types = np.clip(per_frame_event_types, 0, 6) / 6
254
- df = pd.DataFrame.from_dict({'period length': periodLength,
255
- 'jumping speed': jumping_speed,
256
- 'jumps per second': jumps_per_second,
257
- 'periodicity': periodicity,
258
- 'miss': misses,
259
- 'frame_type': frame_type,
260
- 'event_type': per_frame_event_types,
261
- 'jumps': full_marks,
262
- 'jumps_size': (full_marks + 0.05) * 10,
263
- 'miss_size': np.clip((1 - periodicity) * 0.9 + 0.1, 1, 8),
264
- 'seconds': np.linspace(0, seconds, num=len(periodLength))})
265
- event_type_tick_vals = np.linspace(0, 1, num=7)
266
- event_type_colors = ['red', 'orange', 'green', 'blue', 'purple', 'pink', 'black']
267
- fig = px.scatter(data_frame=df,
268
- x='seconds',
269
- y='jumps per second',
270
- #symbol='frame_type',
271
- #symbol_map={'frame': 'circle', 'miss': 'circle-open', 'jump': 'triangle-down'},
272
- color='event_type',
273
- size='jumps_size',
274
- size_max=8,
275
- color_continuous_scale=[(t, c) for t, c in zip(event_type_tick_vals, event_type_colors)],
276
- range_color=(0,1),
277
- title='Jumping speed (jumps-per-second)',
278
- trendline='rolling',
279
- trendline_options=dict(window=16),
280
- trendline_color_override='goldenrod',
281
- trendline_scope='overall',
282
- template='plotly_dark')
283
-
284
- fig.update_layout(legend=dict(
285
- orientation='h',
286
- yanchor='bottom',
287
- y=0.98,
288
- xanchor='right',
289
- x=1,
290
- font=dict(
291
- family='Courier',
292
- size=12,
293
- color='black'
294
- ),
295
- bgcolor='AliceBlue',
296
- ),
297
- paper_bgcolor='rgba(0,0,0,0)',
298
- plot_bgcolor='rgba(0,0,0,0)'
299
- )
300
- # remove white outline from marks
301
- fig.update_traces(marker_line_width = 0)
302
- fig.update_layout(coloraxis_colorbar=dict(
303
- tickvals=event_type_tick_vals,
304
- ticktext=['single<br>rope', 'double<br>dutch', 'double<br>unders', 'single<br>bounces', 'double<br>bounces', 'triple<br>unders', 'other'],
305
- title='event type'
306
- ))
307
-
308
- hist = px.histogram(df,
309
- x='jumps per second',
310
- template='plotly_dark',
311
- marginal='box',
312
- histnorm='percent',
313
- title='Distribution of jumping speed (jumps-per-second)')
314
-
315
- # make a bar plot of the event type distribution
316
-
317
- bar = px.bar(x=['single rope', 'double dutch', 'double unders', 'single bounces', 'double bounces', 'triple unders', 'other'],
318
- y=event_type_probs,
319
- template='plotly_dark',
320
- title='Event Type Distribution',
321
- labels={'x': 'event type', 'y': 'probability'},
322
- range_y=[0, 1])
323
-
324
- return x, count_msg, fig, hist, bar
325
-
326
-
327
- with gr.Blocks() as demo:
328
- # in_video = gr.PlayableVideo(label='Input Video', elem_id='input-video', format='mp4',
329
- # width=400, height=400, interactive=True, container=True,
330
- # max_length=150)
331
- with gr.Row():
332
- in_stream_url = gr.Textbox(label='Stream URL', elem_id='stream-url', visible=True)
333
- with gr.Column():
334
- in_stream_start = gr.Textbox(label='Start Time', elem_id='stream-start', visible=True)
335
- with gr.Column():
336
- in_stream_end = gr.Textbox(label='End Time', elem_id='stream-end', visible=True)
337
- with gr.Column(min_width=480):
338
- out_video = gr.PlayableVideo(label='Video Clip', elem_id='output-video', format='mp4', width=400, height=400)
339
-
340
- with gr.Row():
341
- run_button = gr.Button(value='Run', elem_id='run-button', scale=1)
342
- api_dummy_button = gr.Button(value='Run (No Viz)', elem_id='count-only', visible=False, scale=2)
343
- count_only = gr.Checkbox(label='Count Only', visible=False)
344
- api_token = gr.Textbox(label='API Key', elem_id='api-token', visible=False)
345
-
346
- with gr.Column(elem_id='output-video-container'):
347
- with gr.Row():
348
- with gr.Column():
349
- out_text = gr.Markdown(label='Predicted Count', elem_id='output-text')
350
- period_length = gr.Textbox(label='Period Length', elem_id='period-length', visible=False)
351
- periodicity = gr.Textbox(label='Periodicity', elem_id='periodicity', visible=False)
352
- with gr.Row():
353
- out_plot = gr.Plot(label='Jumping Speed', elem_id='output-plot')
354
- with gr.Row():
355
- with gr.Column():
356
- out_hist = gr.Plot(label='Speed Histogram', elem_id='output-hist')
357
- with gr.Column():
358
- out_event_type_dist = gr.Plot(label='Event Type Distribution', elem_id='output-event-type-dist')
359
-
360
-
361
- demo_inference = partial(inference, count_only_api=False, api_key=None)
362
-
363
- run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end], outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
364
- api_inference = partial(inference, api_call=True)
365
- api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, count_only, api_token], outputs=[period_length], api_name='inference')
366
-
367
-
368
- if __name__ == '__main__':
 
 
 
 
 
369
  demo.queue(api_open=True, max_size=15).launch(share=False)
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import os
5
+ import cv2
6
+ import math
7
+ import matplotlib
8
+ matplotlib.use('Agg')
9
+ import matplotlib.pyplot as plt
10
+ from scipy.signal import medfilt, find_peaks
11
+ from functools import partial
12
+ from passlib.hash import pbkdf2_sha256
13
+ from tqdm import tqdm
14
+ import pandas as pd
15
+ import plotly.express as px
16
+ import onnxruntime as ort
17
+ import torch
18
+ from torchvision import transforms
19
+ import torchvision.transforms.functional as F
20
+
21
+ from huggingface_hub import hf_hub_download
22
+ from huggingface_hub import HfApi
23
+
24
+ from hls_download import download_clips
25
+
26
+ plt.style.use('dark_background')
27
+
28
+ onnx_file = hf_hub_download(repo_id='dylanplummer/ropenet', filename='nextjump.onnx', repo_type='model', token=os.environ['DATASET_SECRET'])
29
+ #onnx_file = hf_hub_download(repo_id='dylanplummer/ropenet', filename='nextjump_fp16.onnx', repo_type='model', token=os.environ['DATASET_SECRET'])
30
+ # model_xml = hf_hub_download(repo_id='dylanplummer/ropenet', filename='model.xml', repo_type='model', token=os.environ['DATASET_SECRET'])
31
+ # hf_hub_download(repo_id='dylanplummer/ropenet', filename='model.mapping', repo_type='model', token=os.environ['DATASET_SECRET'])
32
+ #model_xml = 'model_ir/model.xml'
33
+
34
+ # ie = Core()
35
+ # model_ir = ie.read_model(model=model_xml)
36
+ # config = {'PERFORMANCE_HINT': 'LATENCY'}
37
+ # compiled_model_ir = ie.compile_model(model=model_ir, device_name='CPU', config=config)
38
# check if GPU is available
if torch.cuda.is_available():
    # Run ONNX Runtime on the same CUDA device and stream as PyTorch so the
    # two runtimes don't serialize against each other on separate streams.
    providers = [('CUDAExecutionProvider', {'device_id': torch.cuda.current_device(),
                                            'user_compute_stream': str(torch.cuda.current_stream().cuda_stream)})]
    sess_options = ort.SessionOptions()
    # Enable all graph optimizations (constant folding, node fusion, ...).
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
else:
    # CPU fallback with default session options.
    ort_sess = ort.InferenceSession(onnx_file)

# Warm the session up once at startup with a dummy clip shaped like real
# input (batch=1, 64 frames, 3x288x288) so kernel selection / memory
# allocation cost is paid here instead of on the first user request.
print('Warmup...')
dummy_input = torch.randn(1, 64, 3, 288, 288)
ort_sess.run(None, {'video': dummy_input.numpy()})
print('Done!')
52
+
53
class SquarePad:
    """Pad a PIL image with black borders so it becomes exactly square.

    https://discuss.pytorch.org/t/how-to-resize-and-pad-in-a-torchvision-transforms-compose/71850/9
    """
    def __call__(self, image):
        w, h = image.size
        max_wh = max(w, h)
        hp = (max_wh - w) // 2
        vp = (max_wh - h) // 2
        # Put any odd leftover pixel on the right/bottom so the result is
        # exactly max_wh x max_wh. The original used int(d / 2) on BOTH
        # sides, which dropped one pixel whenever the difference was odd and
        # produced a not-quite-square image.
        padding = (hp, vp, max_wh - w - hp, max_wh - h - vp)
        return F.pad(image, padding, 0, 'constant')
62
+
63
def sigmoid(x):
    """Numerically stable element-wise logistic function 1 / (1 + e^-x).

    Clipping the logits to [-500, 500] keeps np.exp from overflowing (and
    emitting RuntimeWarnings) for large-magnitude negative inputs; e^500 is
    still finite in float64, so results are unchanged to double precision.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
65
+
66
+
67
def create_transform(img_size):
    """Build the per-frame preprocessing pipeline.

    Square-pads the frame, resizes it to img_size x img_size with bicubic
    interpolation, and converts it to a float tensor in [0, 1].
    """
    steps = [
        SquarePad(),
        transforms.Resize((img_size, img_size), interpolation=Image.BICUBIC),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)
73
+
74
+
75
def _accumulate_batch(batch_list, idx_list, seq_len, acc):
    """Run one ONNX batch and add per-frame predictions into the accumulators.

    `acc` is the tuple (period_lengths, periodicities, full_marks,
    event_type_logits, period_length_overlaps, event_type_logit_overlaps);
    all six arrays are updated in place so overlapping windows can later be
    averaged by dividing by the overlap counts.
    """
    (period_lengths, periodicities, full_marks,
     event_type_logits, period_length_overlaps, event_type_logit_overlaps) = acc
    batch_X = torch.cat(batch_list)
    outputs = ort_sess.run(None, {'video': batch_X.numpy()})
    y1pred, y2pred, y3pred, y4pred = outputs[0], outputs[1], outputs[2], outputs[3]
    for y1, y2, y3, y4, idx in zip(y1pred, y2pred, y3pred, y4pred, idx_list):
        period_lengths[idx:idx + seq_len] += y1.squeeze()
        periodicities[idx:idx + seq_len] += y2.squeeze()
        full_marks[idx:idx + seq_len] += y3.squeeze()
        event_type_logits[idx:idx + seq_len] += y4.squeeze()
        period_length_overlaps[idx:idx + seq_len] += 1
        event_type_logit_overlaps[idx:idx + seq_len] += 1


def inference(stream_url, start_time, end_time, count_only_api, api_key,
              img_size=288, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
              miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
              api_call=False,
              progress=gr.Progress()):
    """Download a clip of [start_time, end_time] from `stream_url`, run the
    jump-counting ONNX model over it in overlapping seq_len-frame windows,
    and return the clip path, a count message, and three Plotly figures.

    For api_call=True the key is checked and text predictions are returned
    instead of figures (or just the count when count_only_api is set).
    Note: `center_crop` is currently unused — presumably reserved; confirm
    before removing.
    """
    progress(0, desc='Starting...')
    x = download_clips(stream_url, os.getcwd(), start_time, end_time)

    #api = HfApi(token=os.environ['DATASET_SECRET'])
    #out_file = str(uuid.uuid1())
    has_access = False
    if api_call:
        has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
        if not has_access:
            return 'Invalid API Key'

    # Decode every frame of the clip into RGB PIL images.
    cap = cv2.VideoCapture(x)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    seconds = length / fps
    all_frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if ret is False:
            break  # padding with the last frame happens below
        frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
        all_frames.append(Image.fromarray(frame))
    cap.release()

    # Allocate accumulators slightly longer than the clip so the final
    # (padded) windows can write without bounds checks; trimmed to `length`
    # after averaging.
    length = len(all_frames)
    padded_len = length + seq_len + stride_length
    period_lengths = np.zeros(padded_len)
    periodicities = np.zeros(padded_len)
    full_marks = np.zeros(padded_len)
    event_type_logits = np.zeros((padded_len, 7))
    period_length_overlaps = np.zeros(padded_len)
    event_type_logit_overlaps = np.zeros((padded_len, 7))
    acc = (period_lengths, periodicities, full_marks,
           event_type_logits, period_length_overlaps, event_type_logit_overlaps)
    for _ in range(seq_len + stride_length):  # pad full sequence with the last frame
        all_frames.append(all_frames[-1])

    batch_list = []
    idx_list = []
    preprocess = create_transform(img_size)
    total_windows = length + stride_length - stride_pad
    for i in tqdm(range(0, total_windows, stride_length)):
        batch = all_frames[i:i + seq_len]
        Xlist = [preprocess(img).unsqueeze(0) for img in batch]
        if len(Xlist) < seq_len:  # short tail window: repeat the last frame
            Xlist.extend(Xlist[-1] for _ in range(seq_len - len(Xlist)))
        X = torch.cat(Xlist)
        X *= 255  # model expects 0-255 pixels; ToTensor produced 0-1
        batch_list.append(X.unsqueeze(0))
        idx_list.append(i)
        if len(batch_list) == batch_size:
            _accumulate_batch(batch_list, idx_list, seq_len, acc)
            batch_list = []
            idx_list = []
        progress(i / total_windows, desc='Processing...')
    if len(batch_list) != 0:  # still some leftover windows: pad to a full batch
        while len(batch_list) != batch_size:
            batch_list.append(batch_list[-1])
            idx_list.append(idx_list[-1])
        _accumulate_batch(batch_list, idx_list, seq_len, acc)

    # Average overlapping window predictions and trim the padding.
    periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps != 0)[:length]
    periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps != 0)[:length]
    full_marks = np.divide(full_marks, period_length_overlaps, where=period_length_overlaps != 0)[:length]
    per_frame_event_type_logits = np.divide(event_type_logits, event_type_logit_overlaps, where=event_type_logit_overlaps != 0)[:length]
    event_type_logits = np.mean(per_frame_event_type_logits, axis=0)
    # softmax of event type logits
    event_type_probs = np.exp(event_type_logits) / np.sum(np.exp(event_type_logits))
    per_frame_event_types = np.argmax(per_frame_event_type_logits, axis=1)

    if median_pred_filter:
        periodicity = medfilt(periodicity, 5)
        periodLength = medfilt(periodLength, 5)
    periodicity = sigmoid(periodicity)
    full_marks = sigmoid(full_marks)
    #full_marks_mask = np.int32(full_marks > marks_threshold)
    pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
    full_marks_mask = np.zeros(len(full_marks))
    full_marks_mask[pred_marks_peaks] = 1
    periodicity_mask = np.int32(periodicity > miss_threshold)

    # Integrate 1/period_length over confidently-periodic frames, snapping to
    # whole reps whenever a high-confidence mark (jump peak) is detected.
    numofReps = 0
    count = []
    for i in range(len(periodLength)):
        if periodLength[i] < 2 or periodicity_mask[i] == 0:
            numofReps += 0
        elif full_marks_mask[i]:  # high confidence mark detected
            if math.modf(numofReps)[0] < 0.2:  # probably false positive/late detection
                numofReps = float(int(numofReps))
            else:
                numofReps = float(int(numofReps) + 1.01)  # round up
        else:
            numofReps += max(0, periodicity_mask[i] / (periodLength[i]))
        count.append(round(float(numofReps), 2))
    # BUG FIX: guard against an empty clip instead of crashing on count[-1].
    count_pred = count[-1] if count else 0.0

    marks_count_pred = 0
    for i in range(len(full_marks) - 1):
        # if a jump was counted, and periodicity is high, and the next frame was not counted (to avoid double counting)
        if full_marks_mask[i] > 0 and periodicity_mask[i] > 0 and full_marks_mask[i + 1] == 0:
            marks_count_pred += 1
    if not both_feet:
        count_pred = count_pred / 2
        marks_count_pred = marks_count_pred / 2
        count = np.array(count) / 2

    # BUG FIX: np.mean of an empty selection returns nan (it does NOT raise
    # ZeroDivisionError as the old try/except assumed), which made the
    # confidence silently NaN; test the selection explicitly instead.
    periodic_vals = periodicity[periodicity > miss_threshold]
    if periodic_vals.size > 0:
        confidence = (np.mean(periodic_vals) - miss_threshold) / (1 - miss_threshold)
    else:
        confidence = 0
    # Self-consistency: disagreement between the two counting methods lowers
    # the reported confidence.
    self_err = abs(count_pred - marks_count_pred)
    self_pct_err = self_err / count_pred if count_pred else 0
    total_confidence = confidence * (1 - self_pct_err)

    if both_feet:
        count_msg = f'## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}'
    else:
        count_msg = f'## Predicted Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}'

    if api_call:
        if count_only_api:
            return f'{count_pred:.2f} (conf: {total_confidence:.2f})'
        else:
            return np.array2string(periodLength, formatter={'float_kind': lambda v: '%.2f' % v}).replace('\n', ''), \
                   np.array2string(periodicity, formatter={'float_kind': lambda v: '%.2f' % v}).replace('\n', ''), \
                   np.array2string(full_marks, formatter={'float_kind': lambda v: '%.2f' % v}).replace('\n', ''), \
                   f'reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}', \
                   f'single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}'

    # Jumping-speed series: clamp to [0, 10] jumps/s, zero out miss frames.
    jumps_per_second = np.clip(1 / ((periodLength / fps) + 0.01), 0, 10)
    jumping_speed = np.copy(jumps_per_second)
    misses = periodicity < miss_threshold
    jumps_per_second[misses] = 0
    frame_type = np.array(['miss' if miss else 'frame' for miss in misses])
    frame_type[full_marks > marks_threshold] = 'jump'
    per_frame_event_types = np.clip(per_frame_event_types, 0, 6) / 6  # class index normalized to [0, 1] for the colorbar
    df = pd.DataFrame.from_dict({'period length': periodLength,
                                 'jumping speed': jumping_speed,
                                 'jumps per second': jumps_per_second,
                                 'periodicity': periodicity,
                                 'miss': misses,
                                 'frame_type': frame_type,
                                 'event_type': per_frame_event_types,
                                 'jumps': full_marks,
                                 'jumps_size': (full_marks + 0.05) * 10,
                                 'miss_size': np.clip((1 - periodicity) * 0.9 + 0.1, 1, 8),
                                 'seconds': np.linspace(0, seconds, num=len(periodLength))})
    event_type_tick_vals = np.linspace(0, 1, num=7)
    event_type_colors = ['red', 'orange', 'green', 'blue', 'purple', 'pink', 'black']
    fig = px.scatter(data_frame=df,
                     x='seconds',
                     y='jumps per second',
                     #symbol='frame_type',
                     #symbol_map={'frame': 'circle', 'miss': 'circle-open', 'jump': 'triangle-down'},
                     color='event_type',
                     size='jumps_size',
                     size_max=8,
                     color_continuous_scale=[(t, c) for t, c in zip(event_type_tick_vals, event_type_colors)],
                     range_color=(0, 1),
                     title='Jumping speed (jumps-per-second)',
                     trendline='rolling',
                     trendline_options=dict(window=16),
                     trendline_color_override='goldenrod',
                     trendline_scope='overall',
                     template='plotly_dark')

    fig.update_layout(legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.98,
        xanchor='right',
        x=1,
        font=dict(
            family='Courier',
            size=12,
            color='black'
        ),
        bgcolor='AliceBlue',
    ),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    # remove white outline from marks
    fig.update_traces(marker_line_width=0)
    fig.update_layout(coloraxis_colorbar=dict(
        tickvals=event_type_tick_vals,
        ticktext=['single<br>rope', 'double<br>dutch', 'double<br>unders', 'single<br>bounces', 'double<br>bounces', 'triple<br>unders', 'other'],
        title='event type'
    ))

    hist = px.histogram(df,
                        x='jumps per second',
                        template='plotly_dark',
                        marginal='box',
                        histnorm='percent',
                        title='Distribution of jumping speed (jumps-per-second)')

    # make a bar plot of the event type distribution
    bar = px.bar(x=['single rope', 'double dutch', 'double unders', 'single bounces', 'double bounces', 'triple unders', 'other'],
                 y=event_type_probs,
                 template='plotly_dark',
                 title='Event Type Distribution',
                 labels={'x': 'event type', 'y': 'probability'},
                 range_y=[0, 1])

    return x, count_msg, fig, hist, bar
330
+
331
+
332
with gr.Blocks() as demo:
    # (disabled) direct video-upload input — the app clips from a stream URL instead
    # in_video = gr.PlayableVideo(label='Input Video', elem_id='input-video', format='mp4',
    #                             width=400, height=400, interactive=True, container=True,
    #                             max_length=150)

    # Input row: stream URL plus start/end timestamps, with the downloaded
    # clip shown alongside once inference has run.
    with gr.Row():
        in_stream_url = gr.Textbox(label='Stream URL', elem_id='stream-url', visible=True)
        with gr.Column():
            in_stream_start = gr.Textbox(label='Start Time', elem_id='stream-start', visible=True)
        with gr.Column():
            in_stream_end = gr.Textbox(label='End Time', elem_id='stream-end', visible=True)
        with gr.Column(min_width=480):
            out_video = gr.PlayableVideo(label='Video Clip', elem_id='output-video', format='mp4', width=400, height=400)

    # The second button and the key/checkbox are hidden: they exist only to
    # expose the named 'inference' API endpoint, not for interactive use.
    with gr.Row():
        run_button = gr.Button(value='Run', elem_id='run-button', scale=1)
        api_dummy_button = gr.Button(value='Run (No Viz)', elem_id='count-only', visible=False, scale=2)
        count_only = gr.Checkbox(label='Count Only', visible=False)
        api_token = gr.Textbox(label='API Key', elem_id='api-token', visible=False)

    # Output area: count message, two hidden textboxes for API text output,
    # and the three Plotly figures returned by inference().
    with gr.Column(elem_id='output-video-container'):
        with gr.Row():
            with gr.Column():
                out_text = gr.Markdown(label='Predicted Count', elem_id='output-text')
                period_length = gr.Textbox(label='Period Length', elem_id='period-length', visible=False)
                periodicity = gr.Textbox(label='Periodicity', elem_id='periodicity', visible=False)
        with gr.Row():
            out_plot = gr.Plot(label='Jumping Speed', elem_id='output-plot')
        with gr.Row():
            with gr.Column():
                out_hist = gr.Plot(label='Speed Histogram', elem_id='output-hist')
            with gr.Column():
                out_event_type_dist = gr.Plot(label='Event Type Distribution', elem_id='output-event-type-dist')

    # Interactive entry point: no API-key arguments, full visualization.
    demo_inference = partial(inference, count_only_api=False, api_key=None)

    run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end], outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
    # API entry point (key-checked inside inference via api_call=True).
    api_inference = partial(inference, api_call=True)
    api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, count_only, api_token], outputs=[period_length], api_name='inference')
371
+
372
+
373
if __name__ == '__main__':
    # Queue requests (max 15 waiting) and keep the API open so the hidden
    # 'inference' endpoint is callable programmatically.
    demo.queue(api_open=True, max_size=15).launch(share=False)
requirements.txt CHANGED
@@ -10,5 +10,4 @@ opencv-python-headless==4.7.0.68
10
  torch
11
  torchvision
12
  onnxruntime-gpu
13
- yt-dlp
14
- nvidia-tensorrt
 
10
  torch
11
  torchvision
12
  onnxruntime-gpu
13
+ yt-dlp