| import gradio as gr
|
| import numpy as np
|
| from PIL import Image
|
| import os
|
| import cv2
|
| import math
|
| import matplotlib
|
| matplotlib.use('Agg')
|
| import matplotlib.pyplot as plt
|
| from scipy.signal import medfilt, find_peaks
|
| from functools import partial
|
| from passlib.hash import pbkdf2_sha256
|
| from tqdm import tqdm
|
| import pandas as pd
|
| import plotly.express as px
|
| import onnxruntime as ort
|
| import torch
|
| from torchvision import transforms
|
| import torchvision.transforms.functional as F
|
|
|
| from huggingface_hub import hf_hub_download
|
| from huggingface_hub import HfApi
|
|
|
| from hls_download import download_clips
|
|
|
# Render all matplotlib output on a dark background.
plt.style.use('dark_background')

# Fetch the ONNX model from the Hugging Face Hub. The access token is read
# from the DATASET_SECRET environment variable (KeyError if it is not set).
onnx_file = hf_hub_download(repo_id='dylanplummer/ropenet', filename='nextjump.onnx', repo_type='model', token=os.environ['DATASET_SECRET'])

# ORT_ENABLE_ALL is ONNX Runtime's default optimization level; it is set
# explicitly so the CPU and CUDA sessions are configured the same way.
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

if torch.cuda.is_available():
    # Share torch's current CUDA device and stream with ONNX Runtime, and keep
    # the CPU provider as an explicit fallback for nodes CUDA cannot run.
    providers = [
        ('CUDAExecutionProvider', {'device_id': torch.cuda.current_device(),
                                   'user_compute_stream': str(torch.cuda.current_stream().cuda_stream)}),
        'CPUExecutionProvider',
    ]
else:
    # Providers must be named explicitly: ONNX Runtime builds that ship more
    # than one execution provider refuse (or warn about) an implicit choice.
    providers = ['CPUExecutionProvider']

ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)

# Run one throw-away batch so the first real request does not pay the
# session's one-time initialisation cost.
print('Warmup...')
dummy_input = torch.randn(4, 64, 3, 288, 288)
ort_sess.run(None, {'video': dummy_input.numpy()})
print('Done!')
|
|
|
class SquarePad:
    """Transform that pads an image with black borders until it is square.

    The image is expected to expose ``.size`` as a ``(width, height)`` pair,
    as PIL images do.
    """

    def __call__(self, image):
        """Return ``image`` padded to ``max(width, height)`` on both axes."""
        w, h = image.size
        max_wh = max(w, h)
        # Split the missing pixels between the two sides. When the difference
        # is odd the extra pixel goes to the right/bottom edge; padding both
        # sides with the floored half would leave the result one pixel short
        # of square.
        left = (max_wh - w) // 2
        top = (max_wh - h) // 2
        right = (max_wh - w) - left
        bottom = (max_wh - h) - top
        padding = (left, top, right, bottom)
        return F.pad(image, padding, 0, 'constant')
|
|
|
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + e**-x)`` of ``x``."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
|
|
|
|
|
def create_transform(img_size):
    """Build the per-frame preprocessing pipeline.

    The returned callable pads a frame to a square, resizes it to
    ``img_size`` x ``img_size`` with bicubic interpolation and converts it
    to a tensor.
    """
    target_shape = (img_size, img_size)
    steps = [
        SquarePad(),
        transforms.Resize(target_shape, interpolation=Image.BICUBIC),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)
|
|
|
|
|
def _read_frames(video_path):
    """Decode every frame of ``video_path`` into a list of RGB PIL images.

    Returns ``(frames, fps)`` where ``fps`` is the integer-truncated frame
    rate reported by OpenCV. The capture handle is always released.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes to BGR; PIL expects RGB.
            frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(frame))
    finally:
        cap.release()
    return frames, fps


def _count_reps(period_length, periodicity_mask, marks_mask):
    """Return the running repetition count after each frame.

    A frame whose period length is below 2 or whose periodicity mask is 0
    adds nothing. A frame flagged in ``marks_mask`` snaps the counter to a
    whole number: down when the fractional part is below 0.2, otherwise up
    to the next integer plus 0.01. Any other frame adds ``1 / period_length``.
    Each entry is rounded to two decimals.
    """
    reps = 0
    running = []
    for period, periodic, is_mark in zip(period_length, periodicity_mask, marks_mask):
        if period < 2 or periodic == 0:
            pass
        elif is_mark:
            if math.modf(reps)[0] < 0.2:
                reps = float(int(reps))
            else:
                reps = float(int(reps) + 1.01)
        else:
            reps += max(0, periodic / period)
        running.append(round(float(reps), 2))
    return running


def _build_plots(df, event_type_probs):
    """Create the speed scatter plot, the speed histogram and the event-type bar chart."""
    event_type_tick_vals = np.linspace(0, 1, num=7)
    event_type_colors = ['red', 'orange', 'green', 'blue', 'purple', 'pink', 'black']
    fig = px.scatter(data_frame=df,
                     x='seconds',
                     y='jumps per second',
                     color='event_type',
                     size='jumps_size',
                     size_max=8,
                     color_continuous_scale=[(t, c) for t, c in zip(event_type_tick_vals, event_type_colors)],
                     range_color=(0,1),
                     title='Jumping speed (jumps-per-second)',
                     trendline='rolling',
                     trendline_options=dict(window=16),
                     trendline_color_override='goldenrod',
                     trendline_scope='overall',
                     template='plotly_dark')

    fig.update_layout(legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.98,
        xanchor='right',
        x=1,
        font=dict(
            family='Courier',
            size=12,
            color='black'
        ),
        bgcolor='AliceBlue',
    ),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )

    fig.update_traces(marker_line_width = 0)
    fig.update_layout(coloraxis_colorbar=dict(
        tickvals=event_type_tick_vals,
        ticktext=['single<br>rope', 'double<br>dutch', 'double<br>unders', 'single<br>bounces', 'double<br>bounces', 'triple<br>unders', 'other'],
        title='event type'
    ))

    hist = px.histogram(df,
                        x='jumps per second',
                        template='plotly_dark',
                        marginal='box',
                        histnorm='percent',
                        title='Distribution of jumping speed (jumps-per-second)')

    bar = px.bar(x=['single rope', 'double dutch', 'double unders', 'single bounces', 'double bounces', 'triple unders', 'other'],
                 y=event_type_probs,
                 template='plotly_dark',
                 title='Event Type Distribution',
                 labels={'x': 'event type', 'y': 'probability'},
                 range_y=[0, 1])
    return fig, hist, bar


def inference(stream_url, start_time, end_time, count_only_api, api_key,
              img_size=288, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
              miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
              api_call=False,
              progress=gr.Progress()):
    """Download a clip, run the ONNX model over it and summarise the jumps.

    The clip is split into overlapping windows of ``seq_len`` frames taken
    every ``stride_length`` frames. Windows are run through ``ort_sess`` in
    batches of ``batch_size`` and the per-frame outputs of overlapping
    windows are averaged.

    Args:
        stream_url, start_time, end_time: forwarded to ``download_clips``.
        count_only_api: for API calls, return only the count string.
        api_key: pbkdf2_sha256 hash that must verify against the
            ``DEV_API_TOKEN`` environment variable when ``api_call`` is set.
        img_size: side length frames are resized to.
        seq_len, stride_length, stride_pad, batch_size: windowing parameters.
        miss_threshold: periodicity above which a frame counts as jumping.
        marks_threshold: minimum peak height for a frame to count as a mark.
        median_pred_filter: apply a width-5 median filter to the periodicity
            and period-length tracks.
        center_crop: accepted for compatibility; not used by this function.
        both_feet: when False, all counts are halved.
        api_call: enable API-key checking and text-only return values.
        progress: Gradio progress tracker.

    Returns:
        * ``'Invalid API Key'`` when ``api_call`` is set and the key is rejected;
        * for API calls, either one count string (``count_only_api``) or a
          5-tuple of strings (period lengths, periodicities, marks, summary,
          event-type probabilities);
        * otherwise ``(video_path, count_markdown, scatter, histogram, bar)``.

    Raises:
        gr.Error: if the clip yields no frames or reports no valid frame rate.
    """
    progress(0, desc='Starting...')

    # Authenticate before doing any work so that rejected API calls cannot
    # trigger a download.
    if api_call:
        try:
            has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
        except (TypeError, ValueError):
            # A missing or malformed hash is simply an invalid key.
            has_access = False
        if not has_access:
            return 'Invalid API Key'

    x = download_clips(stream_url, os.getcwd(), start_time, end_time)

    print('Reading frames...')
    all_frames, fps = _read_frames(x)
    print('Done!')
    if not all_frames:
        raise gr.Error('No frames could be read from the requested clip.')
    if fps <= 0:
        raise gr.Error('The requested clip does not report a valid frame rate.')

    length = len(all_frames)
    # Derived from the frames actually decoded, so the time axis matches the
    # per-frame predictions even when the container's frame count is off.
    seconds = length / fps

    padded_length = length + seq_len + stride_length
    period_length_sums = np.zeros(padded_length)
    periodicity_sums = np.zeros(padded_length)
    marks_sums = np.zeros(padded_length)
    event_type_sums = np.zeros((padded_length, 7))
    overlaps = np.zeros(padded_length)

    # Repeat the last frame so that every window is full length.
    all_frames.extend([all_frames[-1]] * (seq_len + stride_length))

    def run_batch(clips, starts):
        """Run one batch and add each real window's outputs to the accumulators."""
        # Pad the batch up to batch_size by repeating the last clip. Only the
        # real windows are accumulated: zip() stops at the end of ``starts``,
        # so the padding copies cannot over-weight the final window.
        padded = clips + [clips[-1]] * (batch_size - len(clips))
        batch_X = torch.cat(padded)
        outputs = ort_sess.run(None, {'video': batch_X.numpy()})
        for y1, y2, y3, y4, idx in zip(outputs[0], outputs[1], outputs[2], outputs[3], starts):
            window = slice(idx, idx + seq_len)
            period_length_sums[window] += y1.squeeze()
            periodicity_sums[window] += y2.squeeze()
            marks_sums[window] += y3.squeeze()
            event_type_sums[window] += y4.squeeze()
            overlaps[window] += 1

    preprocess = create_transform(img_size)
    total = length + stride_length - stride_pad
    pending_clips = []
    pending_starts = []
    for i in tqdm(range(0, total, stride_length)):
        tensors = [preprocess(img).unsqueeze(0) for img in all_frames[i:i + seq_len]]
        if len(tensors) < seq_len:
            tensors.extend([tensors[-1]] * (seq_len - len(tensors)))
        # ToTensor scales to [0, 1]; the model is fed values in [0, 255].
        clip = torch.cat(tensors) * 255
        pending_clips.append(clip.unsqueeze(0))
        pending_starts.append(i)
        if len(pending_clips) == batch_size:
            run_batch(pending_clips, pending_starts)
            pending_clips = []
            pending_starts = []
        progress(i / total, desc='Processing...')
    if pending_clips:
        run_batch(pending_clips, pending_starts)

    # Average overlapping windows. ``out`` makes uncovered positions 0 instead
    # of uninitialised memory.
    covered = overlaps != 0
    periodLength = np.divide(period_length_sums, overlaps,
                             out=np.zeros_like(period_length_sums), where=covered)[:length]
    periodicity = np.divide(periodicity_sums, overlaps,
                            out=np.zeros_like(periodicity_sums), where=covered)[:length]
    full_marks = np.divide(marks_sums, overlaps,
                           out=np.zeros_like(marks_sums), where=covered)[:length]
    per_frame_event_type_logits = np.divide(event_type_sums, overlaps[:, None],
                                            out=np.zeros_like(event_type_sums),
                                            where=covered[:, None])[:length]
    event_type_logits = np.mean(per_frame_event_type_logits, axis=0)

    # Softmax, shifted by the maximum so large logits cannot overflow.
    exp_logits = np.exp(event_type_logits - np.max(event_type_logits))
    event_type_probs = exp_logits / np.sum(exp_logits)
    per_frame_event_types = np.argmax(per_frame_event_type_logits, axis=1)

    if median_pred_filter:
        periodicity = medfilt(periodicity, 5)
        periodLength = medfilt(periodLength, 5)
    periodicity = sigmoid(periodicity)
    full_marks = sigmoid(full_marks)

    pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
    full_marks_mask = np.zeros(len(full_marks))
    full_marks_mask[pred_marks_peaks] = 1
    periodicity_mask = np.int32(periodicity > miss_threshold)

    count = _count_reps(periodLength, periodicity_mask, full_marks_mask)
    count_pred = count[-1]

    # A mark is counted on a periodic frame whose successor is not a mark.
    marks_count_pred = int(np.sum((full_marks_mask[:-1] > 0)
                                  & (periodicity_mask[:-1] > 0)
                                  & (full_marks_mask[1:] == 0)))
    if not both_feet:
        count_pred = count_pred / 2
        marks_count_pred = marks_count_pred / 2
        count = np.array(count) / 2

    # np.mean of an empty selection is nan (it does not raise), so the empty
    # and degenerate-threshold cases are handled explicitly.
    confident = periodicity[periodicity > miss_threshold]
    if confident.size and miss_threshold != 1:
        confidence = (np.mean(confident) - miss_threshold) / (1 - miss_threshold)
    else:
        confidence = 0
    self_err = abs(count_pred - marks_count_pred)
    self_pct_err = self_err / count_pred if count_pred else 0
    total_confidence = confidence * (1 - self_pct_err)

    if both_feet:
        count_msg = f'## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}'
    else:
        count_msg = f'## Predicted Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}'

    if api_call:
        if count_only_api:
            return f'{count_pred:.2f} (conf: {total_confidence:.2f})'

        def as_text(values):
            # threshold=values.size disables numpy's "..." summarisation, which
            # would otherwise drop the middle of any clip over 1000 frames.
            return np.array2string(values, threshold=values.size,
                                   formatter={'float_kind': lambda v: '%.2f' % v}).replace('\n', '')

        return as_text(periodLength), \
            as_text(periodicity), \
            as_text(full_marks), \
            f'reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}', \
            f'single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}'

    jumps_per_second = np.clip(1 / ((periodLength / fps) + 0.01), 0, 10)
    jumping_speed = np.copy(jumps_per_second)
    misses = periodicity < miss_threshold
    jumps_per_second[misses] = 0
    frame_type = np.array(['miss' if miss else 'frame' for miss in misses])
    frame_type[full_marks > marks_threshold] = 'jump'
    # Map the class index onto [0, 1] for the continuous colour scale.
    per_frame_event_types = np.clip(per_frame_event_types, 0, 6) / 6
    df = pd.DataFrame.from_dict({'period length': periodLength,
                                 'jumping speed': jumping_speed,
                                 'jumps per second': jumps_per_second,
                                 'periodicity': periodicity,
                                 'miss': misses,
                                 'frame_type': frame_type,
                                 'event_type': per_frame_event_types,
                                 'jumps': full_marks,
                                 'jumps_size': (full_marks + 0.05) * 10,
                                 'miss_size': np.clip((1 - periodicity) * 0.9 + 0.1, 1, 8),
                                 'seconds': np.linspace(0, seconds, num=len(periodLength))})

    fig, hist, bar = _build_plots(df, event_type_probs)
    return x, count_msg, fig, hist, bar
|
|
|
|
|
# Gradio UI: clip selection inputs, a run button, and the result widgets.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened source; confirm it against the rendered page.
with gr.Blocks() as demo:
    # Clip selection (URL, start, end) next to the downloaded clip preview.
    with gr.Row():
        in_stream_url = gr.Textbox(label='Stream URL', elem_id='stream-url', visible=True)
        with gr.Column():
            in_stream_start = gr.Textbox(label='Start Time', elem_id='stream-start', visible=True)
        with gr.Column():
            in_stream_end = gr.Textbox(label='End Time', elem_id='stream-end', visible=True)
        with gr.Column(min_width=480):
            out_video = gr.PlayableVideo(label='Video Clip', elem_id='output-video', format='mp4', width=400, height=400)

    with gr.Row():
        run_button = gr.Button(value='Run', elem_id='run-button', scale=1)
        # Hidden controls: they exist only as the trigger and inputs of the
        # 'inference' API endpoint registered below.
        api_dummy_button = gr.Button(value='Run (No Viz)', elem_id='count-only', visible=False, scale=2)
        count_only = gr.Checkbox(label='Count Only', visible=False)
        api_token = gr.Textbox(label='API Key', elem_id='api-token', visible=False)

    with gr.Column(elem_id='output-video-container'):
        with gr.Row():
            with gr.Column():
                out_text = gr.Markdown(label='Predicted Count', elem_id='output-text')
                # Hidden textbox that receives the API endpoint's return value.
                period_length = gr.Textbox(label='Period Length', elem_id='period-length', visible=False)
                # Hidden textbox; not wired to any event in this file.
                periodicity = gr.Textbox(label='Periodicity', elem_id='periodicity', visible=False)
        with gr.Row():
            out_plot = gr.Plot(label='Jumping Speed', elem_id='output-plot')
        with gr.Row():
            with gr.Column():
                out_hist = gr.Plot(label='Speed Histogram', elem_id='output-hist')
            with gr.Column():
                out_event_type_dist = gr.Plot(label='Event Type Distribution', elem_id='output-event-type-dist')

    # Interactive path: the API-only arguments are fixed, so the button
    # supplies just the three clip-selection inputs.
    demo_inference = partial(inference, count_only_api=False, api_key=None)

    run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end], outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
    # API path: api_call=True makes inference check the API key and return text
    # instead of plots; the endpoint is exposed under the name 'inference'.
    api_inference = partial(inference, api_call=True)
    api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, count_only, api_token], outputs=[period_length], api_name='inference')
|
|
|
|
|
if __name__ == '__main__':
    # Queue requests (at most 15 waiting, API endpoints open), then serve
    # without creating a public share link.
    queued_demo = demo.queue(api_open=True, max_size=15)
    queued_demo.launch(share=False)