import gradio as gr
import numpy as np
from PIL import Image
import os
import cv2
import math
import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import concurrent.futures
from scipy.signal import medfilt, find_peaks
from functools import partial
from passlib.hash import pbkdf2_sha256
from tqdm import tqdm
import pandas as pd
import plotly.express as px
import onnxruntime as ort
import torch
from torchvision import transforms
import torchvision.transforms.functional as F
from huggingface_hub import hf_hub_download, HfApi
from hls_download import download_clips

plt.style.use('dark_background')
IMG_SIZE = 256

onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx",
                            repo_type="model", token=os.environ['DATASET_SECRET'])
# Previous OpenVINO path, kept for reference:
# model_xml = hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.xml", repo_type="model", token=os.environ['DATASET_SECRET'])
# hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.mapping", repo_type="model", token=os.environ['DATASET_SECRET'])
# model_xml = "model_ir/model.xml"
# ie = Core()
# model_ir = ie.read_model(model=model_xml)
# config = {"PERFORMANCE_HINT": "LATENCY"}
# compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU", config=config)
if torch.cuda.is_available():
    # bind ONNX Runtime to the current torch CUDA device and stream
    providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
                                            "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
    sess_options = ort.SessionOptions()
    ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
else:
    ort_sess = ort.InferenceSession(onnx_file)
# warmup inference: input is (batch, seq_len, channels, height, width)
ort_sess.run(None, {'video': np.zeros((4, 64, 3, IMG_SIZE, IMG_SIZE), dtype=np.float32)})
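# The session returns four output heads, unpacked in the results loop inside
# inference() below: per-frame period length, periodicity, jump-mark logits,
# and 7-way event-type logits.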
class SquarePad:
    # https://discuss.pytorch.org/t/how-to-resize-and-pad-in-a-torchvision-transforms-compose/71850/9
    def __call__(self, image):
        w, h = image.size
        max_wh = max(w, h)
        hp = int((max_wh - w) / 2)
        vp = int((max_wh - h) / 2)
        padding = (hp, vp, hp, vp)
        return F.pad(image, padding, 0, 'constant')


def square_pad_opencv(image):
    h, w = image.shape[:2]
    max_wh = max(w, h)
    hp = int((max_wh - w) / 2)
    vp = int((max_wh - h) / 2)
    return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, value=[0, 0, 0])
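# Quick sanity check for the two padding helpers (illustrative only, not part of the app):
#   SquarePad()(Image.new('RGB', (1280, 720))).size             -> (1280, 1280)
#   square_pad_opencv(np.zeros((720, 1280, 3), np.uint8)).shape -> (1280, 1280, 3)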
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def preprocess_image(img, img_size):
    # square resizing/cropping is handled while reading frames, so only tensor conversion remains
    #img = square_pad_opencv(img)
    #img = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_CUBIC)
    img = Image.fromarray(img)
    preprocess = transforms.Compose([transforms.ToTensor()])  # HWC uint8 -> CHW float in [0, 1]
    return preprocess(img).unsqueeze(0)


def run_inference(batch_X):
    batch_X = torch.cat(batch_X)
    return ort_sess.run(None, {'video': batch_X.numpy()})
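# run_inference stacks a list of (1, seq_len, 3, H, W) clip tensors into a single
# (batch, seq_len, 3, H, W) array so the whole batch goes through one ONNX Runtime call.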
def inference(x, count_only_api, api_key,
              img_size=IMG_SIZE, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
              miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
              api_call=False,
              progress=gr.Progress()):
    progress(0, desc="Starting...")
    #x = download_clips(stream_url, os.getcwd(), start_time, end_time)
    #api = HfApi(token=os.environ['DATASET_SECRET'])
    #out_file = str(uuid.uuid1())
    has_access = False
    if api_call:
        has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
        if not has_access:
            return "Invalid API Key"
    cap = cv2.VideoCapture(x)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    seconds = length / fps
    all_frames = []
    resize_size = max(frame_width, frame_height)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # padding with the last frame happens below
        frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
        # stretch to a square, then center-crop the img_size window the model sees
        frame = cv2.resize(frame, (resize_size, resize_size), interpolation=cv2.INTER_CUBIC)
        frame_center_x = frame.shape[1] // 2
        frame_center_y = frame.shape[0] // 2
        crop_x = frame_center_x - img_size // 2
        crop_y = frame_center_y - img_size // 2
        frame = frame[crop_y:crop_y + img_size, crop_x:crop_x + img_size]
        all_frames.append(frame)
    cap.release()
    length = len(all_frames)
    period_lengths = np.zeros(len(all_frames) + seq_len + stride_length)
    periodicities = np.zeros(len(all_frames) + seq_len + stride_length)
    full_marks = np.zeros(len(all_frames) + seq_len + stride_length)
    event_type_logits = np.zeros((len(all_frames) + seq_len + stride_length, 7))
    period_length_overlaps = np.zeros(len(all_frames) + seq_len + stride_length)
    event_type_logit_overlaps = np.zeros((len(all_frames) + seq_len + stride_length, 7))
    for _ in range(seq_len + stride_length):  # pad the full sequence with the last frame
        all_frames.append(all_frames[-1])
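    # With seq_len=64 and stride_length=32, consecutive windows overlap by half, so
    # each frame is predicted by roughly two windows; the *_overlaps counters record
    # exactly how many, and dividing by them after the loop averages those predictions.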
    batch_list = []
    idx_list = []
    inference_futures = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        for i in tqdm(range(0, length + stride_length - stride_pad, stride_length)):
            batch = all_frames[i:i + seq_len]
            Xlist = []
            # preprocess frames in parallel, then restore their original order
            preprocess_tasks = [(idx, executor.submit(preprocess_image, img, img_size)) for idx, img in enumerate(batch)]
            for idx, future in sorted(preprocess_tasks, key=lambda x: x[0]):
                Xlist.append(future.result())
            if len(Xlist) < seq_len:
                for _ in range(seq_len - len(Xlist)):
                    Xlist.append(Xlist[-1])
            X = torch.cat(Xlist)
            X *= 255  # undo ToTensor's [0, 1] scaling back to 0-255 pixel values
            batch_list.append(X.unsqueeze(0))
            idx_list.append(i)
            if len(batch_list) == batch_size:
                future = executor.submit(run_inference, batch_list)
                inference_futures.append((batch_list, idx_list, future))
                batch_list = []
                idx_list = []
        # process any remaining clips, repeating the last one to fill the batch;
        # the duplicated indices are averaged out by the overlap counters
        if batch_list:
            while len(batch_list) != batch_size:
                batch_list.append(batch_list[-1])
                idx_list.append(idx_list[-1])
            future = executor.submit(run_inference, batch_list)
            inference_futures.append((batch_list, idx_list, future))
        # collect and process the inference results
        for batch_list, idx_list, future in inference_futures:
            outputs = future.result()
            y1_out, y2_out, y3_out, y4_out = outputs[:4]
            for y1, y2, y3, y4, idx in zip(y1_out, y2_out, y3_out, y4_out, idx_list):
                periodLength = y1.squeeze()
                periodicity = y2.squeeze()
                marks = y3.squeeze()
                event_type = y4.squeeze()
                period_lengths[idx:idx + seq_len] += periodLength
                periodicities[idx:idx + seq_len] += periodicity
                full_marks[idx:idx + seq_len] += marks
                event_type_logits[idx:idx + seq_len] += event_type
                period_length_overlaps[idx:idx + seq_len] += 1
                event_type_logit_overlaps[idx:idx + seq_len] += 1
    # average overlapping windows; out=zeros keeps uncovered frames at 0
    # (np.divide with only `where=` would leave those entries uninitialized)
    periodLength = np.divide(period_lengths, period_length_overlaps, out=np.zeros_like(period_lengths), where=period_length_overlaps != 0)[:length]
    periodicity = np.divide(periodicities, period_length_overlaps, out=np.zeros_like(periodicities), where=period_length_overlaps != 0)[:length]
    full_marks = np.divide(full_marks, period_length_overlaps, out=np.zeros_like(full_marks), where=period_length_overlaps != 0)[:length]
    per_frame_event_type_logits = np.divide(event_type_logits, event_type_logit_overlaps, out=np.zeros_like(event_type_logits), where=event_type_logit_overlaps != 0)[:length]
    event_type_logits = np.mean(per_frame_event_type_logits, axis=0)
    # numerically stable softmax of the mean event-type logits
    event_type_probs = np.exp(event_type_logits - np.max(event_type_logits))
    event_type_probs /= np.sum(event_type_probs)
    per_frame_event_types = np.argmax(per_frame_event_type_logits, axis=1)
    if median_pred_filter:
        periodicity = medfilt(periodicity, 5)
        periodLength = medfilt(periodLength, 5)
    periodicity = sigmoid(periodicity)
    full_marks = sigmoid(full_marks)
    pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
    full_marks_mask = np.zeros(len(full_marks))
    full_marks_mask[pred_marks_peaks] = 1
    periodicity_mask = np.int32(periodicity > miss_threshold)
    numofReps = 0
    count = []
    for i in range(len(periodLength)):
        if periodLength[i] < 2 or periodicity_mask[i] == 0:
            pass  # period too short or frame not periodic: no progress this frame
        elif full_marks_mask[i]:  # high-confidence jump mark detected
            if math.modf(numofReps)[0] < 0.2:  # probably a false positive/late detection
                numofReps = float(int(numofReps))
            else:
                numofReps = float(int(numofReps) + 1.01)  # round up, keeping a small fraction so the next mark isn't double-counted
        else:
            numofReps += max(0, periodicity_mask[i] / periodLength[i])
        count.append(round(float(numofReps), 2))
    count_pred = count[-1]
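    # Between marks, each periodic frame contributes 1/periodLength of a rep, so the
    # count integrates jump frequency over time: e.g. an 8-frame period at 30 fps
    # accumulates 30/8 = 3.75 jumps per second of footage.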
    marks_count_pred = 0
    for i in range(len(full_marks) - 1):
        # count a jump when a mark coincides with high periodicity and the next frame
        # is not also marked (to avoid double counting)
        if full_marks_mask[i] > 0 and periodicity_mask[i] > 0 and full_marks_mask[i + 1] == 0:
            marks_count_pred += 1
    if not both_feet:
        count_pred = count_pred / 2
        marks_count_pred = marks_count_pred / 2
        count = np.array(count) / 2
    # confidence: how far periodic frames sit above the miss threshold, rescaled to [0, 1];
    # np.mean of an empty slice returns nan rather than raising, so guard explicitly
    periodic = periodicity[periodicity > miss_threshold]
    if len(periodic) > 0:
        confidence = (np.mean(periodic) - miss_threshold) / (1 - miss_threshold)
    else:
        confidence = 0
    # agreement between the two counting methods scales the final confidence
    self_err = abs(count_pred - marks_count_pred)
    try:
        self_pct_err = self_err / count_pred
    except ZeroDivisionError:
        self_pct_err = 0
    total_confidence = confidence * (1 - self_pct_err)
    if both_feet:
        count_msg = f"## Reps Count (both feet): {count_pred:.1f}, Confidence: {total_confidence:.2f}"
    else:
        count_msg = f"## Predicted Count (one foot): {count_pred:.1f}, Confidence: {total_confidence:.2f}"
    if api_call:
        if count_only_api:
            return f"{count_pred:.2f} (conf: {total_confidence:.2f})"
        else:
            return np.array2string(periodLength, formatter={'float_kind': lambda x: "%.2f" % x}).replace('\n', ''), \
                   np.array2string(periodicity, formatter={'float_kind': lambda x: "%.2f" % x}).replace('\n', ''), \
                   np.array2string(full_marks, formatter={'float_kind': lambda x: "%.2f" % x}).replace('\n', ''), \
                   f"reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}", \
                   f"single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}"
    jumps_per_second = np.clip(1 / ((periodLength / fps) + 0.01), 0, 10)
    jumping_speed = np.copy(jumps_per_second)
    misses = periodicity < miss_threshold
    jumps_per_second[misses] = 0
    frame_type = np.array(['miss' if miss else 'frame' for miss in misses])
    frame_type[full_marks > marks_threshold] = 'jump'
    per_frame_event_types = np.clip(per_frame_event_types, 0, 6) / 6  # map the 7 classes onto [0, 1] for the color scale
    df = pd.DataFrame.from_dict({'period length': periodLength,
                                 'jumping speed': jumping_speed,
                                 'jumps per second': jumps_per_second,
                                 'periodicity': periodicity,
                                 'miss': misses,
                                 'frame_type': frame_type,
                                 'event_type': per_frame_event_types,
                                 'jumps': full_marks,
                                 'jumps_size': (full_marks + 0.05) * 10,
                                 # NOTE: (1 - periodicity) * 0.9 + 0.1 never exceeds 1.0, so this clip pins miss_size at 1
                                 'miss_size': np.clip((1 - periodicity) * 0.9 + 0.1, 1, 8),
                                 'seconds': np.linspace(0, seconds, num=len(periodLength))})
    event_type_tick_vals = np.linspace(0, 1, num=7)
    event_type_colors = ['red', 'orange', 'green', 'blue', 'purple', 'pink', 'black']
    fig = px.scatter(data_frame=df,
                     x='seconds',
                     y='jumps per second',
                     #symbol='frame_type',
                     #symbol_map={'frame': 'circle', 'miss': 'circle-open', 'jump': 'triangle-down'},
                     color='event_type',
                     size='jumps_size',
                     size_max=8,
                     color_continuous_scale=[(t, c) for t, c in zip(event_type_tick_vals, event_type_colors)],
                     range_color=(0, 1),
                     title="Jumping speed (jumps-per-second)",
                     trendline='rolling',
                     trendline_options=dict(window=16),
                     trendline_color_override="goldenrod",
                     trendline_scope='overall',
                     template="plotly_dark")
    fig.update_layout(legend=dict(orientation="h",
                                  yanchor="bottom",
                                  y=0.98,
                                  xanchor="right",
                                  x=1,
                                  font=dict(family="Courier", size=12, color="black"),
                                  bgcolor="AliceBlue"),
                      paper_bgcolor='rgba(0,0,0,0)',
                      plot_bgcolor='rgba(0,0,0,0)')
    # remove white outline from marks
    fig.update_traces(marker_line_width=0)
    fig.update_layout(coloraxis_colorbar=dict(tickvals=event_type_tick_vals,
                                              ticktext=['single<br>rope', 'double<br>dutch', 'double<br>unders', 'single<br>bounces', 'double<br>bounces', 'triple<br>unders', 'other'],
                                              title='event type'))
    hist = px.histogram(df,
                        x="jumps per second",
                        template="plotly_dark",
                        marginal="box",
                        histnorm='percent',
                        title="Distribution of jumping speed (jumps-per-second)")
    # bar plot of the clip-level event type distribution
    bar = px.bar(x=['single rope', 'double dutch', 'double unders', 'single bounces', 'double bounces', 'triple unders', 'other'],
                 y=event_type_probs,
                 template="plotly_dark",
                 title="Event Type Distribution",
                 labels={'x': 'event type', 'y': 'probability'},
                 range_y=[0, 1])
    return count_msg, fig, hist, bar

DESCRIPTION = '# NextJump 🦘'
DESCRIPTION += '\n## AI Counting for Competitive Jump Rope'
DESCRIPTION += '\nDemo created by [Dylan Plummer](https://dylan-plummer.github.io/). Check out the [NextJump iOS app](https://apps.apple.com/us/app/nextjump-jump-rope-counter/id6451026115).'
with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    # in_video = gr.PlayableVideo(label="Input Video", elem_id='input-video', format='mp4',
    #                             width=400, height=400, interactive=True, container=True,
    #                             max_length=150)
    with gr.Row():
        with gr.Column(min_width=480):
            video = gr.Video(label="Video Clip", elem_id='output-video', format='mp4', width=400, height=400)
            with gr.Row():
                run_button = gr.Button(value="Run", elem_id='run-button', scale=1)
                api_dummy_button = gr.Button(value="Run (No Viz)", elem_id='count-only', visible=False, scale=2)
                count_only = gr.Checkbox(label="Count Only", visible=False)
                api_token = gr.Textbox(label="API Key", elem_id='api-token', visible=False)
        with gr.Column(elem_id='output-video-container'):
            with gr.Row():
                with gr.Column():
                    out_text = gr.Markdown(label="Predicted Count", elem_id='output-text')
                    period_length = gr.Textbox(label="Period Length", elem_id='period-length', visible=False)
                    periodicity = gr.Textbox(label="Periodicity", elem_id='periodicity', visible=False)
            with gr.Row():
                out_plot = gr.Plot(label="Jumping Speed", elem_id='output-plot')
            with gr.Row():
                with gr.Column():
                    out_hist = gr.Plot(label="Speed Histogram", elem_id='output-hist')
                with gr.Column():
                    out_event_type_dist = gr.Plot(label="Event Type Distribution", elem_id='output-event-type-dist')
    demo_inference = partial(inference, count_only_api=False, api_key=None)
    run_button.click(demo_inference, [video], outputs=[out_text, out_plot, out_hist, out_event_type_dist])
    # hidden API endpoint: callers are expected to set count_only so a single string is returned
    api_inference = partial(inference, api_call=True)
    api_dummy_button.click(api_inference, [video, count_only, api_token], outputs=[period_length], api_name='inference')

if __name__ == "__main__":
    demo.queue(api_open=True, max_size=15).launch(share=False, pwa=True)
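
# Illustrative client-side call against the hidden API endpoint above (a sketch:
# the Space id and key are placeholders, and exact gradio_client argument handling
# for video inputs varies by version):
# from gradio_client import Client
# client = Client("<user>/<space>")
# result = client.predict("clip.mp4", True, "<api-key-hash>", api_name="/inference")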