Single-Rope-Contest

Running

File size: 19,484 Bytes

8fe9a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
d192993
8fe9a5a
 
 
 
 
 
 
 
 
 
2fe3d36
 
8fe9a5a
 
2fe3d36
 
 
 
 
 
57925f4
8fe9a5a
2fe3d36
 
 
 
 
 
 
 
 
 
 
 
 
8fe9a5a
 
43f7645
 
 
 
 
 
 
 
 
8fe9a5a
43f7645
 
8fe9a5a
 
f80d9b2
8fe9a5a
d192993
 
 
 
 
 
 
8fe9a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43f7645
8fe9a5a
 
 
 
 
 
08cf343
796d94e
8fe9a5a
796d94e
8fe9a5a
 
 
 
 
 
 
 
 
43f7645
 
 
 
 
 
 
 
 
8fe9a5a
 
 
 
 
 
43f7645
8fe9a5a
 
 
 
 
 
 
 
 
 
 
 
2fe3d36
 
 
 
 
08cf343
8fe9a5a
 
08cf343
43f7645
8fe9a5a
 
08cf343
43f7645
8fe9a5a
43f7645
8fe9a5a
 
 
 
 
 
 
2fe3d36
 
 
 
 
08cf343
8fe9a5a
 
08cf343
43f7645
8fe9a5a
 
08cf343
43f7645
8fe9a5a
43f7645
8fe9a5a
 
 
08cf343
cf3f9fd
 
43f7645
 
cf3f9fd
98fee40
8fe9a5a
 
319f52e
8fe9a5a
08cf343
 
8fe9a5a
 
 
 
 
 
 
 
 
 
08cf343
 
 
 
 
8fe9a5a
 
08cf343
8fe9a5a
 
319f52e
 
 
 
 
8fe9a5a
319f52e
8fe9a5a
319f52e
8fe9a5a
98fee40
 
e3ec7f6
98fee40
 
43f7645
08cf343
e3ec7f6
43f7645
98fee40
 
08cf343
8fe9a5a
 
 
319f52e
 
796d94e
8fe9a5a
 
 
 
 
319f52e
cf3f9fd
08cf343
ab0b18f
8fe9a5a
 
c843211
 
8fe9a5a
 
 
cf3f9fd
 
 
08cf343
 
c843211
4b5adda
8fe9a5a
 
08cf343
8fe9a5a
 
 
ab0b18f
8fe9a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547e0cb
c843211
547e0cb
 
 
8fe9a5a
 
 
 
 
 
3398cf4
43f7645
 
 
796d94e
43f7645
 
 
 
 
8fe9a5a
43f7645
8fe9a5a
 
08cf343
8fe9a5a
 
 
 
319f52e
8fe9a5a
2fe3d36
98fee40
 
8494608
 
98fee40
d192993
8fe9a5a
 
 
 
 
 
 
0f5c629
8fe9a5a
43f7645
 
 
 
 
 
 
98fee40
8fe9a5a
 
ab0b18f
8fe9a5a
 
 
 
 
 
 
 
 
ab0b18f
 
8fe9a5a
 
b2bf29b
 
8fe9a5a
ab0b18f
 
 
08775f1
 
ab0b18f
08775f1
ab0b18f
97b70eb
 
 
cf3f9fd
8fe9a5a
98fee40
43f7645
b2bf29b
 
43f7645
8fe9a5a
d192993
8fe9a5a

import os
import shutil
import subprocess
import time
import uuid
from functools import partial

# Third-party imports keep their original relative order so that the load
# order of the native libraries (OpenVINO, OpenCV, torch) is unchanged.
import gradio as gr
import numpy as np
from PIL import Image
from openvino.runtime import Core
import cv2
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.signal import medfilt
from passlib.hash import pbkdf2_sha256
from tqdm import tqdm
import pandas as pd
import plotly.express as px
import torch
from torchvision import transforms
import torchvision.transforms.functional as F

from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi



plt.style.use('dark_background')

# Fetch the trained checkpoint and the model definition from the Hub. Both
# downloads authenticate with the token stored in the DATASET_SECRET env var.
checkpoint = hf_hub_download(repo_id="dylanplummer/ropenet", filename="ropenet_keypoint_0.pt", repo_type="model", token=os.environ['DATASET_SECRET'])
# Keep the model.py path in its own variable: the previous chained assignment
# overwrote `checkpoint` with the path of model.py.
model_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.py", repo_type="model", token=os.environ['DATASET_SECRET'])
# `os` has no `move` function (the old call raised AttributeError at import
# time). Copy the downloaded file into the working directory so that
# `from model import RepNet` can find it while the downloaded copy stays intact.
shutil.copyfile(model_file, "model.py")
from model import RepNet
# model_xml = hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.xml", repo_type="model", token=os.environ['DATASET_SECRET'])
# hf_hub_download(repo_id="dylanplummer/ropenet", filename="model.mapping", repo_type="model", token=os.environ['DATASET_SECRET'])
#model_xml = "model_ir/model.xml"

# ie = Core()
# model_ir = ie.read_model(model=model_xml)
# config = {"PERFORMANCE_HINT": "LATENCY"}
# compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU", config=config)

# Hyper-parameters used to construct the network below.
img_size = 224
backbone = 'mobilenetv3'
embedding_size = 196
n_layers_lstm = 1
separate_rope = False
save_realtime = False
# NOTE(review): nothing visible in this file loads `checkpoint` into `model`
# or switches it to eval mode - confirm whether RepNet does this internally,
# otherwise predictions come from untrained weights in training mode.
model = RepNet(64, backbone=backbone, backbone_scale='0', trainable_backbone=False, distill_frame_model=save_realtime, img_size=img_size, embedding_size=embedding_size, separate_rope=separate_rope)



class SquarePad:
    """Transform that pads an image with zeros so width and height become equal.

    The image is kept centred. When the size difference is odd, the extra
    pixel goes to the right/bottom edge so the result is always exactly
    square (padding both sides with ``int(diff / 2)`` left it one pixel short).
    """
    # https://discuss.pytorch.org/t/how-to-resize-and-pad-in-a-torchvision-transforms-compose/71850/9

    def __call__(self, image):
        """Return ``image`` padded to ``max(w, h) x max(w, h)``.

        ``image.size`` is unpacked as ``(width, height)``, which is how the
        PIL images passed in by ``inference`` report their size.
        """
        w, h = image.size
        max_wh = max(w, h)
        left = (max_wh - w) // 2
        top = (max_wh - h) // 2
        # Whatever the floor division left over goes on the opposite edge.
        right = max_wh - w - left
        bottom = max_wh - h - top
        return F.pad(image, (left, top, right, bottom), 0, 'constant')

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    Computed as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that large
    negative inputs no longer overflow ``np.exp`` and emit a RuntimeWarning;
    the result still saturates to 0 and 1 at the extremes.
    """
    return np.exp(-np.logaddexp(0, -x))


def _to_numpy(tensor):
    """Detach a model output from autograd and return it as a NumPy array."""
    return tensor.detach().cpu().numpy()


def _overlap_mean(totals, counts):
    """Divide summed predictions by their overlap counts, yielding 0 where nothing was predicted."""
    out = np.zeros_like(totals)
    # `out=` is required with `where=`; otherwise the skipped entries are uninitialised memory.
    np.divide(totals, counts, out=out, where=counts != 0)
    return out


def _accumulate_batch(windows, starts, seq_len, batch_size, sums, overlaps):
    """Run the model on the queued windows and add each window's outputs to the per-frame sums."""
    # Repeat the last window so the model always sees `batch_size` windows.
    padded = windows + [windows[-1]] * (batch_size - len(windows))
    with torch.no_grad():  # inference only: no autograd graph needed
        result = model(torch.stack(padded))
    # `starts` only holds the real windows, so zip() drops the padded copies
    # and they are not accumulated a second time.
    for y1, y2, y3, y4, idx in zip(result[0], result[1], result[2], result[3], starts):
        span = slice(idx, idx + seq_len)
        sums['period_length'][span] += _to_numpy(y1.squeeze())
        sums['periodicity'][span] += _to_numpy(y2.squeeze())
        sums['marks'][span] += _to_numpy(y3.squeeze())
        sums['event_type'][span] += _to_numpy(y4.squeeze())
        overlaps[span] += 1


def inference(x, count_only_api, api_key, img_size=192, seq_len=64, stride_length=32, stride_pad=3, batch_size=4, miss_threshold=0.8, marks_threshold=0.6, median_pred_filter=True, center_crop=True, both_feet=True, api_call=False):
    """Count jump-rope repetitions in a video and summarise the predictions.

    The video is decoded with OpenCV, every frame is square-padded and resized
    to ``img_size``, and the module-level ``model`` is run on overlapping
    windows of ``seq_len`` frames taken every ``stride_length`` frames.
    Per-frame outputs of overlapping windows are averaged.

    Args:
        x: Path of the video, passed to ``cv2.VideoCapture``.
        count_only_api: With ``api_call``, return only the count string.
        api_key: With ``api_call``, value checked via ``pbkdf2_sha256.verify``
            against the ``DEV_API_TOKEN`` environment variable.
        img_size: Side length frames are resized to.
            NOTE(review): defaults to 192 while the module-level model is
            built with ``img_size = 224`` - confirm this is intended.
        seq_len: Number of frames per model window.
        stride_length: Frame offset between consecutive windows.
        stride_pad: Shortens the range of window start positions.
        batch_size: Number of windows per model call.
        miss_threshold: Periodicity (after sigmoid) above which a frame counts
            as jumping.
        marks_threshold: Mark score (after sigmoid) above which a frame counts
            as a jump mark.
        median_pred_filter: Apply a width-5 median filter to the periodicity
            and period-length predictions.
        center_crop: Currently unused; frames are always square-padded.
        both_feet: When False, the counts are halved.
        api_call: Enables the API-key check and the text-only return values.

    Returns:
        * ``"Invalid API Key"`` if ``api_call`` is set and verification fails.
        * With ``api_call`` and ``count_only_api``: one formatted count string.
        * With ``api_call`` only: a 5-tuple of strings (period lengths,
          periodicities, marks, summary counts, event-type probabilities).
        * Otherwise: ``(count_msg, fig, hist, bar)`` - a markdown message and
          three plotly figures.

    Raises:
        gr.Error: If no frame could be decoded from ``x``.
    """
    print(x)
    if api_call:
        try:
            has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
        except (TypeError, ValueError):
            # A missing or malformed key is an invalid key, not a server error.
            has_access = False
        if not has_access:
            return "Invalid API Key"

    event_type_names = ['single rope speed', 'double dutch', 'double unders', 'single bounces', 'double bounces', 'triple unders', 'other']
    n_event_types = len(event_type_names)

    # Build the transform once and apply it as frames are decoded, so each
    # frame is preprocessed a single time and only the resized tensor is kept.
    preprocess = transforms.Compose([
        SquarePad(),
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
    ])
    cap = cv2.VideoCapture(x)
    fps = cap.get(cv2.CAP_PROP_FPS)
    all_frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
            # ToTensor scales to [0, 1]; the model is fed values in [0, 255].
            all_frames.append(preprocess(Image.fromarray(frame)) * 255)
    finally:
        cap.release()

    length = len(all_frames)
    if length == 0:
        raise gr.Error("Could not read any frames from the input video.")
    if not np.isfinite(fps) or fps <= 0:
        # The container reported no usable frame rate; fall back to a nominal
        # value so the time axis can still be drawn (counts are unaffected).
        fps = 30.0
    seconds = length / fps

    padded_length = length + seq_len + stride_length
    sums = {
        'period_length': np.zeros(padded_length),
        'periodicity': np.zeros(padded_length),
        'marks': np.zeros(padded_length),
        'event_type': np.zeros((padded_length, n_event_types)),
    }
    overlaps = np.zeros(padded_length)
    # Pad with the last frame so that every window is a full seq_len long.
    all_frames.extend([all_frames[-1]] * (seq_len + stride_length))

    windows, starts = [], []
    for i in tqdm(range(0, length + stride_length - stride_pad, stride_length)):
        windows.append(torch.stack(all_frames[i:i + seq_len]))
        starts.append(i)
        if len(windows) == batch_size:
            _accumulate_batch(windows, starts, seq_len, batch_size, sums, overlaps)
            windows, starts = [], []
    if windows:  # still some leftover windows
        _accumulate_batch(windows, starts, seq_len, batch_size, sums, overlaps)

    periodLength = _overlap_mean(sums['period_length'], overlaps)[:length]
    periodicity = _overlap_mean(sums['periodicity'], overlaps)[:length]
    full_marks = _overlap_mean(sums['marks'], overlaps)[:length]
    per_frame_event_type_logits = _overlap_mean(sums['event_type'], overlaps[:, None])[:length]
    event_type_logits = np.mean(per_frame_event_type_logits, axis=0)
    # softmax of event type logits (shifted by the max for numerical stability)
    shifted_logits = event_type_logits - np.max(event_type_logits)
    event_type_probs = np.exp(shifted_logits) / np.sum(np.exp(shifted_logits))
    per_frame_event_types = np.argmax(per_frame_event_type_logits, axis=1)

    if median_pred_filter:
        periodicity = medfilt(periodicity, 5)
        periodLength = medfilt(periodLength, 5)
    periodicity = sigmoid(periodicity)
    full_marks = sigmoid(full_marks)
    full_marks_mask = np.int32(full_marks > marks_threshold)
    periodicity_mask = np.int32(periodicity > miss_threshold)

    # Each confidently periodic frame contributes 1 / period_length reps.
    countable = (periodLength >= 2) & (periodicity_mask > 0)
    rep_increments = np.zeros(length)
    rep_increments[countable] = 1.0 / periodLength[countable]
    count_pred = round(float(np.sum(rep_increments)), 2)

    # A mark is counted on its last frame (the next frame is unmarked) to
    # avoid double counting, and only while periodicity is high.
    mark_ends = (full_marks_mask[:-1] > 0) & (periodicity_mask[:-1] > 0) & (full_marks_mask[1:] == 0)
    marks_count_pred = int(np.count_nonzero(mark_ends))
    if not both_feet:
        count_pred = count_pred / 2
        marks_count_pred = marks_count_pred / 2

    confident_frames = periodicity[periodicity > miss_threshold]
    if confident_frames.size:
        confidence = (np.mean(confident_frames) - miss_threshold) / (1 - miss_threshold)
    else:
        confidence = 0.0  # no frame passed the threshold; the mean would be NaN
    self_err = abs(count_pred - marks_count_pred)
    if count_pred > 0:
        self_pct_err = self_err / count_pred
    else:
        # Nothing counted: the two estimates either agree (both zero) or fully disagree.
        self_pct_err = 0.0 if self_err == 0 else 1.0
    # Disagreement above 100% would otherwise produce a negative confidence.
    total_confidence = confidence * max(0.0, 1 - self_pct_err)

    if both_feet:
        count_msg = f"## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
    else:
        count_msg = f"## Predicted Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"

    if api_call:
        if count_only_api:
            return f"{count_pred:.2f} (conf: {total_confidence:.2f})"
        else:
            return np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
                np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
                np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
                f"reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}", \
                f"single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}"

    jumps_per_second = np.clip(1 / ((periodLength / fps) + 0.01), 0, 10)
    jumping_speed = np.copy(jumps_per_second)
    misses = periodicity < miss_threshold
    jumps_per_second[misses] = 0
    frame_type = np.array(['miss' if miss else 'frame' for miss in misses])
    frame_type[full_marks > marks_threshold] = 'jump'
    # Map class index k to k / (n - 1) so it lands exactly on the k-th colour
    # stop and colour-bar tick defined below.
    per_frame_event_types = per_frame_event_types / (n_event_types - 1)
    df = pd.DataFrame.from_dict({'period length': periodLength,
                                 'jumping speed': jumping_speed,
                                 'jumps per second': jumps_per_second,
                                 'periodicity': periodicity,
                                 'miss': misses,
                                 'frame_type': frame_type,
                                 'event_type': per_frame_event_types,
                                 'jumps': full_marks,
                                 'jumps_size': (full_marks + 0.05) * 10,
                                 'miss_size': np.clip((1 - periodicity) * 0.9 + 0.1, 1, 10),
                                 'seconds': np.linspace(0, seconds, num=len(periodLength))})
    event_type_tick_vals = np.linspace(0, 1, num=n_event_types)
    event_type_colors = ['red', 'orange', 'green', 'blue', 'purple', 'pink', 'black']
    fig = px.scatter(data_frame=df,
                    x='seconds',
                    y='jumps per second',
                    #symbol='frame_type',
                    #symbol_map={'frame': 'circle', 'miss': 'circle-open', 'jump': 'triangle-down'},
                    color='event_type',
                    size='jumps_size',
                    size_max=10,
                    color_continuous_scale=[(t, c) for t, c in zip(event_type_tick_vals, event_type_colors)],
                    range_color=(0,1),
                    title="Jumping speed (jumps-per-second)",
                    trendline='rolling',
                    trendline_options=dict(window=16),
                    trendline_color_override="goldenrod",
                    trendline_scope='overall',
                    template="plotly_dark")

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=0.98,
        xanchor="right",
        x=1,
        font=dict(
            family="Courier",
            size=12,
            color="black"
            ),
        bgcolor="AliceBlue",
    ),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)'
    )
    fig.update_layout(coloraxis_colorbar=dict(
        tickvals=event_type_tick_vals,
        ticktext=event_type_names,
        title='event type'
    ))

    hist = px.histogram(df,
                        x="jumps per second",
                        template="plotly_dark",
                        marginal="box",
                        histnorm='percent',
                        title="Distribution of jumping speed (jumps-per-second)")

    # bar plot of the video-level event type distribution
    bar = px.bar(x=event_type_names,
                 y=event_type_probs,
                 template="plotly_dark",
                 title="Event Type Distribution",
                 labels={'x': 'event type', 'y': 'probability'},
                 range_y=[0, 1])

    return count_msg, fig, hist, bar
        

# Markdown header shown at the top of the demo page, one entry per line.
DESCRIPTION = '\n'.join([
    '# NextJump 🦘',
    '## AI Counting for Competitive Jump Rope',
    'Demo created by [Dylan Plummer](https://dylan-plummer.github.io/). Check out the [NextJump iOS app](https://apps.apple.com/us/app/nextjump-jump-rope-counter/id6451026115).',
])


# Layout and event wiring of the Gradio demo. Components are created in the
# same order as before, since creation order determines the page layout.
with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
    gr.Markdown(DESCRIPTION)
    in_video = gr.PlayableVideo(
        label="Input Video",
        elem_id='input-video',
        format='mp4',
        width=400,
        height=400,
        interactive=True,
        container=True,
    )

    # Visible "Run" button plus hidden controls that exist only to back the API endpoint.
    with gr.Row():
        run_button = gr.Button(value="Run", elem_id='run-button', scale=1)
        api_dummy_button = gr.Button(value="Run (No Viz)", elem_id='count-only', visible=False, scale=2)
        count_only = gr.Checkbox(label="Count Only", visible=False)
        api_token = gr.Textbox(label="API Key", elem_id='api-token', visible=False)

    with gr.Column(elem_id='output-video-container'):
        with gr.Row():
            with gr.Column():
                out_text = gr.Markdown(label="Predicted Count", elem_id='output-text')
                period_length = gr.Textbox(label="Period Length", elem_id='period-length', visible=False)
                periodicity = gr.Textbox(label="Periodicity", elem_id='periodicity', visible=False)
        with gr.Row():
            out_plot = gr.Plot(label="Jumping Speed", elem_id='output-plot')
        with gr.Row():
            with gr.Column():
                out_hist = gr.Plot(label="Speed Histogram", elem_id='output-hist')
            with gr.Column():
                out_event_type_dist = gr.Plot(label="Event Type Distribution", elem_id='output-event-type-dist')

    with gr.Accordion(label="Instructions and more information", open=False):
        instructions = (
            "## Instructions:"
            "\n* Upload a video and click 'Run' to get a prediction of the number of jumps (either one foot, or both). This could take a couple minutes!"
            "\n\n## Tips (optional):"
            "\n* Trim the video to start and end of the event"
            "\n* Frame the jumper fully, in the center of the frame"
            "\n* Videos are automatically resized, so higher resolution will not help, but a closer framing of the jumper might help. Try cropping the video differently."
        )
        gr.Markdown(instructions)

        faq = (
            "## FAQ:"
            "\n* **Q:** Does the model recognize misses?\n    * **A:** Yes, but if it fails, you can try tuning the miss threshold slider to make it more sensitive."
            "\n* **Q:** Does the model recognize double dutch?\n    * **A:** Yes, but it is trained on a smaller set of double dutch videos, so it may not work perfectly."
            "\n* **Q:** Does the model recognize double unders\n    * **A:** Yes, but it is trained on a smaller set of double under videos, so it may not work perfectly. It is also trained to count the rope, not the jumps so you will need to divide the count by 2 to get the traditional double under count."
            "\n* **Q:** Does the model count both feet?\n    * **A:** Yes, it counts every time the rope goes around no matter the event."
        )
        gr.Markdown(faq)

    # UI entry point: no API-key check, returns the message and the three plots.
    demo_inference = partial(inference, count_only_api=False, api_key=None)
    viz_outputs = [out_text, out_plot, out_hist, out_event_type_dist]

    # Example clips bundled in the "files" directory next to this script
    # (train13, train_213 and train_202 are deliberately not listed).
    examples_dir = os.path.join(os.path.dirname(__file__), "files")
    example_names = [
        "dylan.mp4",
        "train14.mp4",
        "train_17.mp4",
        "train_156.mp4",
        "train_57.mp4",
        "train_95.mp4",
        "train_253.mp4",
        "train_66.mp4",
        "train_21.mp4",
    ]
    gr.Examples(
        examples=[[os.path.join(examples_dir, name)] for name in example_names],
        inputs=[in_video],
        outputs=viz_outputs,
        fn=demo_inference,
        cache_examples=os.getenv('SYSTEM') == 'spaces',
    )

    run_button.click(demo_inference, [in_video], outputs=viz_outputs)

    # API entry point, exposed under the name 'inference'.
    # NOTE(review): with "Count Only" unchecked, inference returns five strings
    # but only one output component is wired here - confirm this is intended.
    api_inference = partial(inference, api_call=True)
    api_dummy_button.click(api_inference, [in_video, count_only, api_token], outputs=[period_length], api_name='inference')


if __name__ == "__main__":
    # Enable the request queue (at most 15 waiting jobs, API access open),
    # then start the server without a public share link.
    queued_demo = demo.queue(api_open=True, max_size=15)
    queued_demo.launch(share=False)