dylan-plummer committed on
Commit
2777f96
·
1 Parent(s): 3769484

updates after trials

Browse files
Files changed (2) hide show
  1. app.py +46 -32
  2. hls_download.py +9 -22
app.py CHANGED
@@ -28,31 +28,26 @@ from hls_download import download_clips
28
 
29
  plt.style.use('dark_background')
30
 
 
31
  IMG_SIZE = 256
 
 
32
 
33
  onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx", repo_type="model", token=os.environ['DATASET_SECRET'])
34
  if torch.cuda.is_available():
 
35
  providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
36
  "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
37
  sess_options = ort.SessionOptions()
 
38
  ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
39
  else:
 
40
  ort_sess = ort.InferenceSession(onnx_file)
41
 
42
  # warmup inference
43
  ort_sess.run(None, {'video': np.zeros((4, 64, 3, IMG_SIZE, IMG_SIZE), dtype=np.float32)})
44
 
45
-
46
-
47
- class SquarePad:
48
- # https://discuss.pytorch.org/t/how-to-resize-and-pad-in-a-torchvision-transforms-compose/71850/9
49
- def __call__(self, image):
50
- w, h = image.size
51
- max_wh = max(w, h)
52
- hp = int((max_wh - w) / 2)
53
- vp = int((max_wh - h) / 2)
54
- padding = (hp, vp, hp, vp)
55
- return F.pad(image, padding, 0, 'constant')
56
 
57
  def square_pad_opencv(image):
58
  h, w = image.shape[:2]
@@ -71,6 +66,7 @@ def preprocess_image(img, img_size):
71
  preprocess = transforms.Compose(transforms_list)
72
  return preprocess(img).unsqueeze(0)
73
 
 
74
  def run_inference(batch_X):
75
  batch_X = torch.cat(batch_X)
76
  return ort_sess.run(None, {'video': batch_X.numpy()})
@@ -80,11 +76,15 @@ def sigmoid(x):
80
  return 1 / (1 + np.exp(-x))
81
 
82
 
83
- def detect_beeps(video_path, event_length=30):
84
  reference_file = 'beep.WAV'
85
  fs, beep = wavfile.read(reference_file)
86
  beep = beep[:, 0] + beep[:, 1] # combine stereo to mono
87
  video = cv2.VideoCapture(video_path)
 
 
 
 
88
  audio_convert_command = f'ffmpeg -i {video_path} -vn -acodec pcm_s16le -ar {fs} -ac 2 temp.wav'
89
  subprocess.call(audio_convert_command, shell=True)
90
  length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -94,11 +94,6 @@ def detect_beeps(video_path, event_length=30):
94
  corr = correlate(audio, beep, mode='same') / audio.size
95
  # min max scale to -1, 1
96
  corr = 2 * (corr - np.min(corr)) / (np.max(corr) - np.min(corr)) - 1
97
-
98
- # top_q = np.max(corr) - 0.1
99
- # mean = np.mean(corr)
100
- # print(top_q, mean)
101
- beep_height = 0.8
102
  event_start = length
103
  while length - event_start < fps * event_length:
104
  peaks, _ = find_peaks(corr, height=beep_height, distance=fs)
@@ -123,12 +118,12 @@ def detect_beeps(video_path, event_length=30):
123
 
124
 
125
  def inference(stream_url, start_time, end_time, beep_detection_on, event_length, count_only_api, api_key,
126
- img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4, resize_factor=0.5, min_size=300, force_30_fps=True,
127
- miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
128
  api_call=False,
129
  progress=gr.Progress()):
130
  progress(0, desc="Downloading clip...")
131
- in_video = download_clips(stream_url, os.getcwd(), start_time, end_time)
132
  progress(0, desc="Running inference...")
133
  has_access = False
134
  if api_call:
@@ -150,7 +145,6 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
150
  seconds = length / fps
151
  all_frames = []
152
  frame_i = 0
153
- #resize_size = int(max(frame_width, frame_height) * 0.4)
154
  resize_amount = max((img_size + 64) / frame_width, (img_size + 64) / frame_height)
155
  while cap.isOpened():
156
  frame_i += 1
@@ -159,8 +153,6 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
159
  if ret is False:
160
  frame = all_frames[-1] # padding will be with last frame
161
  break
162
- # if force_30_fps and fps != 30 and frame_i % 4 != 0:
163
- # continue
164
 
165
  frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
166
  # add square padding with opencv
@@ -189,7 +181,7 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
189
  batch_list = []
190
  idx_list = []
191
  inference_futures = []
192
- with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
193
  for i in range(0, length + stride_length - stride_pad, stride_length):
194
  batch = all_frames[i:i + seq_len]
195
  Xlist = []
@@ -237,6 +229,7 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
237
  event_type_logits[idx:idx+seq_len] += event_type
238
  period_length_overlaps[idx:idx+seq_len] += 1
239
  event_type_logit_overlaps[idx:idx+seq_len] += 1
 
240
 
241
  periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
242
  periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
@@ -286,7 +279,11 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
286
  marks_count_pred = marks_count_pred / 2
287
  count = np.array(count) / 2
288
  try:
289
- confidence = (np.mean(periodicity[periodicity > miss_threshold]) - miss_threshold) / (1 - miss_threshold)
 
 
 
 
290
  except ZeroDivisionError:
291
  confidence = 0
292
  self_err = abs(count_pred - marks_count_pred)
@@ -299,15 +296,22 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
299
  if both_feet:
300
  count_msg = f"## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
301
  else:
302
- count_msg = f"## Predicted Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
303
 
304
  if api_call:
 
 
 
 
 
 
 
305
  if count_only_api:
306
  return f"{count_pred:.2f} (conf: {total_confidence:.2f})"
307
  else:
308
- return np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
309
- np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
310
- np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
311
  f"reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}", \
312
  f"single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}"
313
 
@@ -393,7 +397,11 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
393
  title="Event Type Distribution",
394
  labels={'x': 'event type', 'y': 'probability'},
395
  range_y=[0, 1])
396
- os.remove('temp.wav')
 
 
 
 
397
  return in_video, count_msg, fig, hist, bar
398
 
399
 
@@ -450,5 +458,11 @@ with gr.Blocks() as demo:
450
 
451
 
452
  if __name__ == "__main__":
453
-
454
- demo.queue(api_open=True, max_size=15).launch(share=False)
 
 
 
 
 
 
 
28
 
29
  plt.style.use('dark_background')
30
 
31
+ LOCAL = False
32
  IMG_SIZE = 256
33
+ CACHE_API_CALLS = True
34
+ os.makedirs(os.path.join(os.getcwd(), 'clips'), exist_ok=True)
35
 
36
  onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx", repo_type="model", token=os.environ['DATASET_SECRET'])
37
  if torch.cuda.is_available():
38
+ print("Using CUDA")
39
  providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
40
  "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
41
  sess_options = ort.SessionOptions()
42
+ #sess_options.log_severity_level = 0
43
  ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
44
  else:
45
+ print("Using CPU")
46
  ort_sess = ort.InferenceSession(onnx_file)
47
 
48
  # warmup inference
49
  ort_sess.run(None, {'video': np.zeros((4, 64, 3, IMG_SIZE, IMG_SIZE), dtype=np.float32)})
50
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def square_pad_opencv(image):
53
  h, w = image.shape[:2]
 
66
  preprocess = transforms.Compose(transforms_list)
67
  return preprocess(img).unsqueeze(0)
68
 
69
+
70
  def run_inference(batch_X):
71
  batch_X = torch.cat(batch_X)
72
  return ort_sess.run(None, {'video': batch_X.numpy()})
 
76
  return 1 / (1 + np.exp(-x))
77
 
78
 
79
+ def detect_beeps(video_path, event_length=30, beep_height=0.8):
80
  reference_file = 'beep.WAV'
81
  fs, beep = wavfile.read(reference_file)
82
  beep = beep[:, 0] + beep[:, 1] # combine stereo to mono
83
  video = cv2.VideoCapture(video_path)
84
+ try:
85
+ os.remove('temp.wav')
86
+ except FileNotFoundError:
87
+ pass
88
  audio_convert_command = f'ffmpeg -i {video_path} -vn -acodec pcm_s16le -ar {fs} -ac 2 temp.wav'
89
  subprocess.call(audio_convert_command, shell=True)
90
  length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 
94
  corr = correlate(audio, beep, mode='same') / audio.size
95
  # min max scale to -1, 1
96
  corr = 2 * (corr - np.min(corr)) / (np.max(corr) - np.min(corr)) - 1
 
 
 
 
 
97
  event_start = length
98
  while length - event_start < fps * event_length:
99
  peaks, _ = find_peaks(corr, height=beep_height, distance=fs)
 
118
 
119
 
120
  def inference(stream_url, start_time, end_time, beep_detection_on, event_length, count_only_api, api_key,
121
+ img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
122
+ miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, both_feet=True,
123
  api_call=False,
124
  progress=gr.Progress()):
125
  progress(0, desc="Downloading clip...")
126
+ in_video = download_clips(stream_url, os.path.join(os.getcwd(), 'clips'), start_time, end_time)
127
  progress(0, desc="Running inference...")
128
  has_access = False
129
  if api_call:
 
145
  seconds = length / fps
146
  all_frames = []
147
  frame_i = 0
 
148
  resize_amount = max((img_size + 64) / frame_width, (img_size + 64) / frame_height)
149
  while cap.isOpened():
150
  frame_i += 1
 
153
  if ret is False:
154
  frame = all_frames[-1] # padding will be with last frame
155
  break
 
 
156
 
157
  frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
158
  # add square padding with opencv
 
181
  batch_list = []
182
  idx_list = []
183
  inference_futures = []
184
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
185
  for i in range(0, length + stride_length - stride_pad, stride_length):
186
  batch = all_frames[i:i + seq_len]
187
  Xlist = []
 
229
  event_type_logits[idx:idx+seq_len] += event_type
230
  period_length_overlaps[idx:idx+seq_len] += 1
231
  event_type_logit_overlaps[idx:idx+seq_len] += 1
232
+ del y1_out, y2_out, y3_out, y4_out # free up memory
233
 
234
  periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
235
  periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
 
279
  marks_count_pred = marks_count_pred / 2
280
  count = np.array(count) / 2
281
  try:
282
+ periodicity_mask = periodicity > miss_threshold
283
+ if np.sum(periodicity_mask) == 0:
284
+ confidence = 0
285
+ else:
286
+ confidence = (np.mean(periodicity[periodicity > miss_threshold]) - miss_threshold) / (1 - miss_threshold)
287
  except ZeroDivisionError:
288
  confidence = 0
289
  self_err = abs(count_pred - marks_count_pred)
 
296
  if both_feet:
297
  count_msg = f"## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
298
  else:
299
+ count_msg = f"## Reps Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
300
 
301
  if api_call:
302
+ if CACHE_API_CALLS:
303
+ # write outputs as row of csv
304
+ with open('api_calls.tsv', 'a') as f:
305
+ periodicity_str = np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', '')
306
+ periodLength_str = np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', '')
307
+ full_marks_str = np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', '')
308
+ f.write(f"{stream_url}\t{start_time}\t{end_time}\t{beep_detection_on}\t{event_length}\t{periodicity_str}\t{periodLength_str}\t{full_marks_str}\t{count_pred}\t{total_confidence}\n")
309
  if count_only_api:
310
  return f"{count_pred:.2f} (conf: {total_confidence:.2f})"
311
  else:
312
+ return np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', ''), \
313
+ np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', ''), \
314
+ np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', ''), \
315
  f"reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}", \
316
  f"single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}"
317
 
 
397
  title="Event Type Distribution",
398
  labels={'x': 'event type', 'y': 'probability'},
399
  range_y=[0, 1])
400
+ try:
401
+ os.remove('temp.wav')
402
+ except FileNotFoundError:
403
+ pass
404
+
405
  return in_video, count_msg, fig, hist, bar
406
 
407
 
 
458
 
459
 
460
  if __name__ == "__main__":
461
+ if LOCAL:
462
+ demo.queue(api_open=True, max_size=15).launch(server_name="0.0.0.0",
463
+ server_port=7860,
464
+ debug=False,
465
+ ssl_verify=False,
466
+ share=False)
467
+ else:
468
+ demo.queue(api_open=True, max_size=15).launch(share=False)
hls_download.py CHANGED
@@ -2,36 +2,23 @@ import subprocess
2
  import os
3
 
4
  def download_clips(stream_url, out_dir, start_time, end_time, resize=True):
 
 
 
 
 
5
  output_file = os.path.join(out_dir, f"train_{len(os.listdir(out_dir))}.mp4")
6
- tmp_file = os.path.join(out_dir, f"train_{len(os.listdir(out_dir))}_tmp.mp4")
7
- if end_time is None or end_time == '':
8
- end_time = 'inf'
9
- yt_dlp_cmd = [
10
- 'yt-dlp',
11
- '--download-sections', f'*{start_time}-{end_time}',
12
- stream_url,
13
- '-o', tmp_file
14
- ]
15
-
16
- print(' '.join(yt_dlp_cmd))
17
-
18
- try:
19
- subprocess.run(yt_dlp_cmd, check=True, capture_output=True, text=True)
20
- except subprocess.CalledProcessError as e:
21
- print(f"Error occurred: {e}")
22
- print(f"yt-dlp output: {e.output}")
23
- return None
24
- print(f"Downloaded {output_file}")
25
  if resize: # resize and convert to 30 fps
26
  ffmpeg_cmd = [
27
  'ffmpeg',
28
- '-i', tmp_file,
29
  '-c:v', 'libx264',
30
  '-crf', '23',
31
  '-r', '30',
32
  '-maxrate', '2M',
33
  '-bufsize', '4M',
34
  '-vf', f"scale=-2:300",
 
35
  '-c:a', 'aac',
36
  '-b:a', '128k',
37
  '-y',
@@ -44,8 +31,8 @@ def download_clips(stream_url, out_dir, start_time, end_time, resize=True):
44
  print(f"Error occurred: {e}")
45
  print(f"ffmpeg output: {e.output}")
46
  return None
47
- else:
48
- os.rename(tmp_file, output_file)
49
  print(f"Converted {output_file}")
50
 
51
  return output_file
 
2
  import os
3
 
4
  def download_clips(stream_url, out_dir, start_time, end_time, resize=True):
5
+ # remove all .mp4 files in out_dir to avoid confusion
6
+ if len(os.listdir(out_dir)) > 5:
7
+ for f in os.listdir(out_dir):
8
+ if f.endswith('.mp4'):
9
+ os.remove(os.path.join(out_dir, f))
10
  output_file = os.path.join(out_dir, f"train_{len(os.listdir(out_dir))}.mp4")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if resize: # resize and convert to 30 fps
12
  ffmpeg_cmd = [
13
  'ffmpeg',
14
+ '-i', stream_url,
15
  '-c:v', 'libx264',
16
  '-crf', '23',
17
  '-r', '30',
18
  '-maxrate', '2M',
19
  '-bufsize', '4M',
20
  '-vf', f"scale=-2:300",
21
+ '-ss', start_time] + (['-to', end_time] if end_time != '' else []) + [
22
  '-c:a', 'aac',
23
  '-b:a', '128k',
24
  '-y',
 
31
  print(f"Error occurred: {e}")
32
  print(f"ffmpeg output: {e.output}")
33
  return None
34
+ # else:
35
+ # os.rename(tmp_file, output_file)
36
  print(f"Converted {output_file}")
37
 
38
  return output_file