dylanplummer committed on
Commit
8963f04
·
1 Parent(s): 1431cde

add beep detection

Browse files
Files changed (2) hide show
  1. app.py +57 -11
  2. beep.WAV +0 -0
app.py CHANGED
@@ -4,11 +4,13 @@ from PIL import Image
4
  import os
5
  import cv2
6
  import math
 
7
  import matplotlib
8
  matplotlib.use('Agg')
9
  import matplotlib.pyplot as plt
10
  import concurrent.futures
11
- from scipy.signal import medfilt, find_peaks
 
12
  from functools import partial
13
  from passlib.hash import pbkdf2_sha256
14
  from tqdm import tqdm
@@ -78,7 +80,37 @@ def sigmoid(x):
78
  return 1 / (1 + np.exp(-x))
79
 
80
 
81
- def inference(stream_url, start_time, end_time, count_only_api, api_key,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
83
  miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
84
  api_call=False,
@@ -91,7 +123,10 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
91
  has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
92
  if not has_access:
93
  return "Invalid API Key"
94
-
 
 
 
95
  cap = cv2.VideoCapture(in_video)
96
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
97
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
@@ -133,7 +168,7 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
133
  idx_list = []
134
  inference_futures = []
135
  with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
136
- for i in tqdm(range(0, length + stride_length - stride_pad, stride_length)):
137
  batch = all_frames[i:i + seq_len]
138
  Xlist = []
139
  preprocess_tasks = [(idx, executor.submit(preprocess_image, img, img_size)) for idx, img in enumerate(batch)]
@@ -163,7 +198,7 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
163
  inference_futures.append((batch_list, idx_list, future))
164
 
165
  # Collect and process the inference results
166
- for batch_list, idx_list, future in inference_futures:
167
  outputs = future.result()
168
  y1_out = outputs[0]
169
  y2_out = outputs[1]
@@ -195,6 +230,12 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
195
  periodLength = medfilt(periodLength, 5)
196
  periodicity = sigmoid(periodicity)
197
  full_marks = sigmoid(full_marks)
 
 
 
 
 
 
198
  pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
199
  full_marks_mask = np.zeros(len(full_marks))
200
  full_marks_mask[pred_marks_peaks] = 1
@@ -325,7 +366,7 @@ def inference(stream_url, start_time, end_time, count_only_api, api_key,
325
  title="Event Type Distribution",
326
  labels={'x': 'event type', 'y': 'probability'},
327
  range_y=[0, 1])
328
-
329
  return in_video, count_msg, fig, hist, bar
330
 
331
 
@@ -333,9 +374,11 @@ with gr.Blocks() as demo:
333
  with gr.Row():
334
  with gr.Column():
335
  in_stream_url = gr.Textbox(label="Stream URL", elem_id='stream-url', visible=True)
336
- with gr.Column():
337
  in_stream_start = gr.Textbox(label="Start Time", elem_id='stream-start', visible=True)
338
  in_stream_end = gr.Textbox(label="End Time", elem_id='stream-end', visible=True)
 
 
 
339
  with gr.Column(min_width=480):
340
  out_video = gr.PlayableVideo(label="Video Clip", elem_id='output-video', format='mp4', width=400, height=400)
341
 
@@ -362,14 +405,17 @@ with gr.Blocks() as demo:
362
 
363
  demo_inference = partial(inference, count_only_api=False, api_key=None)
364
 
365
- run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end], outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
 
366
  api_inference = partial(inference, api_call=True)
367
- api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, count_only, api_token], outputs=[period_length], api_name='inference')
 
368
  examples = [
369
- ['https://hiemdall-dev2.azurewebsites.net/api/playlist/rec_rd2FAyUo/vod', '00:43:10', '00:43:40'],
 
370
  ]
371
  gr.Examples(examples,
372
- inputs=[in_stream_url, in_stream_start, in_stream_end],
373
  outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist],
374
  fn=demo_inference, cache_examples=os.getenv('SYSTEM') == 'spaces')
375
 
 
4
  import os
5
  import cv2
6
  import math
7
+ import subprocess
8
  import matplotlib
9
  matplotlib.use('Agg')
10
  import matplotlib.pyplot as plt
11
  import concurrent.futures
12
+ from scipy.io import wavfile
13
+ from scipy.signal import medfilt, correlate, find_peaks
14
  from functools import partial
15
  from passlib.hash import pbkdf2_sha256
16
  from tqdm import tqdm
 
80
  return 1 / (1 + np.exp(-x))
81
 
82
 
83
def detect_beeps(video_path, event_length=30):
    """Locate the start/end of an event in a video by cross-correlating
    its audio track with a reference beep recording (``beep.WAV``).

    Parameters
    ----------
    video_path : str
        Path to the input video file; must contain an audio track.
    event_length : int, optional
        Expected event length in seconds. Unused here; kept for interface
        compatibility — the caller validates the detected span against it.

    Returns
    -------
    (int, int)
        ``(event_start, event_end)`` as frame indices of the first and
        last detected beep, or ``(-1, -1)`` when no beep is found (the
        caller's ``event_start > 0 and event_end > 0`` check then fails).

    Side effects: writes (and overwrites) ``temp.wav`` in the working
    directory; the caller is responsible for removing it.
    """
    reference_file = 'beep.WAV'
    fs, beep = wavfile.read(reference_file)
    if beep.ndim > 1:
        beep = beep[:, 0] + beep[:, 1]  # combine stereo to mono
    video = cv2.VideoCapture(video_path)
    fps = int(video.get(cv2.CAP_PROP_FPS))
    video.release()  # release the capture handle as soon as fps is read
    # Extract the audio track, resampled to the reference sample rate.
    # Use an argument list (shell=False) so the path cannot inject shell
    # commands, and -y so ffmpeg overwrites a stale temp.wav instead of
    # blocking on an interactive overwrite prompt.
    subprocess.run(
        ['ffmpeg', '-y', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
         '-ar', str(fs), '-ac', '2', 'temp.wav'],
        check=True)
    audio = wavfile.read('temp.wav')[1]
    if audio.ndim > 1:
        audio = (audio[:, 0] + audio[:, 1]) / 2  # combine stereo to mono
    corr = correlate(audio, beep, mode='same') / audio.size
    # min-max scale to [-1, 1] so the peak-height threshold is
    # independent of the recording's absolute level
    corr = 2 * (corr - np.min(corr)) / (np.max(corr) - np.min(corr)) - 1
    # peaks at least 1 s apart (distance=fs samples) whose normalized
    # correlation clears 0.7
    peaks, _ = find_peaks(corr, height=0.7, distance=fs)
    if len(peaks) == 0:
        # no beep detected: sentinel values fail the caller's validity check
        return -1, -1
    # convert first/last peak positions from audio samples to video frames
    event_start = int(peaks[0] / fs * fps)
    event_end = int(peaks[-1] / fs * fps)
    return event_start, event_end
111
+
112
+
113
+ def inference(stream_url, start_time, end_time, beep_detection_on, event_length, count_only_api, api_key,
114
  img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
115
  miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
116
  api_call=False,
 
123
  has_access = pbkdf2_sha256.verify(os.environ['DEV_API_TOKEN'], api_key)
124
  if not has_access:
125
  return "Invalid API Key"
126
+ if beep_detection_on:
127
+ event_start, event_end = detect_beeps(in_video, event_length)
128
+ print(event_start, event_end)
129
+ event_length = int(event_length)
130
  cap = cv2.VideoCapture(in_video)
131
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
132
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
168
  idx_list = []
169
  inference_futures = []
170
  with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
171
+ for i in range(0, length + stride_length - stride_pad, stride_length):
172
  batch = all_frames[i:i + seq_len]
173
  Xlist = []
174
  preprocess_tasks = [(idx, executor.submit(preprocess_image, img, img_size)) for idx, img in enumerate(batch)]
 
198
  inference_futures.append((batch_list, idx_list, future))
199
 
200
  # Collect and process the inference results
201
+ for batch_list, idx_list, future in tqdm(inference_futures):
202
  outputs = future.result()
203
  y1_out = outputs[0]
204
  y2_out = outputs[1]
 
230
  periodLength = medfilt(periodLength, 5)
231
  periodicity = sigmoid(periodicity)
232
  full_marks = sigmoid(full_marks)
233
+ # if the event_start and event_end (in frames) are detected and form a valid event of event_length (in seconds)
234
+ if beep_detection_on:
235
+ if event_start > 0 and event_end > 0 and (event_end - event_start) - (event_length * fps) < 0.5:
236
+ print(f"Event detected: {event_start} - {event_end}")
237
+ periodicity[:event_start] = 0
238
+ periodicity[event_end:] = 0
239
  pred_marks_peaks, _ = find_peaks(full_marks, distance=3, height=marks_threshold)
240
  full_marks_mask = np.zeros(len(full_marks))
241
  full_marks_mask[pred_marks_peaks] = 1
 
366
  title="Event Type Distribution",
367
  labels={'x': 'event type', 'y': 'probability'},
368
  range_y=[0, 1])
369
+ os.remove('temp.wav')
370
  return in_video, count_msg, fig, hist, bar
371
 
372
 
 
374
  with gr.Row():
375
  with gr.Column():
376
  in_stream_url = gr.Textbox(label="Stream URL", elem_id='stream-url', visible=True)
 
377
  in_stream_start = gr.Textbox(label="Start Time", elem_id='stream-start', visible=True)
378
  in_stream_end = gr.Textbox(label="End Time", elem_id='stream-end', visible=True)
379
+ with gr.Column():
380
+ beep_detection_on = gr.Checkbox(label="Detect Beeps", elem_id='detect-beeps', visible=True)
381
+ event_length = gr.Textbox(label="Expected Event Length (s)", elem_id='event-length', visible=True)
382
  with gr.Column(min_width=480):
383
  out_video = gr.PlayableVideo(label="Video Clip", elem_id='output-video', format='mp4', width=400, height=400)
384
 
 
405
 
406
  demo_inference = partial(inference, count_only_api=False, api_key=None)
407
 
408
+ run_button.click(demo_inference, [in_stream_url, in_stream_start, in_stream_end, beep_detection_on, event_length],
409
+ outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist])
410
  api_inference = partial(inference, api_call=True)
411
+ api_dummy_button.click(api_inference, [in_stream_url, in_stream_start, in_stream_end, beep_detection_on, event_length, count_only, api_token],
412
+ outputs=[period_length], api_name='inference')
413
  examples = [
414
+ ['https://hiemdall-dev2.azurewebsites.net/api/playlist/rec_rd2FAyUo/vod', '00:43:10', '00:43:45', True, 30],
415
+ ['https://hiemdall-dev2.azurewebsites.net/api/playlist/rec_UGEhqlMh/vod', '00:00:18', '00:00:55', True, 30]
416
  ]
417
  gr.Examples(examples,
418
+ inputs=[in_stream_url, in_stream_start, in_stream_end, beep_detection_on, event_length],
419
  outputs=[out_video, out_text, out_plot, out_hist, out_event_type_dist],
420
  fn=demo_inference, cache_examples=os.getenv('SYSTEM') == 'spaces')
421
 
beep.WAV ADDED
Binary file (70.7 kB). View file