dylan-plummer committed on
Commit
2777f96
·
1 Parent(s): 3769484

updates after trials

Browse files
Files changed (2) hide show
  1. app.py +46 -32
  2. hls_download.py +9 -22
app.py CHANGED
@@ -28,31 +28,26 @@ from hls_download import download_clips
28
 
29
  plt.style.use('dark_background')
30
 
 
31
  IMG_SIZE = 256
 
 
32
 
33
  onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx", repo_type="model", token=os.environ['DATASET_SECRET'])
34
  if torch.cuda.is_available():
 
35
  providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
36
  "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
37
  sess_options = ort.SessionOptions()
 
38
  ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
39
  else:
 
40
  ort_sess = ort.InferenceSession(onnx_file)
41
 
42
  # warmup inference
43
  ort_sess.run(None, {'video': np.zeros((4, 64, 3, IMG_SIZE, IMG_SIZE), dtype=np.float32)})
44
 
45
-
46
-
47
- class SquarePad:
48
- # https://discuss.pytorch.org/t/how-to-resize-and-pad-in-a-torchvision-transforms-compose/71850/9
49
- def __call__(self, image):
50
- w, h = image.size
51
- max_wh = max(w, h)
52
- hp = int((max_wh - w) / 2)
53
- vp = int((max_wh - h) / 2)
54
- padding = (hp, vp, hp, vp)
55
- return F.pad(image, padding, 0, 'constant')
56
 
57
  def square_pad_opencv(image):
58
  h, w = image.shape[:2]
@@ -71,6 +66,7 @@ def preprocess_image(img, img_size):
71
  preprocess = transforms.Compose(transforms_list)
72
  return preprocess(img).unsqueeze(0)
73
 
 
74
  def run_inference(batch_X):
75
  batch_X = torch.cat(batch_X)
76
  return ort_sess.run(None, {'video': batch_X.numpy()})
@@ -80,11 +76,15 @@ def sigmoid(x):
80
  return 1 / (1 + np.exp(-x))
81
 
82
 
83
- def detect_beeps(video_path, event_length=30):
84
  reference_file = 'beep.WAV'
85
  fs, beep = wavfile.read(reference_file)
86
  beep = beep[:, 0] + beep[:, 1] # combine stereo to mono
87
  video = cv2.VideoCapture(video_path)
 
 
 
 
88
  audio_convert_command = f'ffmpeg -i {video_path} -vn -acodec pcm_s16le -ar {fs} -ac 2 temp.wav'
89
  subprocess.call(audio_convert_command, shell=True)
90
  length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -94,11 +94,6 @@ def detect_beeps(video_path, event_length=30):
94
  corr = correlate(audio, beep, mode='same') / audio.size
95
  # min max scale to -1, 1
96
  corr = 2 * (corr - np.min(corr)) / (np.max(corr) - np.min(corr)) - 1
97
-
98
- # top_q = np.max(corr) - 0.1
99
- # mean = np.mean(corr)
100
- # print(top_q, mean)
101
- beep_height = 0.8
102
  event_start = length
103
  while length - event_start < fps * event_length:
104
  peaks, _ = find_peaks(corr, height=beep_height, distance=fs)
@@ -123,12 +118,12 @@ def detect_beeps(video_path, event_length=30):
123
 
124
 
125
  def inference(stream_url, start_time, end_time, beep_detection_on, event_length, count_only_api, api_key,
126
- img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4, resize_factor=0.5, min_size=300, force_30_fps=True,
127
- miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, center_crop=True, both_feet=True,
128
  api_call=False,
129
  progress=gr.Progress()):
130
  progress(0, desc="Downloading clip...")
131
- in_video = download_clips(stream_url, os.getcwd(), start_time, end_time)
132
  progress(0, desc="Running inference...")
133
  has_access = False
134
  if api_call:
@@ -150,7 +145,6 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
150
  seconds = length / fps
151
  all_frames = []
152
  frame_i = 0
153
- #resize_size = int(max(frame_width, frame_height) * 0.4)
154
  resize_amount = max((img_size + 64) / frame_width, (img_size + 64) / frame_height)
155
  while cap.isOpened():
156
  frame_i += 1
@@ -159,8 +153,6 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
159
  if ret is False:
160
  frame = all_frames[-1] # padding will be with last frame
161
  break
162
- # if force_30_fps and fps != 30 and frame_i % 4 != 0:
163
- # continue
164
 
165
  frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
166
  # add square padding with opencv
@@ -189,7 +181,7 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
189
  batch_list = []
190
  idx_list = []
191
  inference_futures = []
192
- with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
193
  for i in range(0, length + stride_length - stride_pad, stride_length):
194
  batch = all_frames[i:i + seq_len]
195
  Xlist = []
@@ -237,6 +229,7 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
237
  event_type_logits[idx:idx+seq_len] += event_type
238
  period_length_overlaps[idx:idx+seq_len] += 1
239
  event_type_logit_overlaps[idx:idx+seq_len] += 1
 
240
 
241
  periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
242
  periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
@@ -286,7 +279,11 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
286
  marks_count_pred = marks_count_pred / 2
287
  count = np.array(count) / 2
288
  try:
289
- confidence = (np.mean(periodicity[periodicity > miss_threshold]) - miss_threshold) / (1 - miss_threshold)
 
 
 
 
290
  except ZeroDivisionError:
291
  confidence = 0
292
  self_err = abs(count_pred - marks_count_pred)
@@ -299,15 +296,22 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
299
  if both_feet:
300
  count_msg = f"## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
301
  else:
302
- count_msg = f"## Predicted Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
303
 
304
  if api_call:
 
 
 
 
 
 
 
305
  if count_only_api:
306
  return f"{count_pred:.2f} (conf: {total_confidence:.2f})"
307
  else:
308
- return np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
309
- np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
310
- np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}).replace('\n', ''), \
311
  f"reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}", \
312
  f"single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}"
313
 
@@ -393,7 +397,11 @@ def inference(stream_url, start_time, end_time, beep_detection_on, event_length,
393
  title="Event Type Distribution",
394
  labels={'x': 'event type', 'y': 'probability'},
395
  range_y=[0, 1])
396
- os.remove('temp.wav')
 
 
 
 
397
  return in_video, count_msg, fig, hist, bar
398
 
399
 
@@ -450,5 +458,11 @@ with gr.Blocks() as demo:
450
 
451
 
452
  if __name__ == "__main__":
453
-
454
- demo.queue(api_open=True, max_size=15).launch(share=False)
 
 
 
 
 
 
 
28
 
29
  plt.style.use('dark_background')
30
 
31
+ LOCAL = False
32
  IMG_SIZE = 256
33
+ CACHE_API_CALLS = True
34
+ os.makedirs(os.path.join(os.getcwd(), 'clips'), exist_ok=True)
35
 
36
  onnx_file = hf_hub_download(repo_id="dylanplummer/ropenet", filename="nextjump.onnx", repo_type="model", token=os.environ['DATASET_SECRET'])
37
  if torch.cuda.is_available():
38
+ print("Using CUDA")
39
  providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
40
  "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
41
  sess_options = ort.SessionOptions()
42
+ #sess_options.log_severity_level = 0
43
  ort_sess = ort.InferenceSession(onnx_file, sess_options=sess_options, providers=providers)
44
  else:
45
+ print("Using CPU")
46
  ort_sess = ort.InferenceSession(onnx_file)
47
 
48
  # warmup inference
49
  ort_sess.run(None, {'video': np.zeros((4, 64, 3, IMG_SIZE, IMG_SIZE), dtype=np.float32)})
50
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def square_pad_opencv(image):
53
  h, w = image.shape[:2]
 
66
  preprocess = transforms.Compose(transforms_list)
67
  return preprocess(img).unsqueeze(0)
68
 
69
+
70
  def run_inference(batch_X):
71
  batch_X = torch.cat(batch_X)
72
  return ort_sess.run(None, {'video': batch_X.numpy()})
 
76
  return 1 / (1 + np.exp(-x))
77
 
78
 
79
+ def detect_beeps(video_path, event_length=30, beep_height=0.8):
80
  reference_file = 'beep.WAV'
81
  fs, beep = wavfile.read(reference_file)
82
  beep = beep[:, 0] + beep[:, 1] # combine stereo to mono
83
  video = cv2.VideoCapture(video_path)
84
+ try:
85
+ os.remove('temp.wav')
86
+ except FileNotFoundError:
87
+ pass
88
  audio_convert_command = f'ffmpeg -i {video_path} -vn -acodec pcm_s16le -ar {fs} -ac 2 temp.wav'
89
  subprocess.call(audio_convert_command, shell=True)
90
  length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 
94
  corr = correlate(audio, beep, mode='same') / audio.size
95
  # min max scale to -1, 1
96
  corr = 2 * (corr - np.min(corr)) / (np.max(corr) - np.min(corr)) - 1
 
 
 
 
 
97
  event_start = length
98
  while length - event_start < fps * event_length:
99
  peaks, _ = find_peaks(corr, height=beep_height, distance=fs)
 
118
 
119
 
120
  def inference(stream_url, start_time, end_time, beep_detection_on, event_length, count_only_api, api_key,
121
+ img_size=256, seq_len=64, stride_length=32, stride_pad=3, batch_size=4,
122
+ miss_threshold=0.8, marks_threshold=0.5, median_pred_filter=True, both_feet=True,
123
  api_call=False,
124
  progress=gr.Progress()):
125
  progress(0, desc="Downloading clip...")
126
+ in_video = download_clips(stream_url, os.path.join(os.getcwd(), 'clips'), start_time, end_time)
127
  progress(0, desc="Running inference...")
128
  has_access = False
129
  if api_call:
 
145
  seconds = length / fps
146
  all_frames = []
147
  frame_i = 0
 
148
  resize_amount = max((img_size + 64) / frame_width, (img_size + 64) / frame_height)
149
  while cap.isOpened():
150
  frame_i += 1
 
153
  if ret is False:
154
  frame = all_frames[-1] # padding will be with last frame
155
  break
 
 
156
 
157
  frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB)
158
  # add square padding with opencv
 
181
  batch_list = []
182
  idx_list = []
183
  inference_futures = []
184
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
185
  for i in range(0, length + stride_length - stride_pad, stride_length):
186
  batch = all_frames[i:i + seq_len]
187
  Xlist = []
 
229
  event_type_logits[idx:idx+seq_len] += event_type
230
  period_length_overlaps[idx:idx+seq_len] += 1
231
  event_type_logit_overlaps[idx:idx+seq_len] += 1
232
+ del y1_out, y2_out, y3_out, y4_out # free up memory
233
 
234
  periodLength = np.divide(period_lengths, period_length_overlaps, where=period_length_overlaps!=0)[:length]
235
  periodicity = np.divide(periodicities, period_length_overlaps, where=period_length_overlaps!=0)[:length]
 
279
  marks_count_pred = marks_count_pred / 2
280
  count = np.array(count) / 2
281
  try:
282
+ periodicity_mask = periodicity > miss_threshold
283
+ if np.sum(periodicity_mask) == 0:
284
+ confidence = 0
285
+ else:
286
+ confidence = (np.mean(periodicity[periodicity > miss_threshold]) - miss_threshold) / (1 - miss_threshold)
287
  except ZeroDivisionError:
288
  confidence = 0
289
  self_err = abs(count_pred - marks_count_pred)
 
296
  if both_feet:
297
  count_msg = f"## Reps Count (both feet): {count_pred:.1f}, Marks Count (both feet): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
298
  else:
299
+ count_msg = f"## Reps Count (one foot): {count_pred:.1f}, Marks Count (one foot): {marks_count_pred:.1f}, Confidence: {total_confidence:.2f}"
300
 
301
  if api_call:
302
+ if CACHE_API_CALLS:
303
+ # write outputs as row of csv
304
+ with open('api_calls.tsv', 'a') as f:
305
+ periodicity_str = np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', '')
306
+ periodLength_str = np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', '')
307
+ full_marks_str = np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', '')
308
+ f.write(f"{stream_url}\t{start_time}\t{end_time}\t{beep_detection_on}\t{event_length}\t{periodicity_str}\t{periodLength_str}\t{full_marks_str}\t{count_pred}\t{total_confidence}\n")
309
  if count_only_api:
310
  return f"{count_pred:.2f} (conf: {total_confidence:.2f})"
311
  else:
312
+ return np.array2string(periodLength, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', ''), \
313
+ np.array2string(periodicity, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', ''), \
314
+ np.array2string(full_marks, formatter={'float_kind':lambda x: "%.2f" % x}, threshold=np.inf).replace('\n', ''), \
315
  f"reps: {count_pred:.2f}, marks: {marks_count_pred:.1f}, confidence: {total_confidence:.2f}", \
316
  f"single_rope_speed: {event_type_probs[0]:.3f}, double_dutch: {event_type_probs[1]:.3f}, double_unders: {event_type_probs[2]:.3f}, single_bounce: {event_type_probs[3]:.3f}"
317
 
 
397
  title="Event Type Distribution",
398
  labels={'x': 'event type', 'y': 'probability'},
399
  range_y=[0, 1])
400
+ try:
401
+ os.remove('temp.wav')
402
+ except FileNotFoundError:
403
+ pass
404
+
405
  return in_video, count_msg, fig, hist, bar
406
 
407
 
 
458
 
459
 
460
  if __name__ == "__main__":
461
+ if LOCAL:
462
+ demo.queue(api_open=True, max_size=15).launch(server_name="0.0.0.0",
463
+ server_port=7860,
464
+ debug=False,
465
+ ssl_verify=False,
466
+ share=False)
467
+ else:
468
+ demo.queue(api_open=True, max_size=15).launch(share=False)
hls_download.py CHANGED
@@ -2,36 +2,23 @@ import subprocess
2
  import os
3
 
4
  def download_clips(stream_url, out_dir, start_time, end_time, resize=True):
 
 
 
 
 
5
  output_file = os.path.join(out_dir, f"train_{len(os.listdir(out_dir))}.mp4")
6
- tmp_file = os.path.join(out_dir, f"train_{len(os.listdir(out_dir))}_tmp.mp4")
7
- if end_time is None or end_time == '':
8
- end_time = 'inf'
9
- yt_dlp_cmd = [
10
- 'yt-dlp',
11
- '--download-sections', f'*{start_time}-{end_time}',
12
- stream_url,
13
- '-o', tmp_file
14
- ]
15
-
16
- print(' '.join(yt_dlp_cmd))
17
-
18
- try:
19
- subprocess.run(yt_dlp_cmd, check=True, capture_output=True, text=True)
20
- except subprocess.CalledProcessError as e:
21
- print(f"Error occurred: {e}")
22
- print(f"yt-dlp output: {e.output}")
23
- return None
24
- print(f"Downloaded {output_file}")
25
  if resize: # resize and convert to 30 fps
26
  ffmpeg_cmd = [
27
  'ffmpeg',
28
- '-i', tmp_file,
29
  '-c:v', 'libx264',
30
  '-crf', '23',
31
  '-r', '30',
32
  '-maxrate', '2M',
33
  '-bufsize', '4M',
34
  '-vf', f"scale=-2:300",
 
35
  '-c:a', 'aac',
36
  '-b:a', '128k',
37
  '-y',
@@ -44,8 +31,8 @@ def download_clips(stream_url, out_dir, start_time, end_time, resize=True):
44
  print(f"Error occurred: {e}")
45
  print(f"ffmpeg output: {e.output}")
46
  return None
47
- else:
48
- os.rename(tmp_file, output_file)
49
  print(f"Converted {output_file}")
50
 
51
  return output_file
 
2
  import os
3
 
4
  def download_clips(stream_url, out_dir, start_time, end_time, resize=True):
5
+ # remove all .mp4 files in out_dir to avoid confusion
6
+ if len(os.listdir(out_dir)) > 5:
7
+ for f in os.listdir(out_dir):
8
+ if f.endswith('.mp4'):
9
+ os.remove(os.path.join(out_dir, f))
10
  output_file = os.path.join(out_dir, f"train_{len(os.listdir(out_dir))}.mp4")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if resize: # resize and convert to 30 fps
12
  ffmpeg_cmd = [
13
  'ffmpeg',
14
+ '-i', stream_url,
15
  '-c:v', 'libx264',
16
  '-crf', '23',
17
  '-r', '30',
18
  '-maxrate', '2M',
19
  '-bufsize', '4M',
20
  '-vf', f"scale=-2:300",
21
+ '-ss', start_time] + (['-to', end_time] if end_time != '' else []) + [
22
  '-c:a', 'aac',
23
  '-b:a', '128k',
24
  '-y',
 
31
  print(f"Error occurred: {e}")
32
  print(f"ffmpeg output: {e.output}")
33
  return None
34
+ # else:
35
+ # os.rename(tmp_file, output_file)
36
  print(f"Converted {output_file}")
37
 
38
  return output_file