rjzevallos committed on
Commit
89b1173
·
1 Parent(s): 20547d7

Fix: send 'FINISH' text over WebSocket on stop to match server

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -4,13 +4,15 @@ import gradio as gr
4
  import librosa
5
  import numpy as np
6
  # import soundfile as sf
7
- from transformers import pipeline
8
 
9
  TARGET_SAMPLE_RATE = 16_000
10
  AUDIO_SECONDS_THRESHOLD = 2
11
- pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
12
  prediction = [{"score": 1, "label": "recording..."}]
13
 
 
 
 
14
 
15
  def normalize_waveform(waveform, datatype=np.float32): # source datatype: np.int16
16
  waveform = waveform.astype(dtype=datatype)
@@ -25,7 +27,7 @@ def streaming_recording_fn(stream, new_chunk):
25
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
26
  if stream is not None:
27
  if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
28
- prediction = pipe(stream)
29
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
30
  # # sf.write(file_name, stream, TARGET_SAMPLE_RATE)
31
  print(f"SAVE AUDIO: {file_name}")
 
4
  import librosa
5
  import numpy as np
6
  # import soundfile as sf
 
7
 
8
  TARGET_SAMPLE_RATE = 16_000
9
  AUDIO_SECONDS_THRESHOLD = 2
10
+ #pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
11
  prediction = [{"score": 1, "label": "recording..."}]
12
 
13
+ from server_wrapper import process_chunk_from_bytes
14
+
15
+
16
 
17
  def normalize_waveform(waveform, datatype=np.float32): # source datatype: np.int16
18
  waveform = waveform.astype(dtype=datatype)
 
27
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
28
  if stream is not None:
29
  if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
30
+ prediction = process_chunk_from_bytes((stream * 32768).astype(np.int16).tobytes())
31
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
32
  # # sf.write(file_name, stream, TARGET_SAMPLE_RATE)
33
  print(f"SAVE AUDIO: {file_name}")