# NOTE(review): the original paste carried Hugging Face Spaces page chrome here
# ("Runtime error" banners, commit hashes, a line-number gutter) — removed so
# the file parses. The "Runtime error" the Space showed is fixed below in
# streaming_recording_fn (prediction was assigned a bare string).
import time
import threading
import os
import gradio as gr
import librosa
import numpy as np
# import soundfile as sf
TARGET_SAMPLE_RATE = 16_000
AUDIO_SECONDS_THRESHOLD = 2
#pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
prediction = [{"score": 1, "label": "recording..."}]
from server_wrapper import process_chunk_from_bytes, init_model
def normalize_waveform(waveform, datatype=np.float32): # source datatype: np.int16
waveform = waveform.astype(dtype=datatype)
waveform /= 32768.0
return waveform
def streaming_recording_fn(stream, new_chunk):
global prediction
sr, y = new_chunk
y = normalize_waveform(y)
y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
if stream is not None:
if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
try:
prediction = "Hola"
#prediction = process_chunk_from_bytes((stream * 32768).astype(np.int16).tobytes())
except Exception as e:
print(f"process_chunk_from_bytes failed: {e}")
prediction = [{"score": 0.0, "label": "error"}]
os.makedirs('./audio', exist_ok=True)
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
# # sf.write(file_name, stream, TARGET_SAMPLE_RATE)
print(f"SAVE AUDIO: {file_name}")
print(f">>>>>>1\t{y.shape=}, {stream.shape=}\n\t{prediction[0]=}")
stream = None
else:
stream = np.concatenate([stream, y], axis=-1)
print(f">>>>>>2\t{y.shape=}, {stream.shape=}")
else:
stream = y
print(f">>>>>>3\t{y.shape=}, {stream.shape=}")
return stream, {i['label']: i['score'] for i in prediction}
streaming_demo = gr.Interface(
fn=streaming_recording_fn,
inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
outputs=["state", "label"],
live=True,
)
with gr.Blocks() as demo:
gr.TabbedInterface([streaming_demo],
["Streaming"])
if __name__ == "__main__":
# Initialize the ASR model in a background thread to avoid blocking the UI startup.
try:
t = threading.Thread(target=init_model, daemon=True)
t.start()
print("Background model initialization started")
except Exception as e:
print(f"Failed to start background model init: {e}")
# Do not use `share=True` inside Spaces; launch normally.
demo.launch() |