# Hugging Face Spaces page header captured along with this file (not program
# text): Spaces status "Sleeping"; file size: 814 bytes.
# Page-gutter residue removed from this line (commit markers c222f61, cffe62c
# and the 1-34 line-number column).
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel
# Hub repository id of the speech-encoder checkpoint this app serves.
model_name = "MERaLiON/MERaLiON-SpeechEncoder-v1"

# Both objects are loaded once at import time and shared by every request.
# `trust_remote_code=True` lets transformers execute Python code shipped in
# the model repository, so only point `model_name` at a repository you trust.
processor = AutoProcessor.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
)
def encode_audio(audio):
    """Encode one audio clip into a mean-pooled embedding.

    Args:
        audio: The value delivered by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` tuple, or ``None`` when the form is
            submitted without a clip.

    Returns:
        A dict ``{"embeddings": values}`` where ``values`` is the model's
        ``last_hidden_state`` averaged over dim 1, squeezed, and converted to
        plain Python floats so it can be rendered as JSON.

    Raises:
        gr.Error: If no clip was provided or the clip contains no samples.
    """
    if audio is None:
        # Without this guard the tuple unpacking below fails with a bare
        # TypeError; gr.Error surfaces a readable message in the UI instead.
        raise gr.Error("Please upload an audio clip first.")

    sr, data = audio
    if data.size == 0:
        raise gr.Error("The uploaded audio clip is empty.")

    # Integer PCM is rescaled to floats in [-1, 1) before it reaches the
    # processor. NOTE(review): assumes the processor expects a float
    # waveform, as transformers feature extractors usually do -- confirm
    # against the model card.
    source_dtype = data.dtype
    samples = data.astype("float32")
    if source_dtype.kind == "i":
        samples = samples / float(2 ** (8 * source_dtype.itemsize - 1))
    elif source_dtype.kind == "u":
        # Unsigned PCM is centred on mid-scale rather than on zero.
        half_scale = float(2 ** (8 * source_dtype.itemsize - 1))
        samples = (samples - half_scale) / half_scale

    # Multi-channel clips are assumed to be laid out as (samples, channels),
    # per Gradio's Audio documentation. Average the channels so the processor
    # receives one 1-D waveform instead of reading each row as a sequence.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # NOTE(review): the clip's own sample rate is forwarded unchanged; if the
    # processor only accepts a fixed rate, resampling has to happen before
    # this call -- confirm against the model card.
    inputs = processor(
        samples,
        sampling_rate=sr,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
        embeddings = hidden.mean(dim=1).squeeze().tolist()

    return {"embeddings": embeddings}
# Single-function UI: one audio upload in, the embedding dict rendered as JSON.
# `type="numpy"` makes Gradio hand `encode_audio` a (sample_rate, array) tuple.
demo = gr.Interface(
    fn=encode_audio,
    inputs=gr.Audio(type="numpy", label="Upload audio"),
    outputs=gr.JSON(label="Embeddings"),
)

# Start the web server. The stray trailing "|" that followed this call was
# page-extraction residue and a syntax error, so it has been dropped.
demo.launch()