# jjoyce003's picture
# Update app.py
# cffe62c verified
# raw
# history blame contribute delete
# 814 Bytes
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel
# Hub identifier of the checkpoint that both the processor and the model load.
model_name = "MERaLiON/MERaLiON-SpeechEncoder-v1"

# Both loaders are called with trust_remote_code=True, which allows
# transformers to execute the custom code shipped in the model repository.
processor = AutoProcessor.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
)
def encode_audio(audio):
    """Encode an audio clip into one mean-pooled embedding vector.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when the user submitted
            without providing audio.

    Returns:
        A dict ``{"embeddings": [...]}`` containing the model's
        ``last_hidden_state`` averaged over dimension 1, as a plain list.

    Raises:
        gr.Error: If no audio was provided or the clip contains no samples.
    """
    # Local import: numpy is not imported at file level.
    import numpy as np

    if audio is None:
        # Without this guard the tuple unpacking below fails with an opaque
        # TypeError; gr.Error surfaces a readable message in the UI instead.
        raise gr.Error("Please upload or record an audio clip first.")

    sr, data = audio
    data = np.asarray(data)
    if data.size == 0:
        raise gr.Error("The audio clip is empty.")

    # Gradio documents numpy audio as integer PCM (16-bit); scale integer
    # samples to float32 in [-1, 1] before handing them to the processor.
    if np.issubdtype(data.dtype, np.integer):
        info = np.iinfo(data.dtype)
        scale = float(max(abs(info.min), info.max))
        data = data.astype(np.float32) / scale
    else:
        data = data.astype(np.float32)

    # Multi-channel clips arrive as (samples, channels); average the channels
    # so the processor receives a single 1-D waveform.
    if data.ndim == 2:
        data = data.mean(axis=1)

    # NOTE(review): sr is forwarded unchanged. If the processor requires a
    # fixed rate (presumably 16 kHz for this checkpoint - confirm against the
    # model card), clips at other rates may need resampling first.
    inputs = processor(
        data,
        sampling_rate=sr,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        hidden_states = model(**inputs).last_hidden_state

    # Average over dimension 1, drop singleton dimensions, convert to a list
    # so the result is JSON-serializable.
    embeddings = hidden_states.mean(dim=1).squeeze().tolist()
    return {
        "embeddings": embeddings
    }
# Input component: hands the handler the audio in numpy form.
audio_input = gr.Audio(type="numpy", label="Upload audio")

# Output component: renders the dict returned by the handler as JSON.
embeddings_output = gr.JSON(label="Embeddings")

# Wire the handler to its input/output components and start the app.
demo = gr.Interface(fn=encode_audio, inputs=audio_input, outputs=embeddings_output)
demo.launch()