Spaces:
Runtime error
Runtime error
File size: 1,583 Bytes
a834bb6 2de24ed a834bb6 2de24ed 50e2b80 291ce94 2de24ed 291ce94 2dad960 291ce94 2de24ed 291ce94 2de24ed 291ce94 2dad960 2de24ed 2dad960 2de24ed 2dad960 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torchaudio
import torch
# The models are loaded once at import time so that every request reuses them.

# Speech branch: an audio-classification pipeline around the
# "superb/wav2vec2-base-superb-er" checkpoint.
speech_classifier = pipeline(
    "audio-classification",
    model="superb/wav2vec2-base-superb-er",
)

# Text branch: tokenizer and sequence-classification model from the
# "tae898/emoberta-base" checkpoint.
text_tokenizer = AutoTokenizer.from_pretrained("tae898/emoberta-base")
text_model = AutoModelForSequenceClassification.from_pretrained("tae898/emoberta-base")

# This app only runs inference, so keep the model in evaluation mode.
# Gradient checkpointing is deliberately not enabled: it trades compute for
# memory during back-propagation, which never happens here.
text_model.eval()
def predict_emotion(audio, text):
    """Detect the emotion expressed in a speech clip and/or a piece of text.

    Either input may be omitted; only the modalities that were supplied
    appear in the result.

    Args:
        audio: Path to an audio file readable by ``torchaudio.load``, or
            ``None`` when no audio was provided.
        text: Text to classify, or ``None`` / blank when no text was provided.

    Returns:
        A dict that may contain ``"audio_emotion"`` and/or ``"text_emotion"``,
        each mapped to the top predicted label. The dict is empty when
        neither input was given.
    """
    results = {}

    if audio is not None:
        waveform, sample_rate = torchaudio.load(audio)
        # torchaudio returns (channels, frames). The classifier needs a 1-D
        # mono signal, so average the channels; squeeze() would leave stereo
        # input two-dimensional.
        mono = waveform.mean(dim=0)
        # A raw array is interpreted at the feature extractor's sampling
        # rate, so resample explicitly when the file uses a different one.
        target_rate = speech_classifier.feature_extractor.sampling_rate
        if sample_rate != target_rate:
            mono = torchaudio.functional.resample(mono, sample_rate, target_rate)
        # Only the best label is reported, so a single prediction is enough.
        predictions = speech_classifier(mono.numpy(), top_k=1)
        results["audio_emotion"] = predictions[0]["label"]

    if text is not None and text.strip():
        # Truncate so long inputs cannot exceed the model's maximum length.
        inputs = text_tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():
            outputs = text_model(**inputs)
        # id2label is keyed by plain ints; a tensor hashes by identity and
        # would never match a key, so convert the arg-max to an int first.
        label_id = int(outputs.logits.argmax(dim=-1).item())
        results["text_emotion"] = text_model.config.id2label[label_id]

    return results
# Building the UI: one audio input and one text input, both optional, routed
# to predict_emotion; the resulting dict is rendered as JSON.
gradio_ui = gr.Interface(
    fn=predict_emotion,
    inputs=[
        gr.Audio(
            label="🎤 Upload or Record Speech",
            sources=["microphone", "upload"],
            # "filepath" hands the handler a path on disk rather than raw samples.
            type="filepath",
        ),
        gr.Textbox(label="💬 Enter Text Emotion", placeholder="Type something..."),
    ],
    outputs="json",
    title="🎭 Multimodal Emotion Recognizer",
    description="Use either speech or text — the model detects the emotion automatically!",
)

# The interface is served directly by Gradio. Mounting it under /gradio with
# gr.mount_gradio_app() requires an existing FastAPI application, and this
# script does not create one.
if __name__ == "__main__":
    gradio_ui.launch(share=True)
|