"""Gradio app: classify the emotion in an uploaded audio clip."""

import gradio as gr
import numpy as np
import torch
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

# Placeholder Hub repo id — replace with a real audio-classification checkpoint.
MODEL_NAME = "your-username/your-model"

model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference-only: disable dropout / batch-norm updates
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)


def predict(audio):
    """Return the predicted emotion label for a Gradio audio input.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        Output of ``gr.Audio(type="numpy")``: a ``(sample_rate, waveform)``
        tuple — sample rate FIRST — or ``None`` when no audio was provided.

    Returns
    -------
    str
        Human-readable prediction message.
    """
    # Gradio sends None when the user submits without recording/uploading.
    if audio is None:
        return "Please provide an audio clip."

    # BUG FIX: gr.Audio(type="numpy") returns (sample_rate, data),
    # not (data, sample_rate) as the original code assumed.
    sample_rate, waveform = audio

    waveform = np.asarray(waveform)
    if np.issubdtype(waveform.dtype, np.integer):
        # Gradio delivers integer PCM (typically int16); scale to [-1, 1]
        # float32, which audio feature extractors expect.
        waveform = waveform.astype(np.float32) / np.iinfo(waveform.dtype).max
    else:
        waveform = waveform.astype(np.float32)

    # Stereo clips arrive as (samples, channels); average down to mono.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # NOTE(review): if sample_rate differs from the extractor's training
    # rate (commonly 16 kHz), resampling is still needed upstream — the
    # extractor does not resample for you. TODO confirm for this model.
    inputs = feature_extractor(
        waveform, sampling_rate=sample_rate, return_tensors="pt"
    )

    with torch.no_grad():  # no gradients needed for inference
        outputs = model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()
    return f"Predicted Emotion: {model.config.id2label[predicted_class]}"


interface = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="numpy"),
    outputs="text",
    title="🎤 Voice Emotion Classifier",
    description="Upload an audio file to detect emotion",
)

# Guard the launch so the module can be imported (e.g. for testing)
# without starting a web server.
if __name__ == "__main__":
    interface.launch()