| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from transformers import AutoModelForAudioClassification, AutoFeatureExtractor | |
# Identifier of the checkpoint handed to `from_pretrained` below.
# NOTE(review): the value looks like a placeholder — replace with the real model id.
MODEL_NAME = "your-username/your-model"

# Both objects are created once at import time and shared by every request.
# The feature extractor prepares raw audio for the model; the model yields
# the class logits.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
def _prepare_waveform(samples, source_rate, target_rate):
    """Return *samples* as a mono float32 waveform sampled at *target_rate*.

    Args:
        samples: Array of shape ``(n,)`` or ``(n, channels)`` as produced by
            ``gr.Audio(type="numpy")``.
        source_rate: Sampling rate of *samples* in Hz.
        target_rate: Sampling rate the feature extractor expects in Hz.
    """
    waveform = np.asarray(samples)
    if np.issubdtype(waveform.dtype, np.signedinteger):
        # Signed integer PCM -> floats in [-1.0, 1.0).
        scale = -float(np.iinfo(waveform.dtype).min)
        waveform = waveform.astype(np.float32) / scale
    else:
        waveform = waveform.astype(np.float32)
    if waveform.ndim > 1:
        # Down-mix (samples, channels) to a single channel.
        waveform = waveform.mean(axis=1)
    if source_rate != target_rate and waveform.size > 1:
        # Dependency-free linear resampling; good enough for classification
        # input, but a band-limited resampler would give higher fidelity.
        new_length = max(1, int(round(waveform.shape[0] * target_rate / source_rate)))
        old_times = np.arange(waveform.shape[0]) / source_rate
        new_times = np.arange(new_length) / target_rate
        waveform = np.interp(new_times, old_times, waveform).astype(np.float32)
    return waveform


def predict(audio):
    """Classify the emotion expressed in an audio clip.

    Args:
        audio: Value delivered by ``gr.Audio(type="numpy")``: either ``None``
            (nothing uploaded) or a ``(sample_rate, samples)`` tuple.

    Returns:
        A human-readable string naming the predicted label, or a short
        message when no audio was supplied.
    """
    if audio is None:
        return "No audio provided."

    # Gradio yields the sampling rate FIRST, then the sample array.
    sample_rate, samples = audio
    samples = np.asarray(samples)
    if samples.size == 0:
        return "No audio provided."

    # Feature extractors reject audio whose rate differs from the one they
    # were configured with, so resample to that rate when it is exposed.
    target_rate = getattr(feature_extractor, "sampling_rate", None) or sample_rate
    waveform = _prepare_waveform(samples, sample_rate, target_rate)

    inputs = feature_extractor(
        waveform,
        sampling_rate=target_rate,
        return_tensors="pt",
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax is monotonic, so argmax over the raw logits picks the same class.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
    return f"Predicted Emotion: {model.config.id2label[predicted_class]}"
# Input widget: with type="numpy" Gradio passes the uploaded/recorded clip to
# the callback as numeric data rather than a file path.
audio_input = gr.Audio(type="numpy")

# Wire the audio input to `predict` and show its string result as plain text.
interface = gr.Interface(
    fn=predict,
    inputs=audio_input,
    outputs="text",
    title="🎤 Voice Emotion Classifier",
    description="Upload an audio file to detect emotion",
)

# Start the web server for the demo.
interface.launch()