Spaces:

LingoJr
/

emotion-recognition

Runtime error

App Files Community

LingoJr committed on Nov 18, 2025

Commit

2dad960

verified ·

1 Parent(s): a834bb6

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -16

app.py CHANGED Viewed

@@ -7,28 +7,52 @@ speech_classifier = pipeline("audio-classification", model="superb/wav2vec2-base
 text_tokenizer = AutoTokenizer.from_pretrained("tae898/emoberta-base")
 text_model = AutoModelForSequenceClassification.from_pretrained("tae898/emoberta-base")
-def predict_emotion(audio, text):
-    results = {}
-    if audio is not None:
-        waveform, sr = torchaudio.load(audio)
         preds = speech_classifier(waveform.squeeze().numpy(), sampling_rate=sr, top_k=3)
-        results["audio_emotion"] = preds[0]["label"]
-    if text is not None and text.strip() != "":
-        inputs = text_tokenizer(text, return_tensors="pt")
         with torch.no_grad():
             outputs = text_model(**inputs)
-        emotion = text_model.config.id2label[torch.argmax(outputs.logits)]
-        results["text_emotion"] = emotion
-    return results
-ui = gr.Interface(
-    fn=predict_emotion,
-    inputs=[gr.Audio(type="filepath"), gr.Textbox()],
     outputs="json",
-    title="Multimodal Emotion Recognition"
 )
-ui.launch()

 text_tokenizer = AutoTokenizer.from_pretrained("tae898/emoberta-base")
 text_model = AutoModelForSequenceClassification.from_pretrained("tae898/emoberta-base")
+def gradio_combined(audio_file, text):
+    # Case 1 — Audio provided
+    if audio_file is not None:
+        waveform, sr = torchaudio.load(audio_file)
         preds = speech_classifier(waveform.squeeze().numpy(), sampling_rate=sr, top_k=3)
+        return {
+            "Detected Emotion": preds[0]["label"],
+            "Top Predictions": {p["label"]: round(p["score"], 3) for p in preds},
+            "Source": "Audio"
+        }
+    # Case 2 — Text provided
+    if text.strip() != "":
+        inputs = text_tokenizer(text, return_tensors="pt", truncation=True)
         with torch.no_grad():
             outputs = text_model(**inputs)
+        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
+        label_id = torch.argmax(probs).item()
+        return {
+            "Detected Emotion": text_model.config.id2label[label_id],
+            "Top Predictions": {
+                text_model.config.id2label[i]: round(p, 3)
+                for i, p in enumerate(probs[0].tolist())
+            },
+            "Source": "Text"
+        }
+    return {"Error": "Please provide audio or text input."}
+# Building the UI
+gradio_ui = gr.Interface(
+    fn=gradio_combined,
+    inputs=[
+        gr.Audio(label="🎤 Upload or Record Speech", sources=["microphone", "upload"], type="filepath"),
+        gr.Textbox(label="💬 Enter Text Emotion", placeholder="Type something...")
+    ],
     outputs="json",
+    title="🎭 Multimodal Emotion Recognizer",
+    description="Use either speech or text — the model detects the emotion automatically!"
 )
+# Mount Gradio at /gradio
+app = gr.mount_gradio_app(app, gradio_ui, path="/gradio")
+gradio_ui.launch(share=True)