Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
import torchaudio
|
| 4 |
-
from transformers import pipeline, AutoModel
|
| 5 |
import librosa
|
| 6 |
import numpy as np
|
| 7 |
import re
|
| 8 |
import warnings
|
| 9 |
import os
|
| 10 |
-
import
|
|
|
|
| 11 |
warnings.filterwarnings('ignore')
|
| 12 |
|
| 13 |
print("🚀 Starting Enhanced Hindi Speech Emotion Analysis App...")
|
|
@@ -17,13 +18,16 @@ print("🚀 Starting Enhanced Hindi Speech Emotion Analysis App...")
|
|
| 17 |
# ============================================
|
| 18 |
|
| 19 |
SENTIMENT_PIPELINE = None
|
|
|
|
|
|
|
|
|
|
| 20 |
ASR_MODEL = None
|
| 21 |
|
| 22 |
def load_models():
|
| 23 |
"""Load all models once at startup and cache them globally"""
|
| 24 |
-
global SENTIMENT_PIPELINE, ASR_MODEL
|
| 25 |
|
| 26 |
-
if SENTIMENT_PIPELINE is not None and ASR_MODEL is not None:
|
| 27 |
print("✅ Models already loaded, skipping...")
|
| 28 |
return
|
| 29 |
|
|
@@ -40,6 +44,21 @@ def load_models():
|
|
| 40 |
print(f"❌ Error loading sentiment model: {e}")
|
| 41 |
raise
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
print("🎤 Loading Indic Conformer 600M ASR model...")
|
| 44 |
try:
|
| 45 |
ASR_MODEL = AutoModel.from_pretrained(
|
|
@@ -56,107 +75,45 @@ def load_models():
|
|
| 56 |
load_models()
|
| 57 |
|
| 58 |
# ============================================
|
| 59 |
-
# 2. EMOTION
|
| 60 |
# ============================================
|
| 61 |
|
| 62 |
-
|
| 63 |
-
""
|
| 64 |
-
|
| 65 |
-
""
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
#
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
high_energy = prosodic_features['energy_mean'] > 0.12
|
| 99 |
-
high_pitch_var = prosodic_features['pitch_std'] > 40
|
| 100 |
-
low_energy = prosodic_features['energy_mean'] < 0.03
|
| 101 |
-
calm_pitch = prosodic_features['pitch_std'] < 15
|
| 102 |
-
|
| 103 |
-
# Determine emotion
|
| 104 |
-
if is_crisis:
|
| 105 |
-
emotion = "fear"
|
| 106 |
-
secondary_emotion = "distress"
|
| 107 |
-
confidence = max(0.85, max_score)
|
| 108 |
-
elif is_mixed:
|
| 109 |
-
if len(detected_emotions) >= 2:
|
| 110 |
-
emotion = detected_emotions[0]
|
| 111 |
-
secondary_emotion = detected_emotions[1]
|
| 112 |
-
elif detected_emotions:
|
| 113 |
-
emotion = detected_emotions[0]
|
| 114 |
-
secondary_emotion = "neutral"
|
| 115 |
-
else:
|
| 116 |
-
emotion = "mixed"
|
| 117 |
-
secondary_emotion = None
|
| 118 |
-
confidence = sentiment_scores['Neutral']
|
| 119 |
-
elif detected_emotions:
|
| 120 |
-
# Use detected emotions
|
| 121 |
-
emotion = detected_emotions[0]
|
| 122 |
-
secondary_emotion = detected_emotions[1] if len(detected_emotions) > 1 else None
|
| 123 |
-
confidence = max_score
|
| 124 |
-
else:
|
| 125 |
-
# Map based on sentiment + prosody
|
| 126 |
-
secondary_emotion = None
|
| 127 |
-
if dominant_sentiment == 'Positive':
|
| 128 |
-
if high_energy and high_pitch_var:
|
| 129 |
-
emotion = "joy"
|
| 130 |
-
secondary_emotion = "excitement"
|
| 131 |
-
elif 'प्यार' in text_lower or 'love' in text_lower:
|
| 132 |
-
emotion = "love"
|
| 133 |
-
else:
|
| 134 |
-
emotion = "happiness"
|
| 135 |
-
confidence = max_score
|
| 136 |
-
|
| 137 |
-
elif dominant_sentiment == 'Negative':
|
| 138 |
-
if is_crisis or 'डर' in text_lower or 'fear' in text_lower:
|
| 139 |
-
emotion = "fear"
|
| 140 |
-
elif 'गुस्सा' in text_lower or 'angry' in text_lower:
|
| 141 |
-
emotion = "anger"
|
| 142 |
-
elif 'दुख' in text_lower or 'sad' in text_lower or 'रो' in text_lower:
|
| 143 |
-
emotion = "sadness"
|
| 144 |
-
elif 'चिंता' in text_lower or 'worry' in text_lower:
|
| 145 |
-
emotion = "anxiety"
|
| 146 |
-
else:
|
| 147 |
-
emotion = "sadness"
|
| 148 |
-
confidence = max_score
|
| 149 |
-
|
| 150 |
-
else: # Neutral
|
| 151 |
-
if calm_pitch and low_energy:
|
| 152 |
-
emotion = "calm"
|
| 153 |
-
elif 'समझ नहीं' in text_lower or 'confus' in text_lower:
|
| 154 |
-
emotion = "confusion"
|
| 155 |
-
else:
|
| 156 |
-
emotion = "neutral"
|
| 157 |
-
confidence = max_score
|
| 158 |
-
|
| 159 |
-
return emotion, secondary_emotion, confidence
|
| 160 |
|
| 161 |
# ============================================
|
| 162 |
# 3. AUDIO PREPROCESSING FUNCTIONS
|
|
@@ -393,7 +350,46 @@ def detect_mixed_emotions(text, prosodic_features):
|
|
| 393 |
return text_mixed
|
| 394 |
|
| 395 |
# ============================================
|
| 396 |
-
# 6.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
# ============================================
|
| 398 |
|
| 399 |
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
|
@@ -450,8 +446,47 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
|
| 450 |
|
| 451 |
return sentiment_scores, final_confidence, is_mixed
|
| 452 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
# ============================================
|
| 454 |
-
#
|
| 455 |
# ============================================
|
| 456 |
|
| 457 |
def predict(audio_filepath):
|
|
@@ -519,38 +554,39 @@ def predict(audio_filepath):
|
|
| 519 |
"hindi_content_percentage": round(hindi_ratio * 100, 2)
|
| 520 |
}
|
| 521 |
|
| 522 |
-
# Sentiment Analysis
|
| 523 |
-
print("💭 Analyzing sentiment...")
|
| 524 |
try:
|
| 525 |
-
|
|
|
|
| 526 |
|
|
|
|
| 527 |
sentiment_scores, confidence, is_mixed = enhanced_sentiment_analysis(
|
| 528 |
transcription,
|
| 529 |
prosodic_features,
|
| 530 |
-
|
| 531 |
)
|
| 532 |
|
| 533 |
-
#
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
)
|
| 540 |
|
| 541 |
# Build structured output
|
| 542 |
result = {
|
| 543 |
"status": "success",
|
| 544 |
"transcription": transcription,
|
| 545 |
-
"emotion":
|
| 546 |
-
|
| 547 |
-
"
|
| 548 |
-
"
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
"
|
| 554 |
},
|
| 555 |
"analysis": {
|
| 556 |
"mixed_emotions": is_mixed,
|
|
@@ -567,17 +603,17 @@ def predict(audio_filepath):
|
|
| 567 |
}
|
| 568 |
}
|
| 569 |
|
| 570 |
-
print(f"✅ Detected Emotion: {emotion}")
|
| 571 |
-
print(f"📝 Transcription: {transcription}")
|
| 572 |
print(f"{'='*60}\n")
|
| 573 |
|
| 574 |
return result
|
| 575 |
|
| 576 |
-
except Exception as
|
|
|
|
|
|
|
| 577 |
return {
|
| 578 |
"status": "error",
|
| 579 |
-
"error_type": "
|
| 580 |
-
"message": str(
|
| 581 |
"transcription": transcription
|
| 582 |
}
|
| 583 |
|
|
@@ -591,7 +627,7 @@ def predict(audio_filepath):
|
|
| 591 |
}
|
| 592 |
|
| 593 |
# ============================================
|
| 594 |
-
#
|
| 595 |
# ============================================
|
| 596 |
|
| 597 |
demo = gr.Interface(
|
|
@@ -601,14 +637,16 @@ demo = gr.Interface(
|
|
| 601 |
label="🎤 Record or Upload Hindi Audio",
|
| 602 |
sources=["upload", "microphone"]
|
| 603 |
),
|
| 604 |
-
outputs=gr.JSON(label="📊 Emotion Analysis Results (API-Ready JSON)"),
|
| 605 |
-
title="🎭 Hindi Speech Emotion Analysis API",
|
| 606 |
description="""
|
| 607 |
-
## 🇮🇳 Advanced Hindi/Hinglish Speech Emotion Detection
|
| 608 |
|
| 609 |
### ✨ Features:
|
| 610 |
- **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
|
| 611 |
-
-
|
|
|
|
|
|
|
| 612 |
- **🎵 Voice Analysis** - Analyzes tone, pitch, energy, and spectral features
|
| 613 |
- **🌐 Hinglish Support** - Works with Hindi + English mix
|
| 614 |
- **📝 JSON Output** - Easy to parse for API integration
|
|
@@ -620,13 +658,22 @@ demo = gr.Interface(
|
|
| 620 |
"transcription": "मैं बहुत खुश हूं",
|
| 621 |
"emotion": {
|
| 622 |
"primary": "joy",
|
| 623 |
-
"secondary":
|
| 624 |
-
"confidence": 0.8745
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 625 |
},
|
| 626 |
-
"
|
| 627 |
-
"
|
| 628 |
-
"
|
| 629 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
},
|
| 631 |
"analysis": {
|
| 632 |
"mixed_emotions": false,
|
|
@@ -644,10 +691,10 @@ demo = gr.Interface(
|
|
| 644 |
}
|
| 645 |
```
|
| 646 |
|
| 647 |
-
### 🎯 Supported Emotions:
|
| 648 |
- **Positive**: joy, happiness, love, excitement, calm
|
| 649 |
-
- **Negative**: sadness, anger, fear, anxiety, disgust
|
| 650 |
-
- **Neutral**: neutral, confusion,
|
| 651 |
|
| 652 |
### 🧪 Test Examples:
|
| 653 |
- **😊 Joy**: "मैं बहुत खुश हूं आज"
|
|
@@ -658,19 +705,11 @@ demo = gr.Interface(
|
|
| 658 |
- **❤️ Love**: "मुझे तुमसे बहुत प्यार है"
|
| 659 |
|
| 660 |
### 💡 API Usage:
|
| 661 |
-
1. Send audio file to the endpoint
|
| 662 |
-
2. Receive structured JSON response
|
| 663 |
-
3. Parse `emotion.primary` for the main emotion
|
| 664 |
-
4. Use `transcription` for text analysis
|
| 665 |
-
5. Check `analysis.mixed_emotions` for complex states
|
| 666 |
-
|
| 667 |
-
### 🔗 Integration Examples:
|
| 668 |
|
| 669 |
**Python API Client:**
|
| 670 |
```python
|
| 671 |
import requests
|
| 672 |
|
| 673 |
-
# Send audio file
|
| 674 |
with open("audio.wav", "rb") as f:
|
| 675 |
response = requests.post(
|
| 676 |
"YOUR_API_URL/predict",
|
|
@@ -681,41 +720,14 @@ demo = gr.Interface(
|
|
| 681 |
|
| 682 |
if result["status"] == "success":
|
| 683 |
print(f"Emotion: {result['emotion']['primary']}")
|
| 684 |
-
print(f"
|
| 685 |
-
print(f"
|
| 686 |
-
```
|
| 687 |
-
|
| 688 |
-
**Database Storage:**
|
| 689 |
-
```python
|
| 690 |
-
# Store in MongoDB
|
| 691 |
-
db.emotions.insert_one({
|
| 692 |
-
"user_id": user_id,
|
| 693 |
-
"timestamp": datetime.now(),
|
| 694 |
-
"emotion": result["emotion"]["primary"],
|
| 695 |
-
"transcription": result["transcription"],
|
| 696 |
-
"confidence": result["emotion"]["confidence"],
|
| 697 |
-
"sentiment_positive": result["sentiment_scores"]["positive"],
|
| 698 |
-
"is_crisis": result["analysis"]["is_crisis"]
|
| 699 |
-
})
|
| 700 |
```
|
| 701 |
|
| 702 |
-
**
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
fetch('YOUR_API_URL/predict', {
|
| 708 |
-
method: 'POST',
|
| 709 |
-
body: formData
|
| 710 |
-
})
|
| 711 |
-
.then(res => res.json())
|
| 712 |
-
.then(data => {
|
| 713 |
-
if (data.status === 'success') {
|
| 714 |
-
console.log('Emotion:', data.emotion.primary);
|
| 715 |
-
console.log('Text:', data.transcription);
|
| 716 |
-
}
|
| 717 |
-
});
|
| 718 |
-
```
|
| 719 |
""",
|
| 720 |
theme=gr.themes.Soft(),
|
| 721 |
flagging_mode="never",
|
|
@@ -725,10 +737,10 @@ demo = gr.Interface(
|
|
| 725 |
)
|
| 726 |
|
| 727 |
# ============================================
|
| 728 |
-
#
|
| 729 |
# ============================================
|
| 730 |
|
| 731 |
if __name__ == "__main__":
|
| 732 |
print("🌐 Starting server...")
|
| 733 |
demo.launch()
|
| 734 |
-
print("🎉 Hindi Emotion Analysis API is ready!")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
import torchaudio
|
| 4 |
+
from transformers import pipeline, AutoModel, AutoTokenizer, AutoModelForSequenceClassification
|
| 5 |
import librosa
|
| 6 |
import numpy as np
|
| 7 |
import re
|
| 8 |
import warnings
|
| 9 |
import os
|
| 10 |
+
import asyncio
|
| 11 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 12 |
warnings.filterwarnings('ignore')
|
| 13 |
|
| 14 |
print("🚀 Starting Enhanced Hindi Speech Emotion Analysis App...")
|
|
|
|
| 18 |
# ============================================
|
| 19 |
|
| 20 |
SENTIMENT_PIPELINE = None
|
| 21 |
+
EMOTION_TOKENIZER = None
|
| 22 |
+
EMOTION_MODEL = None
|
| 23 |
+
EMOTION_PIPELINE = None
|
| 24 |
ASR_MODEL = None
|
| 25 |
|
| 26 |
def load_models():
|
| 27 |
"""Load all models once at startup and cache them globally"""
|
| 28 |
+
global SENTIMENT_PIPELINE, EMOTION_TOKENIZER, EMOTION_MODEL, EMOTION_PIPELINE, ASR_MODEL
|
| 29 |
|
| 30 |
+
if SENTIMENT_PIPELINE is not None and ASR_MODEL is not None and EMOTION_MODEL is not None:
|
| 31 |
print("✅ Models already loaded, skipping...")
|
| 32 |
return
|
| 33 |
|
|
|
|
| 44 |
print(f"❌ Error loading sentiment model: {e}")
|
| 45 |
raise
|
| 46 |
|
| 47 |
+
print("🎭 Loading Zero-Shot Emotion Classification model...")
|
| 48 |
+
try:
|
| 49 |
+
EMOTION_TOKENIZER = AutoTokenizer.from_pretrained("joeddav/xlm-roberta-large-xnli")
|
| 50 |
+
EMOTION_MODEL = AutoModelForSequenceClassification.from_pretrained("joeddav/xlm-roberta-large-xnli")
|
| 51 |
+
EMOTION_PIPELINE = pipeline(
|
| 52 |
+
"zero-shot-classification",
|
| 53 |
+
model=EMOTION_MODEL,
|
| 54 |
+
tokenizer=EMOTION_TOKENIZER,
|
| 55 |
+
device=0 if torch.cuda.is_available() else -1
|
| 56 |
+
)
|
| 57 |
+
print("✅ Zero-Shot emotion model loaded successfully")
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print(f"❌ Error loading emotion model: {e}")
|
| 60 |
+
raise
|
| 61 |
+
|
| 62 |
print("🎤 Loading Indic Conformer 600M ASR model...")
|
| 63 |
try:
|
| 64 |
ASR_MODEL = AutoModel.from_pretrained(
|
|
|
|
| 75 |
load_models()
|
| 76 |
|
| 77 |
# ============================================
|
| 78 |
+
# 2. EMOTION LABELS FOR ZERO-SHOT
|
| 79 |
# ============================================
|
| 80 |
|
| 81 |
+
EMOTION_LABELS = [
|
| 82 |
+
"joy",
|
| 83 |
+
"happiness",
|
| 84 |
+
"sadness",
|
| 85 |
+
"anger",
|
| 86 |
+
"fear",
|
| 87 |
+
"anxiety",
|
| 88 |
+
"love",
|
| 89 |
+
"surprise",
|
| 90 |
+
"disgust",
|
| 91 |
+
"calm",
|
| 92 |
+
"neutral",
|
| 93 |
+
"confusion",
|
| 94 |
+
"excitement",
|
| 95 |
+
"frustration",
|
| 96 |
+
"disappointment"
|
| 97 |
+
]
|
| 98 |
+
|
| 99 |
+
# Hindi translations for better multilingual understanding
|
| 100 |
+
EMOTION_LABELS_HINDI = [
|
| 101 |
+
"खुशी", # joy
|
| 102 |
+
"प्रसन्नता", # happiness
|
| 103 |
+
"दुख", # sadness
|
| 104 |
+
"गुस्सा", # anger
|
| 105 |
+
"डर", # fear
|
| 106 |
+
"चिंता", # anxiety
|
| 107 |
+
"प्यार", # love
|
| 108 |
+
"आश्चर्य", # surprise
|
| 109 |
+
"घृणा", # disgust
|
| 110 |
+
"शांति", # calm
|
| 111 |
+
"सामान्य", # neutral
|
| 112 |
+
"उलझन", # confusion
|
| 113 |
+
"उत्साह", # excitement
|
| 114 |
+
"निराशा", # frustration
|
| 115 |
+
"मायूसी" # disappointment
|
| 116 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# ============================================
|
| 119 |
# 3. AUDIO PREPROCESSING FUNCTIONS
|
|
|
|
| 350 |
return text_mixed
|
| 351 |
|
| 352 |
# ============================================
|
| 353 |
+
# 6. ASYNC ANALYSIS FUNCTIONS
|
| 354 |
+
# ============================================
|
| 355 |
+
|
| 356 |
+
async def async_sentiment_analysis(text):
    """Run the sentiment pipeline on *text* without blocking the event loop.

    The module-level ``SENTIMENT_PIPELINE`` callable is invoked as
    ``SENTIMENT_PIPELINE(text)`` on a worker thread and its return value is
    handed back unchanged.

    Args:
        text: The text to analyse (the caller passes the ASR transcription).

    Returns:
        Whatever ``SENTIMENT_PIPELINE(text)`` returns.

    Raises:
        Any exception raised by the pipeline call propagates to the awaiter.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it (get_event_loop() is discouraged here).
    loop = asyncio.get_running_loop()
    # Passing None selects the loop's default thread pool, so we no longer
    # create and tear down a fresh ThreadPoolExecutor on every request.
    return await loop.run_in_executor(None, SENTIMENT_PIPELINE, text)
|
| 362 |
+
|
| 363 |
+
async def async_emotion_classification(text):
    """Run zero-shot emotion classification on *text* off the event loop.

    The candidate labels are the concatenation of ``EMOTION_LABELS`` (English)
    and ``EMOTION_LABELS_HINDI``, and the module-level ``EMOTION_PIPELINE`` is
    called with ``multi_label=False`` on a worker thread.

    Args:
        text: The text to classify (the caller passes the ASR transcription).

    Returns:
        Whatever ``EMOTION_PIPELINE`` returns for the call.
        NOTE(review): ``process_emotion_results`` reads ``'labels'`` and
        ``'scores'`` from this value, so it is presumably the usual zero-shot
        result dict - confirm against the pipeline version in use.

    Raises:
        Any exception raised by the pipeline call propagates to the awaiter.
    """
    # Use both English and Hindi labels for better multilingual performance
    all_labels = EMOTION_LABELS + EMOTION_LABELS_HINDI

    # get_running_loop() is the documented accessor inside a coroutine, and
    # the default executor (None) avoids building a new thread pool per call.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None,
        lambda: EMOTION_PIPELINE(text, all_labels, multi_label=False)
    )
|
| 374 |
+
|
| 375 |
+
async def parallel_analysis(text):
    """Await sentiment analysis and emotion classification concurrently.

    Args:
        text: The text handed to both ``async_sentiment_analysis`` and
            ``async_emotion_classification``.

    Returns:
        A ``(sentiment_result, emotion_result)`` tuple. Because the gather is
        done with ``return_exceptions=True``, a failure in either analysis is
        returned in its slot as the exception object instead of being raised,
        so callers must check each element before using it.
    """
    print("🔄 Running parallel sentiment and emotion analysis...")

    # Both coroutines are created up front (sentiment first) and awaited
    # together; neither failure cancels the other.
    outcomes = await asyncio.gather(
        async_sentiment_analysis(text),
        async_emotion_classification(text),
        return_exceptions=True
    )

    sentiment_outcome, emotion_outcome = outcomes
    return sentiment_outcome, emotion_outcome
|
| 390 |
+
|
| 391 |
+
# ============================================
|
| 392 |
+
# 7. ENHANCED SENTIMENT ANALYSIS
|
| 393 |
# ============================================
|
| 394 |
|
| 395 |
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
|
|
|
| 446 |
|
| 447 |
return sentiment_scores, final_confidence, is_mixed
|
| 448 |
|
| 449 |
+
def _unknown_emotion():
    """Build the placeholder payload returned when no emotion can be reported."""
    return {
        "primary": "unknown",
        "secondary": None,
        "confidence": 0.0,
        "top_emotions": []
    }


def process_emotion_results(emotion_result):
    """Turn a zero-shot classification result into the API emotion payload.

    Args:
        emotion_result: Either an ``Exception`` (a failed classification
            passed through as a value) or a mapping with parallel ``'labels'``
            and ``'scores'`` sequences.

    Returns:
        A dict with keys ``primary``, ``secondary``, ``confidence`` and
        ``top_emotions`` (up to five ``{"emotion", "score"}`` entries, highest
        score first, scores rounded to 4 decimals). When *emotion_result* is
        an exception or contains no labels, the placeholder payload
        (``"unknown"`` / ``None`` / ``0.0`` / ``[]``) is returned.
    """
    if isinstance(emotion_result, Exception):
        print(f"⚠️ Emotion classification error: {emotion_result}")
        return _unknown_emotion()

    labels = emotion_result['labels']
    scores = emotion_result['scores']
    if not labels:
        return _unknown_emotion()

    # Map Hindi labels back to English
    hindi_to_english = dict(zip(EMOTION_LABELS_HINDI, EMOTION_LABELS))

    # The classifier is given every emotion twice (English and Hindi label),
    # so the same emotion can occupy two result slots. Merge the two scores
    # per English name BEFORE ranking; otherwise primary and secondary can be
    # the same emotion and its score is split between the duplicates.
    merged_scores = {}
    for label, score in zip(labels, scores):
        english_label = hindi_to_english.get(label, label)
        merged_scores[english_label] = merged_scores.get(english_label, 0.0) + score

    # sorted() is stable, so equal scores keep the classifier's original order.
    ranked = sorted(merged_scores.items(), key=lambda item: item[1], reverse=True)

    # Get top 5 emotions
    top_emotions = [
        {"emotion": name, "score": round(score, 4)}
        for name, score in ranked[:5]
    ]

    primary_emotion = top_emotions[0]["emotion"]
    secondary_emotion = top_emotions[1]["emotion"] if len(top_emotions) > 1 else None
    confidence = top_emotions[0]["score"]

    return {
        "primary": primary_emotion,
        "secondary": secondary_emotion,
        "confidence": confidence,
        "top_emotions": top_emotions
    }
|
| 487 |
+
|
| 488 |
# ============================================
|
| 489 |
+
# 8. MAIN PREDICTION FUNCTION
|
| 490 |
# ============================================
|
| 491 |
|
| 492 |
def predict(audio_filepath):
|
|
|
|
| 554 |
"hindi_content_percentage": round(hindi_ratio * 100, 2)
|
| 555 |
}
|
| 556 |
|
| 557 |
+
# Parallel Sentiment and Emotion Analysis
|
| 558 |
+
print("💭 Analyzing sentiment and emotions in parallel...")
|
| 559 |
try:
|
| 560 |
+
# Run both analyses concurrently
|
| 561 |
+
sentiment_result, emotion_result = asyncio.run(parallel_analysis(transcription))
|
| 562 |
|
| 563 |
+
# Process sentiment
|
| 564 |
sentiment_scores, confidence, is_mixed = enhanced_sentiment_analysis(
|
| 565 |
transcription,
|
| 566 |
prosodic_features,
|
| 567 |
+
sentiment_result
|
| 568 |
)
|
| 569 |
|
| 570 |
+
# Process emotion
|
| 571 |
+
emotion_data = process_emotion_results(emotion_result)
|
| 572 |
+
|
| 573 |
+
print(f"✅ Detected Emotion: {emotion_data['primary']}")
|
| 574 |
+
print(f"✅ Sentiment: {max(sentiment_scores, key=sentiment_scores.get)}")
|
| 575 |
+
print(f"📝 Transcription: {transcription}")
|
|
|
|
| 576 |
|
| 577 |
# Build structured output
|
| 578 |
result = {
|
| 579 |
"status": "success",
|
| 580 |
"transcription": transcription,
|
| 581 |
+
"emotion": emotion_data,
|
| 582 |
+
"sentiment": {
|
| 583 |
+
"dominant": max(sentiment_scores, key=sentiment_scores.get),
|
| 584 |
+
"scores": {
|
| 585 |
+
"positive": round(sentiment_scores['Positive'], 4),
|
| 586 |
+
"neutral": round(sentiment_scores['Neutral'], 4),
|
| 587 |
+
"negative": round(sentiment_scores['Negative'], 4)
|
| 588 |
+
},
|
| 589 |
+
"confidence": round(confidence, 4)
|
| 590 |
},
|
| 591 |
"analysis": {
|
| 592 |
"mixed_emotions": is_mixed,
|
|
|
|
| 603 |
}
|
| 604 |
}
|
| 605 |
|
|
|
|
|
|
|
| 606 |
print(f"{'='*60}\n")
|
| 607 |
|
| 608 |
return result
|
| 609 |
|
| 610 |
+
except Exception as analysis_error:
|
| 611 |
+
import traceback
|
| 612 |
+
traceback.print_exc()
|
| 613 |
return {
|
| 614 |
"status": "error",
|
| 615 |
+
"error_type": "analysis_error",
|
| 616 |
+
"message": str(analysis_error),
|
| 617 |
"transcription": transcription
|
| 618 |
}
|
| 619 |
|
|
|
|
| 627 |
}
|
| 628 |
|
| 629 |
# ============================================
|
| 630 |
+
# 9. GRADIO INTERFACE
|
| 631 |
# ============================================
|
| 632 |
|
| 633 |
demo = gr.Interface(
|
|
|
|
| 637 |
label="🎤 Record or Upload Hindi Audio",
|
| 638 |
sources=["upload", "microphone"]
|
| 639 |
),
|
| 640 |
+
outputs=gr.JSON(label="📊 Emotion & Sentiment Analysis Results (API-Ready JSON)"),
|
| 641 |
+
title="🎭 Hindi Speech Emotion & Sentiment Analysis API",
|
| 642 |
description="""
|
| 643 |
+
## 🇮🇳 Advanced Hindi/Hinglish Speech Emotion & Sentiment Detection
|
| 644 |
|
| 645 |
### ✨ Features:
|
| 646 |
- **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
|
| 647 |
+
- **🎭 Zero-Shot Emotion Detection** - 15+ emotions using XLM-RoBERTa
|
| 648 |
+
- **💭 Sentiment Analysis** - Positive/Neutral/Negative classification
|
| 649 |
+
- **⚡ Parallel Processing** - Async execution for faster results
|
| 650 |
- **🎵 Voice Analysis** - Analyzes tone, pitch, energy, and spectral features
|
| 651 |
- **🌐 Hinglish Support** - Works with Hindi + English mix
|
| 652 |
- **📝 JSON Output** - Easy to parse for API integration
|
|
|
|
| 658 |
"transcription": "मैं बहुत खुश हूं",
|
| 659 |
"emotion": {
|
| 660 |
"primary": "joy",
|
| 661 |
+
"secondary": "happiness",
|
| 662 |
+
"confidence": 0.8745,
|
| 663 |
+
"top_emotions": [
|
| 664 |
+
{"emotion": "joy", "score": 0.8745},
|
| 665 |
+
{"emotion": "happiness", "score": 0.0923},
|
| 666 |
+
{"emotion": "excitement", "score": 0.0332}
|
| 667 |
+
]
|
| 668 |
},
|
| 669 |
+
"sentiment": {
|
| 670 |
+
"dominant": "Positive",
|
| 671 |
+
"scores": {
|
| 672 |
+
"positive": 0.8745,
|
| 673 |
+
"neutral": 0.0923,
|
| 674 |
+
"negative": 0.0332
|
| 675 |
+
},
|
| 676 |
+
"confidence": 0.8745
|
| 677 |
},
|
| 678 |
"analysis": {
|
| 679 |
"mixed_emotions": false,
|
|
|
|
| 691 |
}
|
| 692 |
```
|
| 693 |
|
| 694 |
+
### 🎯 Supported Emotions (15+):
|
| 695 |
- **Positive**: joy, happiness, love, excitement, calm
|
| 696 |
+
- **Negative**: sadness, anger, fear, anxiety, disgust, frustration, disappointment
|
| 697 |
+
- **Neutral**: neutral, confusion, surprise
|
| 698 |
|
| 699 |
### 🧪 Test Examples:
|
| 700 |
- **😊 Joy**: "मैं बहुत खुश हूं आज"
|
|
|
|
| 705 |
- **❤️ Love**: "मुझे तुमसे बहुत प्यार है"
|
| 706 |
|
| 707 |
### 💡 API Usage:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
|
| 709 |
**Python API Client:**
|
| 710 |
```python
|
| 711 |
import requests
|
| 712 |
|
|
|
|
| 713 |
with open("audio.wav", "rb") as f:
|
| 714 |
response = requests.post(
|
| 715 |
"YOUR_API_URL/predict",
|
|
|
|
| 720 |
|
| 721 |
if result["status"] == "success":
|
| 722 |
print(f"Emotion: {result['emotion']['primary']}")
|
| 723 |
+
print(f"Sentiment: {result['sentiment']['dominant']}")
|
| 724 |
+
print(f"Top 3 emotions: {result['emotion']['top_emotions'][:3]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
```
|
| 726 |
|
| 727 |
+
**Async Processing Benefits:**
|
| 728 |
+
- ⚡ 2x faster analysis (parallel execution)
|
| 729 |
+
- 🔄 Non-blocking I/O operations
|
| 730 |
+
- 💪 Better resource utilization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
""",
|
| 732 |
theme=gr.themes.Soft(),
|
| 733 |
flagging_mode="never",
|
|
|
|
| 737 |
)
|
| 738 |
|
| 739 |
# ============================================
|
| 740 |
+
# 10. LAUNCH APP
|
| 741 |
# ============================================
|
| 742 |
|
| 743 |
if __name__ == "__main__":
|
| 744 |
print("🌐 Starting server...")
|
| 745 |
demo.launch()
|
| 746 |
+
print("🎉 Hindi Emotion & Sentiment Analysis API is ready!")
|