Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
import torchaudio
|
| 4 |
-
from transformers import
|
|
|
|
| 5 |
import librosa
|
| 6 |
import numpy as np
|
| 7 |
import re
|
|
@@ -18,42 +19,28 @@ print("🚀 Starting Enhanced Hindi Speech Emotion Analysis App...")
|
|
| 18 |
# 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
|
| 19 |
# ============================================
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
ASR_MODEL = None
|
| 24 |
|
| 25 |
def load_models():
|
| 26 |
"""Load all models once at startup and cache them globally"""
|
| 27 |
-
global
|
| 28 |
|
| 29 |
-
if
|
| 30 |
print("✅ Models already loaded, skipping...")
|
| 31 |
return
|
| 32 |
|
| 33 |
-
print("📚 Loading Hindi
|
| 34 |
try:
|
| 35 |
-
sentiment_model_name = "
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
top_k=None
|
| 40 |
-
)
|
| 41 |
-
print("✅ Hindi sentiment model loaded successfully")
|
| 42 |
except Exception as e:
|
| 43 |
print(f"❌ Error loading sentiment model: {e}")
|
| 44 |
raise
|
| 45 |
|
| 46 |
-
print("🎭 Loading Zero-Shot Emotion Classification model...")
|
| 47 |
-
try:
|
| 48 |
-
EMOTION_PIPELINE = pipeline(
|
| 49 |
-
"zero-shot-classification",
|
| 50 |
-
model="joeddav/xlm-roberta-large-xnli"
|
| 51 |
-
)
|
| 52 |
-
print("✅ Zero-Shot emotion model loaded successfully")
|
| 53 |
-
except Exception as e:
|
| 54 |
-
print(f"❌ Error loading emotion model: {e}")
|
| 55 |
-
raise
|
| 56 |
-
|
| 57 |
print("🎤 Loading Indic Conformer 600M ASR model...")
|
| 58 |
try:
|
| 59 |
ASR_MODEL = AutoModel.from_pretrained(
|
|
@@ -70,45 +57,45 @@ def load_models():
|
|
| 70 |
load_models()
|
| 71 |
|
| 72 |
# ============================================
|
| 73 |
-
# 2.
|
| 74 |
# ============================================
|
| 75 |
|
| 76 |
-
|
| 77 |
-
"
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
"
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
|
| 113 |
# ============================================
|
| 114 |
# 3. AUDIO PREPROCESSING FUNCTIONS
|
|
@@ -318,8 +305,8 @@ def detect_crisis_keywords(text):
|
|
| 318 |
return True
|
| 319 |
return False
|
| 320 |
|
| 321 |
-
def
|
| 322 |
-
"""Detect mixed
|
| 323 |
text_lower = text.lower()
|
| 324 |
|
| 325 |
if detect_crisis_keywords(text):
|
|
@@ -352,133 +339,59 @@ async def async_sentiment_analysis(text):
|
|
| 352 |
"""Run sentiment analysis asynchronously"""
|
| 353 |
loop = asyncio.get_event_loop()
|
| 354 |
with ThreadPoolExecutor() as executor:
|
| 355 |
-
result = await loop.run_in_executor(executor,
|
| 356 |
return result
|
| 357 |
|
| 358 |
-
async def async_emotion_classification(text):
|
| 359 |
-
"""Run zero-shot emotion classification asynchronously"""
|
| 360 |
-
loop = asyncio.get_event_loop()
|
| 361 |
-
with ThreadPoolExecutor() as executor:
|
| 362 |
-
# Use both English and Hindi labels for better multilingual performance
|
| 363 |
-
all_labels = EMOTION_LABELS + EMOTION_LABELS_HINDI
|
| 364 |
-
result = await loop.run_in_executor(
|
| 365 |
-
executor,
|
| 366 |
-
lambda: EMOTION_PIPELINE(text, all_labels, multi_label=False)
|
| 367 |
-
)
|
| 368 |
-
return result
|
| 369 |
-
|
| 370 |
-
async def parallel_analysis(text):
|
| 371 |
-
"""Run sentiment and emotion analysis in parallel"""
|
| 372 |
-
print("🔄 Running parallel sentiment and emotion analysis...")
|
| 373 |
-
|
| 374 |
-
# Execute both analyses concurrently
|
| 375 |
-
sentiment_task = async_sentiment_analysis(text)
|
| 376 |
-
emotion_task = async_emotion_classification(text)
|
| 377 |
-
|
| 378 |
-
sentiment_result, emotion_result = await asyncio.gather(
|
| 379 |
-
sentiment_task,
|
| 380 |
-
emotion_task,
|
| 381 |
-
return_exceptions=True
|
| 382 |
-
)
|
| 383 |
-
|
| 384 |
-
return sentiment_result, emotion_result
|
| 385 |
-
|
| 386 |
# ============================================
|
| 387 |
# 7. ENHANCED SENTIMENT ANALYSIS
|
| 388 |
# ============================================
|
| 389 |
|
| 390 |
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
|
| 391 |
-
"""Enhanced
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
'
|
| 399 |
-
'
|
| 400 |
-
'
|
| 401 |
-
'negative': 'Negative',
|
| 402 |
-
'neutral': 'Neutral',
|
| 403 |
-
'positive': 'Positive'
|
| 404 |
}
|
| 405 |
|
| 406 |
-
for result in raw_results[0]:
|
| 407 |
-
label = result['label']
|
| 408 |
-
score = result['score']
|
| 409 |
-
mapped_label = label_mapping.get(label, 'Neutral')
|
| 410 |
-
sentiment_scores[mapped_label] = score
|
| 411 |
-
|
| 412 |
-
for sentiment in ['Negative', 'Neutral', 'Positive']:
|
| 413 |
-
if sentiment not in sentiment_scores:
|
| 414 |
-
sentiment_scores[sentiment] = 0.0
|
| 415 |
-
|
| 416 |
is_crisis = detect_crisis_keywords(text)
|
| 417 |
if is_crisis:
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
|
|
|
|
|
|
| 421 |
is_mixed = False
|
| 422 |
else:
|
| 423 |
has_negation = detect_negation(text)
|
| 424 |
if has_negation:
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
|
|
|
| 428 |
|
| 429 |
-
is_mixed =
|
| 430 |
if is_mixed:
|
|
|
|
| 431 |
neutral_boost = 0.20
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
|
|
|
| 435 |
|
| 436 |
-
|
|
|
|
| 437 |
if total > 0:
|
| 438 |
-
|
| 439 |
|
| 440 |
-
final_confidence = max(
|
| 441 |
-
|
| 442 |
-
return sentiment_scores, final_confidence, is_mixed
|
| 443 |
-
|
| 444 |
-
def process_emotion_results(emotion_result):
|
| 445 |
-
"""Process zero-shot emotion classification results"""
|
| 446 |
-
if isinstance(emotion_result, Exception):
|
| 447 |
-
print(f"⚠️ Emotion classification error: {emotion_result}")
|
| 448 |
-
return {
|
| 449 |
-
"primary": "unknown",
|
| 450 |
-
"secondary": None,
|
| 451 |
-
"confidence": 0.0,
|
| 452 |
-
"top_emotions": []
|
| 453 |
-
}
|
| 454 |
|
| 455 |
-
|
| 456 |
-
labels = emotion_result['labels']
|
| 457 |
-
scores = emotion_result['scores']
|
| 458 |
-
|
| 459 |
-
# Map Hindi labels back to English
|
| 460 |
-
hindi_to_english = dict(zip(EMOTION_LABELS_HINDI, EMOTION_LABELS))
|
| 461 |
-
|
| 462 |
-
top_emotions = []
|
| 463 |
-
for i in range(min(5, len(labels))):
|
| 464 |
-
label = labels[i]
|
| 465 |
-
# Convert Hindi to English if necessary
|
| 466 |
-
english_label = hindi_to_english.get(label, label)
|
| 467 |
-
top_emotions.append({
|
| 468 |
-
"emotion": english_label,
|
| 469 |
-
"score": round(scores[i], 4)
|
| 470 |
-
})
|
| 471 |
-
|
| 472 |
-
primary_emotion = top_emotions[0]["emotion"] if top_emotions else "unknown"
|
| 473 |
-
secondary_emotion = top_emotions[1]["emotion"] if len(top_emotions) > 1 else None
|
| 474 |
-
confidence = top_emotions[0]["score"] if top_emotions else 0.0
|
| 475 |
-
|
| 476 |
-
return {
|
| 477 |
-
"primary": primary_emotion,
|
| 478 |
-
"secondary": secondary_emotion,
|
| 479 |
-
"confidence": round(confidence, 4),
|
| 480 |
-
"top_emotions": top_emotions
|
| 481 |
-
}
|
| 482 |
|
| 483 |
# ============================================
|
| 484 |
# 8. MAIN PREDICTION FUNCTION
|
|
@@ -549,37 +462,35 @@ def predict(audio_filepath):
|
|
| 549 |
"hindi_content_percentage": round(hindi_ratio * 100, 2)
|
| 550 |
}
|
| 551 |
|
| 552 |
-
#
|
| 553 |
-
print("💭 Analyzing
|
| 554 |
try:
|
| 555 |
-
# Run
|
| 556 |
-
|
| 557 |
|
| 558 |
-
# Process
|
| 559 |
-
|
| 560 |
transcription,
|
| 561 |
prosodic_features,
|
| 562 |
-
|
| 563 |
)
|
| 564 |
|
| 565 |
-
|
| 566 |
-
emotion_data = process_emotion_results(emotion_result)
|
| 567 |
|
| 568 |
-
print(f"✅
|
| 569 |
-
print(f"✅ Sentiment: {max(sentiment_scores, key=sentiment_scores.get)}")
|
| 570 |
print(f"📝 Transcription: {transcription}")
|
| 571 |
|
| 572 |
# Build structured output
|
| 573 |
result = {
|
| 574 |
"status": "success",
|
| 575 |
"transcription": transcription,
|
| 576 |
-
"emotion":
|
| 577 |
-
|
| 578 |
-
"dominant": max(sentiment_scores, key=sentiment_scores.get),
|
| 579 |
"scores": {
|
| 580 |
-
"
|
| 581 |
-
"
|
| 582 |
-
"
|
|
|
|
| 583 |
},
|
| 584 |
"confidence": round(confidence, 4)
|
| 585 |
},
|
|
@@ -632,16 +543,15 @@ demo = gr.Interface(
|
|
| 632 |
label="🎤 Record or Upload Hindi Audio",
|
| 633 |
sources=["upload", "microphone"]
|
| 634 |
),
|
| 635 |
-
outputs=gr.JSON(label="📊 Emotion
|
| 636 |
-
title="🎭 Hindi Speech Emotion
|
| 637 |
description="""
|
| 638 |
-
## 🇮🇳 Advanced Hindi/Hinglish Speech Emotion
|
| 639 |
|
| 640 |
### ✨ Features:
|
| 641 |
- **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
|
| 642 |
-
- **🎭
|
| 643 |
-
-
|
| 644 |
-
- **⚡ Parallel Processing** - Async execution for faster results
|
| 645 |
- **🎵 Voice Analysis** - Analyzes tone, pitch, energy, and spectral features
|
| 646 |
- **🌐 Hinglish Support** - Works with Hindi + English mix
|
| 647 |
- **📝 JSON Output** - Easy to parse for API integration
|
|
@@ -650,23 +560,14 @@ demo = gr.Interface(
|
|
| 650 |
```json
|
| 651 |
{
|
| 652 |
"status": "success",
|
| 653 |
-
"transcription": "
|
| 654 |
"emotion": {
|
| 655 |
-
"
|
| 656 |
-
"secondary": "happiness",
|
| 657 |
-
"confidence": 0.8745,
|
| 658 |
-
"top_emotions": [
|
| 659 |
-
{"emotion": "joy", "score": 0.8745},
|
| 660 |
-
{"emotion": "happiness", "score": 0.0923},
|
| 661 |
-
{"emotion": "excitement", "score": 0.0332}
|
| 662 |
-
]
|
| 663 |
-
},
|
| 664 |
-
"sentiment": {
|
| 665 |
-
"dominant": "Positive",
|
| 666 |
"scores": {
|
| 667 |
-
"
|
| 668 |
-
"
|
| 669 |
-
"
|
|
|
|
| 670 |
},
|
| 671 |
"confidence": 0.8745
|
| 672 |
},
|
|
@@ -686,18 +587,17 @@ demo = gr.Interface(
|
|
| 686 |
}
|
| 687 |
```
|
| 688 |
|
| 689 |
-
### 🎯
|
| 690 |
-
-
|
| 691 |
-
-
|
| 692 |
-
-
|
|
|
|
| 693 |
|
| 694 |
### 🧪 Test Examples:
|
| 695 |
-
-
|
| 696 |
-
-
|
| 697 |
-
- **😠
|
| 698 |
-
-
|
| 699 |
-
- **😐 Calm**: "सब ठीक है, मैं शांत हूं"
|
| 700 |
-
- **❤️ Love**: "मुझे तुमसे बहुत प्यार है"
|
| 701 |
|
| 702 |
### 💡 API Usage:
|
| 703 |
|
|
@@ -714,15 +614,16 @@ demo = gr.Interface(
|
|
| 714 |
result = response.json()
|
| 715 |
|
| 716 |
if result["status"] == "success":
|
| 717 |
-
print(f"
|
| 718 |
-
print(f"
|
| 719 |
-
print(f"
|
|
|
|
| 720 |
```
|
| 721 |
|
| 722 |
**Async Processing Benefits:**
|
| 723 |
-
- ⚡
|
| 724 |
- 🔄 Non-blocking I/O operations
|
| 725 |
-
- 💪
|
| 726 |
""",
|
| 727 |
theme=gr.themes.Soft(),
|
| 728 |
flagging_mode="never",
|
|
@@ -738,4 +639,4 @@ demo = gr.Interface(
|
|
| 738 |
if __name__ == "__main__":
|
| 739 |
print("🌐 Starting server...")
|
| 740 |
demo.launch()
|
| 741 |
-
print("🎉 Hindi Emotion
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
import torchaudio
|
| 4 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel
|
| 5 |
+
from torch.nn.functional import softmax
|
| 6 |
import librosa
|
| 7 |
import numpy as np
|
| 8 |
import re
|
|
|
|
| 19 |
# 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
|
| 20 |
# ============================================
|
| 21 |
|
| 22 |
+
SENTIMENT_MODEL = None
|
| 23 |
+
SENTIMENT_TOKENIZER = None
|
| 24 |
ASR_MODEL = None
|
| 25 |
|
| 26 |
def load_models():
|
| 27 |
"""Load all models once at startup and cache them globally"""
|
| 28 |
+
global SENTIMENT_MODEL, SENTIMENT_TOKENIZER, ASR_MODEL
|
| 29 |
|
| 30 |
+
if SENTIMENT_MODEL is not None and ASR_MODEL is not None:
|
| 31 |
print("✅ Models already loaded, skipping...")
|
| 32 |
return
|
| 33 |
|
| 34 |
+
print("📚 Loading Hindi emotion analysis model...")
|
| 35 |
try:
|
| 36 |
+
sentiment_model_name = "yashkahalkar/hindi_sentiment_analysis"
|
| 37 |
+
SENTIMENT_TOKENIZER = AutoTokenizer.from_pretrained(sentiment_model_name)
|
| 38 |
+
SENTIMENT_MODEL = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)
|
| 39 |
+
print("✅ Hindi emotion model loaded successfully")
|
|
|
|
|
|
|
|
|
|
| 40 |
except Exception as e:
|
| 41 |
print(f"❌ Error loading sentiment model: {e}")
|
| 42 |
raise
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
print("🎤 Loading Indic Conformer 600M ASR model...")
|
| 45 |
try:
|
| 46 |
ASR_MODEL = AutoModel.from_pretrained(
|
|
|
|
| 57 |
load_models()
|
| 58 |
|
| 59 |
# ============================================
|
| 60 |
+
# 2. SENTIMENT PREDICTION FUNCTION
|
| 61 |
# ============================================
|
| 62 |
|
| 63 |
+
def predict_sentiment(text):
    """
    Predict sentiment/emotion using yashkahalkar/hindi_sentiment_analysis model
    Detects: Happy, Sad, Angry, Neutral

    The text is tokenized (truncated to 512 tokens), passed through the
    globally loaded ``SENTIMENT_MODEL`` and the logits are turned into
    probabilities with softmax.

    Args:
        text: Text to classify (a single string is expected; only the first
            row of the model output is read).

    Returns:
        dict with
            'label': winning label ('sad', 'angry', 'happy' or 'neutral'),
            'scores': probability for each of the four labels ('neutral' is
                0.0 when the model emits only three classes),
            'confidence': probability of the winning class.
        If anything fails, the error is printed and a uniform fallback
        (label 'neutral', 0.25 for every score) is returned instead of raising.
    """
    # NOTE(review): this index -> label order is hard-coded; it is assumed to
    # match the checkpoint's class order -- confirm against the model config.
    label_map = {0: 'sad', 1: 'angry', 2: 'happy', 3: 'neutral'}

    try:
        inputs = SENTIMENT_TOKENIZER(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            outputs = SENTIMENT_MODEL(**inputs)
            probabilities = softmax(outputs.logits, dim=-1)

        # Read the first (only) row as plain Python floats; tolist() does not
        # require the tensor to live on the CPU, unlike .numpy().
        scores = probabilities[0].tolist()

        # Index of the highest probability (first one wins on ties).
        emotion_idx = max(range(len(scores)), key=scores.__getitem__)
        emotion_label = label_map.get(emotion_idx, 'neutral')

        return {
            'label': emotion_label,
            'scores': {
                'sad': float(scores[0]),
                'angry': float(scores[1]),
                'happy': float(scores[2]),
                'neutral': float(scores[3]) if len(scores) > 3 else 0.0
            },
            'confidence': float(scores[emotion_idx])
        }
    except Exception as e:
        # Deliberate best effort: report the failure and degrade to a
        # uniform "don't know" answer so the caller can still respond.
        print(f"⚠️ Sentiment prediction error: {e}")
        return {
            'label': 'neutral',
            'scores': {'sad': 0.25, 'angry': 0.25, 'happy': 0.25, 'neutral': 0.25},
            'confidence': 0.25
        }
|
| 99 |
|
| 100 |
# ============================================
|
| 101 |
# 3. AUDIO PREPROCESSING FUNCTIONS
|
|
|
|
| 305 |
return True
|
| 306 |
return False
|
| 307 |
|
| 308 |
+
def detect_mixed_sentiment(text):
|
| 309 |
+
"""Detect if text contains mixed or conflicting sentiment indicators"""
|
| 310 |
text_lower = text.lower()
|
| 311 |
|
| 312 |
if detect_crisis_keywords(text):
|
|
|
|
| 339 |
"""Run sentiment analysis asynchronously"""
|
| 340 |
loop = asyncio.get_event_loop()
|
| 341 |
with ThreadPoolExecutor() as executor:
|
| 342 |
+
result = await loop.run_in_executor(executor, predict_sentiment, text)
|
| 343 |
return result
|
| 344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
# ============================================
|
| 346 |
# 7. ENHANCED SENTIMENT ANALYSIS
|
| 347 |
# ============================================
|
| 348 |
|
| 349 |
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
    """Enhanced emotion analysis with context awareness

    Starts from the per-label scores in ``raw_results`` and adjusts them
    using text-based checks (crisis keywords, negation, mixed sentiment),
    then renormalizes so the scores sum to 1.

    Args:
        text: Transcribed text the scores were computed for.
        prosodic_features: Accepted for interface compatibility; not read by
            this function.
        raw_results: Dict with a 'scores' mapping holding 'sad', 'angry',
            'happy' and 'neutral' values. A label missing from the mapping
            is treated as 0.0.

    Returns:
        Tuple ``(emotion_scores, final_confidence, is_mixed)`` where
        ``emotion_scores`` maps each of the four labels to its adjusted score,
        ``final_confidence`` is the largest of those scores, and ``is_mixed``
        is the result of the mixed-sentiment check (always False when crisis
        keywords are detected). If ``raw_results`` is empty, not a dict, or
        has no usable 'scores' mapping, a uniform result
        ``({label: 0.25, ...}, 0.25, False)`` is returned.
    """
    labels = ('sad', 'angry', 'happy', 'neutral')
    fallback = ({label: 0.25 for label in labels}, 0.25, False)

    if not raw_results or not isinstance(raw_results, dict):
        return fallback

    # A dict without a proper 'scores' mapping is just as unusable as no
    # result at all; fall back instead of raising KeyError/TypeError.
    base_scores = raw_results.get('scores')
    if not isinstance(base_scores, dict):
        return fallback

    # Get base scores from model
    emotion_scores = {label: base_scores.get(label, 0.0) for label in labels}

    is_crisis = detect_crisis_keywords(text)
    if is_crisis:
        # Boost negative emotions for crisis situations
        # NOTE(review): the 0.50 cap also *lowers* a negative score that is
        # already above 0.50 before renormalization -- confirm this is intended.
        emotion_scores['sad'] = min(0.50, emotion_scores['sad'] * 1.5)
        emotion_scores['angry'] = min(0.50, emotion_scores['angry'] * 1.5)
        emotion_scores['neutral'] = max(0.02, emotion_scores['neutral'] * 0.2)
        emotion_scores['happy'] = max(0.01, emotion_scores['happy'] * 0.1)
        is_mixed = False
    else:
        has_negation = detect_negation(text)
        if has_negation:
            # Swap happy with sad on negation
            emotion_scores['happy'], emotion_scores['sad'] = (
                emotion_scores['sad'],
                emotion_scores['happy'],
            )

        is_mixed = detect_mixed_sentiment(text)
        if is_mixed:
            # Boost neutral for mixed emotions, taking an equal share away
            # from each of the other three labels (floored at 0.05).
            neutral_boost = 0.20
            emotion_scores['neutral'] = min(0.65, emotion_scores['neutral'] + neutral_boost)
            emotion_scores['happy'] = max(0.05, emotion_scores['happy'] - neutral_boost/3)
            emotion_scores['sad'] = max(0.05, emotion_scores['sad'] - neutral_boost/3)
            emotion_scores['angry'] = max(0.05, emotion_scores['angry'] - neutral_boost/3)

    # Normalize scores
    total = sum(emotion_scores.values())
    if total > 0:
        emotion_scores = {k: v/total for k, v in emotion_scores.items()}

    final_confidence = max(emotion_scores.values())

    return emotion_scores, final_confidence, is_mixed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
|
| 396 |
# ============================================
|
| 397 |
# 8. MAIN PREDICTION FUNCTION
|
|
|
|
| 462 |
"hindi_content_percentage": round(hindi_ratio * 100, 2)
|
| 463 |
}
|
| 464 |
|
| 465 |
+
# Emotion Analysis
|
| 466 |
+
print("💭 Analyzing emotion...")
|
| 467 |
try:
|
| 468 |
+
# Run emotion analysis
|
| 469 |
+
emotion_result = asyncio.run(async_sentiment_analysis(transcription))
|
| 470 |
|
| 471 |
+
# Process emotion with context enhancement
|
| 472 |
+
emotion_scores, confidence, is_mixed = enhanced_sentiment_analysis(
|
| 473 |
transcription,
|
| 474 |
prosodic_features,
|
| 475 |
+
emotion_result
|
| 476 |
)
|
| 477 |
|
| 478 |
+
dominant_emotion = max(emotion_scores, key=emotion_scores.get)
|
|
|
|
| 479 |
|
| 480 |
+
print(f"✅ Emotion: {dominant_emotion}")
|
|
|
|
| 481 |
print(f"📝 Transcription: {transcription}")
|
| 482 |
|
| 483 |
# Build structured output
|
| 484 |
result = {
|
| 485 |
"status": "success",
|
| 486 |
"transcription": transcription,
|
| 487 |
+
"emotion": {
|
| 488 |
+
"dominant": dominant_emotion,
|
|
|
|
| 489 |
"scores": {
|
| 490 |
+
"happy": round(emotion_scores['happy'], 4),
|
| 491 |
+
"sad": round(emotion_scores['sad'], 4),
|
| 492 |
+
"angry": round(emotion_scores['angry'], 4),
|
| 493 |
+
"neutral": round(emotion_scores['neutral'], 4)
|
| 494 |
},
|
| 495 |
"confidence": round(confidence, 4)
|
| 496 |
},
|
|
|
|
| 543 |
label="🎤 Record or Upload Hindi Audio",
|
| 544 |
sources=["upload", "microphone"]
|
| 545 |
),
|
| 546 |
+
outputs=gr.JSON(label="📊 Emotion Analysis Results (API-Ready JSON)"),
|
| 547 |
+
title="🎭 Hindi Speech Emotion Analysis API",
|
| 548 |
description="""
|
| 549 |
+
## 🇮🇳 Advanced Hindi/Hinglish Speech Emotion Detection
|
| 550 |
|
| 551 |
### ✨ Features:
|
| 552 |
- **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
|
| 553 |
+
- **🎭 Emotion Classification** - Using yashkahalkar/hindi_sentiment_analysis
|
| 554 |
+
- **⚡ Async Processing** - Fast emotion detection
|
|
|
|
| 555 |
- **🎵 Voice Analysis** - Analyzes tone, pitch, energy, and spectral features
|
| 556 |
- **🌐 Hinglish Support** - Works with Hindi + English mix
|
| 557 |
- **📝 JSON Output** - Easy to parse for API integration
|
|
|
|
| 560 |
```json
|
| 561 |
{
|
| 562 |
"status": "success",
|
| 563 |
+
"transcription": "मुझे आज बहुत खुशी हो रही है",
|
| 564 |
"emotion": {
|
| 565 |
+
"dominant": "happy",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
"scores": {
|
| 567 |
+
"happy": 0.8745,
|
| 568 |
+
"sad": 0.0432,
|
| 569 |
+
"angry": 0.0321,
|
| 570 |
+
"neutral": 0.0502
|
| 571 |
},
|
| 572 |
"confidence": 0.8745
|
| 573 |
},
|
|
|
|
| 587 |
}
|
| 588 |
```
|
| 589 |
|
| 590 |
+
### 🎯 Emotion Classes:
|
| 591 |
+
- **😃 Happy**: Joyful, cheerful, optimistic content
|
| 592 |
+
- **😞 Sad**: Sorrowful, disappointed, melancholic content
|
| 593 |
+
- **😠 Angry**: Frustrated, irritated, aggressive content
|
| 594 |
+
- **😐 Neutral**: Factual, balanced, or informational content
|
| 595 |
|
| 596 |
### 🧪 Test Examples:
|
| 597 |
+
- **😃 Happy**: "मुझे आज बहुत खुशी हो रही है"
|
| 598 |
+
- **😞 Sad**: "मुझे बहुत दुख हो रहा है"
|
| 599 |
+
- **😠 Angry**: "मुझे बहुत गुस्सा आ रहा है"
|
| 600 |
+
- **😐 Neutral**: "आज मौसम अच्छा है"
|
|
|
|
|
|
|
| 601 |
|
| 602 |
### 💡 API Usage:
|
| 603 |
|
|
|
|
| 614 |
result = response.json()
|
| 615 |
|
| 616 |
if result["status"] == "success":
|
| 617 |
+
print(f"Transcription: {result['transcription']}")
|
| 618 |
+
print(f"Emotion: {result['emotion']['dominant']}")
|
| 619 |
+
print(f"Confidence: {result['emotion']['confidence']}")
|
| 620 |
+
print(f"All emotions: {result['emotion']['scores']}")
|
| 621 |
```
|
| 622 |
|
| 623 |
**Async Processing Benefits:**
|
| 624 |
+
- ⚡ Fast emotion analysis
|
| 625 |
- 🔄 Non-blocking I/O operations
|
| 626 |
+
- 💪 Efficient resource utilization
|
| 627 |
""",
|
| 628 |
theme=gr.themes.Soft(),
|
| 629 |
flagging_mode="never",
|
|
|
|
| 639 |
if __name__ == "__main__":
    print("🌐 Starting server...")
    # demo.launch() blocks the main thread while the server is running, so a
    # message printed after it would only show up once the server stops.
    # Announce before handing control to Gradio.
    print("🎉 Hindi Emotion Analysis API is ready!")
    demo.launch()
|