JustNikunj commited on
Commit
c4e8a9d
·
verified ·
1 Parent(s): cdf105f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +333 -226
app.py CHANGED
@@ -7,30 +7,26 @@ import numpy as np
7
  import re
8
  import warnings
9
  import os
 
10
  warnings.filterwarnings('ignore')
11
 
12
- print("🚀 Starting Enhanced Hindi Speech Sentiment Analysis App...")
13
 
14
  # ============================================
15
  # 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
16
  # ============================================
17
 
18
- # Global variables to store loaded models
19
  SENTIMENT_PIPELINE = None
20
  ASR_MODEL = None
21
 
22
  def load_models():
23
- """
24
- Load all models once at startup and cache them globally
25
- """
26
  global SENTIMENT_PIPELINE, ASR_MODEL
27
 
28
- # Check if already loaded
29
  if SENTIMENT_PIPELINE is not None and ASR_MODEL is not None:
30
  print("✅ Models already loaded, skipping...")
31
  return
32
 
33
- # Load Hindi Sentiment Model
34
  print("📚 Loading Hindi sentiment analysis model...")
35
  try:
36
  sentiment_model_name = "LondonStory/txlm-roberta-hindi-sentiment"
@@ -44,7 +40,6 @@ def load_models():
44
  print(f"❌ Error loading sentiment model: {e}")
45
  raise
46
 
47
- # Load Indic Conformer for Hindi ASR
48
  print("🎤 Loading Indic Conformer 600M ASR model...")
49
  try:
50
  ASR_MODEL = AutoModel.from_pretrained(
@@ -58,67 +53,152 @@ def load_models():
58
 
59
  print("✅ All models loaded and cached in memory")
60
 
61
- # Load models at startup
62
  load_models()
63
 
64
  # ============================================
65
- # 2. ENHANCED AUDIO PREPROCESSING FUNCTIONS
66
  # ============================================
67
 
68
- def advanced_preprocess_audio(audio_path, target_sr=16000):
69
  """
70
- Advanced audio preprocessing pipeline for optimal ASR performance
71
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  try:
73
- # Load audio with torchaudio for better compatibility
74
  wav, sr = torchaudio.load(audio_path)
75
 
76
- # Convert stereo to mono by averaging channels
77
  if wav.shape[0] > 1:
78
  wav = torch.mean(wav, dim=0, keepdim=True)
79
  print(f"📊 Converted stereo to mono")
80
 
81
- # Resample if needed
82
  if sr != target_sr:
83
  resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
84
  wav = resampler(wav)
85
  print(f"🔄 Resampled from {sr}Hz to {target_sr}Hz")
86
 
87
- # Convert to numpy for processing
88
  audio_np = wav.squeeze().numpy()
89
-
90
- # 1. Remove DC offset (center around zero)
91
  audio_np = audio_np - np.mean(audio_np)
92
 
93
- # 2. Trim silence from beginning and end (aggressive trimming)
94
- audio_trimmed, trim_indices = librosa.effects.trim(
95
  audio_np,
96
- top_db=25, # More aggressive silence removal
97
  frame_length=2048,
98
  hop_length=512
99
  )
100
  print(f"✂️ Trimmed {len(audio_np) - len(audio_trimmed)} silent samples")
101
 
102
- # 3. Normalize audio amplitude to [-1, 1]
103
  audio_normalized = librosa.util.normalize(audio_trimmed)
104
 
105
- # 4. Apply pre-emphasis filter (boost high frequencies)
106
  pre_emphasis = 0.97
107
  audio_emphasized = np.append(
108
  audio_normalized[0],
109
  audio_normalized[1:] - pre_emphasis * audio_normalized[:-1]
110
  )
111
 
112
- # 5. Advanced noise reduction
113
  audio_denoised = spectral_noise_gate(audio_emphasized, target_sr)
114
-
115
- # 6. Dynamic range compression (reduce volume spikes)
116
  audio_compressed = dynamic_range_compression(audio_denoised)
117
-
118
- # 7. Final normalization
119
  audio_final = librosa.util.normalize(audio_compressed)
120
 
121
- # Convert back to torch tensor
122
  audio_tensor = torch.from_numpy(audio_final).float().unsqueeze(0)
123
 
124
  print(f"✅ Preprocessing complete: {len(audio_final)/target_sr:.2f}s of audio")
@@ -130,9 +210,7 @@ def advanced_preprocess_audio(audio_path, target_sr=16000):
130
  return basic_preprocess_audio(audio_path, target_sr)
131
 
132
  def basic_preprocess_audio(audio_path, target_sr=16000):
133
- """
134
- Fallback basic preprocessing if advanced fails
135
- """
136
  try:
137
  wav, sr = torchaudio.load(audio_path)
138
 
@@ -151,26 +229,17 @@ def basic_preprocess_audio(audio_path, target_sr=16000):
151
  raise
152
 
153
  def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0.6):
154
- """
155
- Advanced spectral noise gating using STFT
156
- """
157
  try:
158
- # Compute Short-Time Fourier Transform
159
  stft = librosa.stft(audio, n_fft=2048, hop_length=512)
160
  magnitude = np.abs(stft)
161
  phase = np.angle(stft)
162
 
163
- # Estimate noise floor from quietest frames
164
  noise_profile = np.percentile(magnitude, noise_floor_percentile, axis=1, keepdims=True)
165
-
166
- # Create noise gate mask (soft gating)
167
  snr = magnitude / (noise_profile + 1e-10)
168
  gate = np.minimum(1.0, np.maximum(0.0, (snr - 1.0) / 2.0))
169
-
170
- # Apply gate with reduction
171
  magnitude_gated = magnitude * (gate + (1 - gate) * (1 - reduction_factor))
172
 
173
- # Reconstruct signal
174
  stft_clean = magnitude_gated * np.exp(1j * phase)
175
  audio_clean = librosa.istft(stft_clean, hop_length=512)
176
 
@@ -180,15 +249,11 @@ def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0
180
  return audio
181
 
182
  def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
183
- """
184
- Simple dynamic range compression to reduce volume spikes
185
- """
186
  try:
187
- # Find samples above threshold
188
  abs_audio = np.abs(audio)
189
  above_threshold = abs_audio > threshold
190
 
191
- # Apply compression to loud parts
192
  compressed = audio.copy()
193
  compressed[above_threshold] = np.sign(audio[above_threshold]) * (
194
  threshold + (abs_audio[above_threshold] - threshold) / ratio
@@ -200,21 +265,18 @@ def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
200
  return audio
201
 
202
  # ============================================
203
- # 3. AUDIO FEATURE EXTRACTION (PROSODY)
204
  # ============================================
205
 
206
  def extract_prosodic_features(audio, sr):
207
- """
208
- Extract prosodic features that indicate emotional state
209
- """
210
  try:
211
  features = {}
212
 
213
- # 1. Pitch variation (f0) with improved tracking
214
  pitches, magnitudes = librosa.piptrack(
215
  y=audio,
216
  sr=sr,
217
- fmin=80, # Typical human speech range
218
  fmax=400
219
  )
220
  pitch_values = []
@@ -231,20 +293,16 @@ def extract_prosodic_features(audio, sr):
231
  else:
232
  features['pitch_mean'] = features['pitch_std'] = features['pitch_range'] = 0
233
 
234
- # 2. Energy/Intensity
235
  rms = librosa.feature.rms(y=audio)[0]
236
  features['energy_mean'] = np.mean(rms)
237
  features['energy_std'] = np.std(rms)
238
 
239
- # 3. Speech rate (zero crossing rate)
240
  zcr = librosa.feature.zero_crossing_rate(audio)[0]
241
  features['speech_rate'] = np.mean(zcr)
242
 
243
- # 4. Spectral features
244
  spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
245
  features['spectral_centroid_mean'] = np.mean(spectral_centroid)
246
 
247
- # 5. Spectral rolloff (brightness)
248
  spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
249
  features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
250
 
@@ -259,18 +317,12 @@ def extract_prosodic_features(audio, sr):
259
  }
260
 
261
  # ============================================
262
- # 4. LANGUAGE DETECTION & VALIDATION
263
  # ============================================
264
 
265
  def validate_hindi_text(text):
266
- """
267
- Validate if text contains Hindi/Devanagari characters
268
- Supports Hinglish (Hindi + English)
269
- """
270
- # Devanagari Unicode range
271
  hindi_pattern = re.compile(r'[\u0900-\u097F]')
272
-
273
- # Count Hindi characters
274
  hindi_chars = len(hindi_pattern.findall(text))
275
  total_chars = len(re.findall(r'\S', text))
276
 
@@ -279,20 +331,13 @@ def validate_hindi_text(text):
279
 
280
  hindi_ratio = hindi_chars / total_chars
281
 
282
- # Allow Hinglish (at least 15% Hindi characters - more lenient)
283
  if hindi_ratio < 0.15:
284
  return False, f"Insufficient Hindi content ({hindi_ratio*100:.1f}% Hindi)", hindi_ratio
285
 
286
  return True, "Valid Hindi/Hinglish", hindi_ratio
287
 
288
- # ============================================
289
- # 5. ENHANCED SENTIMENT ANALYSIS
290
- # ============================================
291
-
292
  def detect_negation(text):
293
- """
294
- Detect negation words that might flip sentiment
295
- """
296
  negation_words = [
297
  'नहीं', 'न', 'मत', 'नही', 'ना',
298
  'not', 'no', 'never', 'neither', 'nor',
@@ -306,15 +351,13 @@ def detect_negation(text):
306
  return False
307
 
308
  def detect_crisis_keywords(text):
309
- """
310
- Detect crisis/emergency keywords that indicate strong negative emotion
311
- """
312
  crisis_keywords = [
313
- 'बचाओ', 'मदद', 'help', 'save', # Distress calls
314
- 'मार', 'पीट', 'हिंसा', 'beat', 'hit', 'violence', # Violence
315
- 'डर', 'खतरा', 'fear', 'danger', # Fear/danger
316
- 'मर', 'मौत', 'death', 'die', # Death
317
- 'छोड़', 'leave me', 'stop' # Desperate pleas
318
  ]
319
 
320
  text_lower = text.lower()
@@ -324,15 +367,10 @@ def detect_crisis_keywords(text):
324
  return False
325
 
326
  def detect_mixed_emotions(text, prosodic_features):
327
- """
328
- Advanced mixed emotion detection using text and audio features
329
- CRITICAL: Don't mark crisis/distress as mixed emotions
330
- """
331
  text_lower = text.lower()
332
 
333
- # FIRST: Check if this is a crisis situation (never mixed)
334
  if detect_crisis_keywords(text):
335
- print("⚠️ Crisis keywords detected - NOT treating as mixed emotion")
336
  return False
337
 
338
  mixed_indicators = [
@@ -343,32 +381,26 @@ def detect_mixed_emotions(text, prosodic_features):
343
  'शायद', 'maybe', 'perhaps'
344
  ]
345
 
346
- positive_words = ['खुश', 'प्यार', 'अच्छा', 'बढ़िया', 'मज़ा', 'happy', 'love', 'good', 'nice', 'सुंदर', 'प्रसन्न']
347
- negative_words = ['दुख', 'रो', 'गुस्सा', 'बुरा', 'परेशान', 'sad', 'cry', 'angry', 'bad', 'upset', 'निराश', 'चिंता']
348
 
349
  has_mixed_indicators = any(ind in text_lower for ind in mixed_indicators)
350
  has_positive = any(word in text_lower for word in positive_words)
351
  has_negative = any(word in text_lower for word in negative_words)
352
 
353
- # Only prosodic if both high pitch AND high energy variation
354
- high_pitch_variation = prosodic_features['pitch_std'] > 35
355
- high_energy_variation = prosodic_features['energy_std'] > 0.08
356
-
357
- # Text must have BOTH opposing emotions to be truly mixed
358
  text_mixed = has_mixed_indicators and (has_positive and has_negative)
359
- audio_mixed = high_pitch_variation and high_energy_variation and (has_positive and has_negative)
360
 
361
- return text_mixed or audio_mixed
 
 
 
 
362
 
363
  def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
364
- """
365
- Enhanced sentiment analysis combining text and prosodic features
366
- CRITICAL: Properly handle crisis/distress situations
367
- """
368
  sentiment_scores = {}
369
 
370
  if not raw_results or not isinstance(raw_results, list) or len(raw_results) == 0:
371
- print("⚠️ Unexpected sentiment results format")
372
  return {'Negative': 0.33, 'Neutral': 0.34, 'Positive': 0.33}, 0.34, False
373
 
374
  label_mapping = {
@@ -390,48 +422,26 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
390
  if sentiment not in sentiment_scores:
391
  sentiment_scores[sentiment] = 0.0
392
 
393
- initial_confidence = max(sentiment_scores.values())
394
-
395
- # CRITICAL: Check for crisis keywords first
396
  is_crisis = detect_crisis_keywords(text)
397
  if is_crisis:
398
- print("🚨 CRISIS DETECTED - Strongly amplifying negative sentiment")
399
- # Heavily boost negative sentiment for crisis situations
400
  sentiment_scores['Negative'] = min(0.95, sentiment_scores['Negative'] * 1.8)
401
  sentiment_scores['Neutral'] = max(0.02, sentiment_scores['Neutral'] * 0.2)
402
  sentiment_scores['Positive'] = max(0.01, sentiment_scores['Positive'] * 0.1)
403
- is_mixed = False # Crisis is NEVER mixed emotion
404
  else:
405
- # Negation detection (only for non-crisis)
406
  has_negation = detect_negation(text)
407
  if has_negation:
408
- print("🔄 Negation detected - adjusting sentiment")
409
  temp = sentiment_scores['Positive']
410
  sentiment_scores['Positive'] = sentiment_scores['Negative']
411
  sentiment_scores['Negative'] = temp
412
 
413
- # Mixed emotions (only for non-crisis)
414
  is_mixed = detect_mixed_emotions(text, prosodic_features)
415
  if is_mixed:
416
- print("🔄 Mixed emotions detected - boosting neutral")
417
- neutral_boost = 0.20 # Reduced from 0.25
418
  sentiment_scores['Neutral'] = min(0.65, sentiment_scores['Neutral'] + neutral_boost)
419
  sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
420
  sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)
421
-
422
- # Prosodic adjustments (only for non-crisis)
423
- if prosodic_features['pitch_std'] > 45 and prosodic_features['energy_mean'] > 0.12:
424
- print("🎵 Strong emotional prosody detected")
425
- if sentiment_scores['Positive'] > sentiment_scores['Negative']:
426
- sentiment_scores['Positive'] = min(0.9, sentiment_scores['Positive'] * 1.2)
427
- else:
428
- sentiment_scores['Negative'] = min(0.9, sentiment_scores['Negative'] * 1.2)
429
- sentiment_scores['Neutral'] = max(0.05, sentiment_scores['Neutral'] * 0.8)
430
- elif prosodic_features['energy_mean'] < 0.03 and prosodic_features['pitch_std'] < 15:
431
- print("🎵 Calm/neutral prosody detected")
432
- sentiment_scores['Neutral'] = min(0.8, sentiment_scores['Neutral'] * 1.2)
433
 
434
- # Normalize
435
  total = sum(sentiment_scores.values())
436
  if total > 0:
437
  sentiment_scores = {k: v/total for k, v in sentiment_scores.items()}
@@ -441,45 +451,41 @@ def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
441
  return sentiment_scores, final_confidence, is_mixed
442
 
443
  # ============================================
444
- # 6. MAIN PREDICTION FUNCTION
445
  # ============================================
446
 
447
  def predict(audio_filepath):
448
- """
449
- Main prediction function with Indic Conformer ASR
450
- """
451
  try:
452
  print(f"\n{'='*60}")
453
  print(f"🎧 Processing audio file...")
454
 
455
  if audio_filepath is None:
456
- return {"⚠️ Error": "No audio file uploaded"}
 
 
 
 
457
 
458
- # ============================================
459
- # STEP 1: Advanced Audio Preprocessing
460
- # ============================================
461
  print("🔧 Applying advanced audio preprocessing...")
462
  try:
463
  audio_tensor, sr, audio_np = advanced_preprocess_audio(audio_filepath)
464
  prosodic_features = extract_prosodic_features(audio_np, sr)
465
  except Exception as e:
466
- print(f"⚠️ Preprocessing error: {e}")
467
- return {"⚠️ Preprocessing Error": str(e)}
 
 
 
468
 
469
- # ============================================
470
- # STEP 2: ASR with Indic Conformer
471
- # ============================================
472
- print("🔄 Transcribing with Indic Conformer (CTC & RNNT)...")
473
  try:
474
- # Try RNNT first (usually more accurate)
475
  transcription_rnnt = ASR_MODEL(audio_tensor, "hi", "rnnt")
476
- print(f"📝 RNNT Transcription: '{transcription_rnnt}'")
477
 
478
- # Fallback to CTC if RNNT fails or is empty
479
  if not transcription_rnnt or len(transcription_rnnt.strip()) < 2:
480
- print("⚠️ RNNT empty, trying CTC...")
481
  transcription_ctc = ASR_MODEL(audio_tensor, "hi", "ctc")
482
- print(f"📝 CTC Transcription: '{transcription_ctc}'")
483
  transcription = transcription_ctc
484
  else:
485
  transcription = transcription_rnnt
@@ -487,27 +493,33 @@ def predict(audio_filepath):
487
  transcription = transcription.strip()
488
 
489
  except Exception as asr_error:
490
- print(f"❌ ASR Error: {asr_error}")
491
- return {"⚠️ ASR Error": str(asr_error)}
 
 
 
492
 
493
- # ============================================
494
- # STEP 3: Validate Transcription
495
- # ============================================
496
  if not transcription or len(transcription) < 2:
497
- return {"⚠️ No Speech Detected": f"Transcription: {transcription or 'Empty'}"}
 
 
 
 
 
498
 
499
  is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
500
- print(f"🔍 {validation_msg} ({hindi_ratio*100:.1f}% Hindi)")
501
 
502
  if not is_valid:
503
  return {
504
- "⚠️ Language Error": validation_msg,
505
- "📝 Transcription": transcription
 
 
 
506
  }
507
 
508
- # ============================================
509
- # STEP 4: Sentiment Analysis
510
- # ============================================
511
  print("💭 Analyzing sentiment...")
512
  try:
513
  raw_sentiment = SENTIMENT_PIPELINE(transcription)
@@ -518,38 +530,68 @@ def predict(audio_filepath):
518
  raw_sentiment
519
  )
520
 
521
- # ============================================
522
- # STEP 5: Format Results
523
- # ============================================
524
- result_dict = {}
525
-
526
- for sentiment, score in sorted(sentiment_scores.items(), key=lambda x: x[1], reverse=True):
527
- result_dict[sentiment] = float(score)
528
 
529
- result_dict["_Confidence"] = float(confidence)
530
- result_dict["_Mixed_Emotions"] = 1.0 if is_mixed else 0.0
531
- result_dict["_Hindi_Content_Pct"] = float(hindi_ratio * 100)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
- print(f"📝 Full Transcription: {transcription}")
534
- print(f" Complete! Confidence: {confidence:.3f}")
535
- print(f"🔀 Mixed Emotions: {'Yes' if is_mixed else 'No'}")
536
- print(f"🌐 Hindi Content: {hindi_ratio*100:.0f}%")
537
  print(f"{'='*60}\n")
538
 
539
- return result_dict
540
 
541
  except Exception as sentiment_error:
542
- print(f"❌ Sentiment Error: {sentiment_error}")
543
- return {"⚠️ Sentiment Error": str(sentiment_error)}
 
 
 
 
544
 
545
  except Exception as e:
546
- print(f"❌ Critical Error: {str(e)}")
547
  import traceback
548
  traceback.print_exc()
549
- return {"⚠️ System Error": str(e)}
 
 
 
 
550
 
551
  # ============================================
552
- # 7. GRADIO INTERFACE
553
  # ============================================
554
 
555
  demo = gr.Interface(
@@ -559,69 +601,134 @@ demo = gr.Interface(
559
  label="🎤 Record or Upload Hindi Audio",
560
  sources=["upload", "microphone"]
561
  ),
562
- outputs=gr.Label(
563
- label="🎭 Enhanced Sentiment Analysis Results",
564
- num_top_classes=10
565
- ),
566
- title="🎤 Advanced Hindi Speech Sentiment Analysis (Indic Conformer)",
567
  description="""
568
- ## 🇮🇳 Professional-grade Hindi/Hinglish Speech Emotion Analysis
569
 
570
- ### ✨ Advanced Features:
571
- - **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR with CTC & RNNT decoding
572
- - **🧠 txlm-RoBERTa** - Hindi-optimized sentiment analysis
573
- - **🎵 Prosodic Analysis** - Voice tone, pitch, energy, spectral features
574
- - **🔄 Mixed Emotion Detection** - Handles complex feelings
575
  - **🌐 Hinglish Support** - Works with Hindi + English mix
576
- - **🎯 Confidence Scoring** - Know how reliable the prediction is
577
- - **🔧 Advanced Audio Preprocessing**:
578
- - DC offset removal
579
- - Aggressive silence trimming
580
- - Pre-emphasis filtering
581
- - Spectral noise gating
582
- - Dynamic range compression
583
- - Multi-stage normalization
584
- - **⚡ Cached Models** - Fast predictions after first load
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
 
586
  ### 🧪 Test Examples:
587
- - **😊 Positive**: "मैं बहुत खुश हूं आज"
588
- - **😢 Negative**: "मुझे बहुत दुख हो रहा है"
589
- - **😐 Neutral**: "मैं घर जा रहा हूं"
590
- - **🔀 Mixed**: "कभी खुश हूं कभी उदास"
591
- - **💭 Confused**: "समझ नहीं रहा क्या करूं"
592
- - **🗣️ Hinglish**: "I'm feeling बहुत अच्छा today"
593
-
594
- ### 📊 Output:
595
- - Sentiment probabilities (Positive/Negative/Neutral)
596
- - _Confidence: Prediction reliability
597
- - _Mixed_Emotions: 1.0 if mixed, 0.0 if single emotion
598
- - _Hindi_Content_Pct: % of Hindi characters
599
- - Full transcription in console logs
600
-
601
- ### 💡 Best Practices:
602
- 1. Speak clearly for 3-10 seconds
603
- 2. Reduce background noise when possible
604
- 3. Natural conversational tone works best
605
- 4. Both Hindi and Hinglish supported
606
-
607
- ### 🎯 Use Cases:
608
- - Mental health tracking
609
- - Customer feedback analysis
610
- - Call center monitoring
611
- - Personal diary analysis
612
- - Relationship counseling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  """,
614
- examples=None,
615
  theme=gr.themes.Soft(),
616
  flagging_mode="never",
617
- allow_flagging="never"
 
 
618
  )
619
 
620
  # ============================================
621
- # 8. LAUNCH APP
622
  # ============================================
623
 
624
  if __name__ == "__main__":
625
  print("🌐 Starting server...")
626
  demo.launch()
627
- print("🎉 Enhanced Hindi Sentiment Analysis App is ready!")
 
7
  import re
8
  import warnings
9
  import os
10
+ import json
11
  warnings.filterwarnings('ignore')
12
 
13
+ print("🚀 Starting Enhanced Hindi Speech Emotion Analysis App...")
14
 
15
  # ============================================
16
  # 1. GLOBAL MODEL LOADING (ONLY ONCE AT STARTUP)
17
  # ============================================
18
 
 
19
  SENTIMENT_PIPELINE = None
20
  ASR_MODEL = None
21
 
22
  def load_models():
23
+ """Load all models once at startup and cache them globally"""
 
 
24
  global SENTIMENT_PIPELINE, ASR_MODEL
25
 
 
26
  if SENTIMENT_PIPELINE is not None and ASR_MODEL is not None:
27
  print("✅ Models already loaded, skipping...")
28
  return
29
 
 
30
  print("📚 Loading Hindi sentiment analysis model...")
31
  try:
32
  sentiment_model_name = "LondonStory/txlm-roberta-hindi-sentiment"
 
40
  print(f"❌ Error loading sentiment model: {e}")
41
  raise
42
 
 
43
  print("🎤 Loading Indic Conformer 600M ASR model...")
44
  try:
45
  ASR_MODEL = AutoModel.from_pretrained(
 
53
 
54
  print("✅ All models loaded and cached in memory")
55
 
 
56
  load_models()
57
 
58
  # ============================================
59
+ # 2. EMOTION MAPPING
60
  # ============================================
61
 
62
+ def map_sentiment_to_emotion(sentiment_scores, text, prosodic_features, is_mixed):
63
  """
64
+ Map sentiment to specific emotions with confidence
65
  """
66
+ # Get dominant sentiment
67
+ dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
68
+ max_score = sentiment_scores[dominant_sentiment]
69
+
70
+ # Detect crisis/distress
71
+ is_crisis = detect_crisis_keywords(text)
72
+ has_negation = detect_negation(text)
73
+
74
+ # Analyze text for specific emotions
75
+ text_lower = text.lower()
76
+
77
+ # Emotion keyword mapping
78
+ emotion_keywords = {
79
+ 'joy': ['खुश', 'प्रसन्न', 'मज़ा', 'आनंद', 'happy', 'joy', 'excited', 'wonderful', 'बढ़िया', 'शानदार'],
80
+ 'love': ['प्यार', 'love', 'दिल', 'heart', 'romantic', 'affection', 'स्नेह'],
81
+ 'anger': ['गुस्सा', 'क्रोध', 'angry', 'mad', 'furious', 'rage', 'नाराज़'],
82
+ 'fear': ['डर', 'भय', 'खतरा', 'fear', 'scared', 'afraid', 'terror', 'panic', 'चिंता'],
83
+ 'sadness': ['दुख', 'रो', 'उदास', 'sad', 'cry', 'depressed', 'lonely', 'निराश', 'अकेला'],
84
+ 'surprise': ['हैरान', 'आश्चर्य', 'surprise', 'shocked', 'amazed', 'unexpected', 'अचंभा'],
85
+ 'disgust': ['घृणा', 'नफरत', 'disgust', 'hate', 'disgusting', 'gross'],
86
+ 'anxiety': ['चिंता', 'तनाव', 'परेशान', 'worry', 'anxious', 'stress', 'nervous', 'बेचैन'],
87
+ 'confusion': ['समझ नहीं', 'उलझन', 'confus', 'don\'t know', 'पता नहीं', 'क्या करूं'],
88
+ 'calm': ['शांत', 'ठीक', 'calm', 'peace', 'okay', 'fine', 'normal', 'सामान्य']
89
+ }
90
+
91
+ # Detect specific emotions from text
92
+ detected_emotions = []
93
+ for emotion, keywords in emotion_keywords.items():
94
+ if any(keyword in text_lower for keyword in keywords):
95
+ detected_emotions.append(emotion)
96
+
97
+ # Prosodic analysis
98
+ high_energy = prosodic_features['energy_mean'] > 0.12
99
+ high_pitch_var = prosodic_features['pitch_std'] > 40
100
+ low_energy = prosodic_features['energy_mean'] < 0.03
101
+ calm_pitch = prosodic_features['pitch_std'] < 15
102
+
103
+ # Determine emotion
104
+ if is_crisis:
105
+ emotion = "fear"
106
+ secondary_emotion = "distress"
107
+ confidence = max(0.85, max_score)
108
+ elif is_mixed:
109
+ if len(detected_emotions) >= 2:
110
+ emotion = detected_emotions[0]
111
+ secondary_emotion = detected_emotions[1]
112
+ elif detected_emotions:
113
+ emotion = detected_emotions[0]
114
+ secondary_emotion = "neutral"
115
+ else:
116
+ emotion = "mixed"
117
+ secondary_emotion = None
118
+ confidence = sentiment_scores['Neutral']
119
+ elif detected_emotions:
120
+ # Use detected emotions
121
+ emotion = detected_emotions[0]
122
+ secondary_emotion = detected_emotions[1] if len(detected_emotions) > 1 else None
123
+ confidence = max_score
124
+ else:
125
+ # Map based on sentiment + prosody
126
+ secondary_emotion = None
127
+ if dominant_sentiment == 'Positive':
128
+ if high_energy and high_pitch_var:
129
+ emotion = "joy"
130
+ secondary_emotion = "excitement"
131
+ elif 'प्यार' in text_lower or 'love' in text_lower:
132
+ emotion = "love"
133
+ else:
134
+ emotion = "happiness"
135
+ confidence = max_score
136
+
137
+ elif dominant_sentiment == 'Negative':
138
+ if is_crisis or 'डर' in text_lower or 'fear' in text_lower:
139
+ emotion = "fear"
140
+ elif 'गुस्सा' in text_lower or 'angry' in text_lower:
141
+ emotion = "anger"
142
+ elif 'दुख' in text_lower or 'sad' in text_lower or 'रो' in text_lower:
143
+ emotion = "sadness"
144
+ elif 'चिंता' in text_lower or 'worry' in text_lower:
145
+ emotion = "anxiety"
146
+ else:
147
+ emotion = "sadness"
148
+ confidence = max_score
149
+
150
+ else: # Neutral
151
+ if calm_pitch and low_energy:
152
+ emotion = "calm"
153
+ elif 'समझ नहीं' in text_lower or 'confus' in text_lower:
154
+ emotion = "confusion"
155
+ else:
156
+ emotion = "neutral"
157
+ confidence = max_score
158
+
159
+ return emotion, secondary_emotion, confidence
160
+
161
+ # ============================================
162
+ # 3. AUDIO PREPROCESSING FUNCTIONS
163
+ # ============================================
164
+
165
+ def advanced_preprocess_audio(audio_path, target_sr=16000):
166
+ """Advanced audio preprocessing pipeline"""
167
  try:
 
168
  wav, sr = torchaudio.load(audio_path)
169
 
 
170
  if wav.shape[0] > 1:
171
  wav = torch.mean(wav, dim=0, keepdim=True)
172
  print(f"📊 Converted stereo to mono")
173
 
 
174
  if sr != target_sr:
175
  resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
176
  wav = resampler(wav)
177
  print(f"🔄 Resampled from {sr}Hz to {target_sr}Hz")
178
 
 
179
  audio_np = wav.squeeze().numpy()
 
 
180
  audio_np = audio_np - np.mean(audio_np)
181
 
182
+ audio_trimmed, _ = librosa.effects.trim(
 
183
  audio_np,
184
+ top_db=25,
185
  frame_length=2048,
186
  hop_length=512
187
  )
188
  print(f"✂️ Trimmed {len(audio_np) - len(audio_trimmed)} silent samples")
189
 
 
190
  audio_normalized = librosa.util.normalize(audio_trimmed)
191
 
 
192
  pre_emphasis = 0.97
193
  audio_emphasized = np.append(
194
  audio_normalized[0],
195
  audio_normalized[1:] - pre_emphasis * audio_normalized[:-1]
196
  )
197
 
 
198
  audio_denoised = spectral_noise_gate(audio_emphasized, target_sr)
 
 
199
  audio_compressed = dynamic_range_compression(audio_denoised)
 
 
200
  audio_final = librosa.util.normalize(audio_compressed)
201
 
 
202
  audio_tensor = torch.from_numpy(audio_final).float().unsqueeze(0)
203
 
204
  print(f"✅ Preprocessing complete: {len(audio_final)/target_sr:.2f}s of audio")
 
210
  return basic_preprocess_audio(audio_path, target_sr)
211
 
212
  def basic_preprocess_audio(audio_path, target_sr=16000):
213
+ """Fallback basic preprocessing"""
 
 
214
  try:
215
  wav, sr = torchaudio.load(audio_path)
216
 
 
229
  raise
230
 
231
  def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0.6):
232
+ """Advanced spectral noise gating using STFT"""
 
 
233
  try:
 
234
  stft = librosa.stft(audio, n_fft=2048, hop_length=512)
235
  magnitude = np.abs(stft)
236
  phase = np.angle(stft)
237
 
 
238
  noise_profile = np.percentile(magnitude, noise_floor_percentile, axis=1, keepdims=True)
 
 
239
  snr = magnitude / (noise_profile + 1e-10)
240
  gate = np.minimum(1.0, np.maximum(0.0, (snr - 1.0) / 2.0))
 
 
241
  magnitude_gated = magnitude * (gate + (1 - gate) * (1 - reduction_factor))
242
 
 
243
  stft_clean = magnitude_gated * np.exp(1j * phase)
244
  audio_clean = librosa.istft(stft_clean, hop_length=512)
245
 
 
249
  return audio
250
 
251
  def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
252
+ """Simple dynamic range compression"""
 
 
253
  try:
 
254
  abs_audio = np.abs(audio)
255
  above_threshold = abs_audio > threshold
256
 
 
257
  compressed = audio.copy()
258
  compressed[above_threshold] = np.sign(audio[above_threshold]) * (
259
  threshold + (abs_audio[above_threshold] - threshold) / ratio
 
265
  return audio
266
 
267
  # ============================================
268
+ # 4. PROSODIC FEATURE EXTRACTION
269
  # ============================================
270
 
271
  def extract_prosodic_features(audio, sr):
272
+ """Extract prosodic features"""
 
 
273
  try:
274
  features = {}
275
 
 
276
  pitches, magnitudes = librosa.piptrack(
277
  y=audio,
278
  sr=sr,
279
+ fmin=80,
280
  fmax=400
281
  )
282
  pitch_values = []
 
293
  else:
294
  features['pitch_mean'] = features['pitch_std'] = features['pitch_range'] = 0
295
 
 
296
  rms = librosa.feature.rms(y=audio)[0]
297
  features['energy_mean'] = np.mean(rms)
298
  features['energy_std'] = np.std(rms)
299
 
 
300
  zcr = librosa.feature.zero_crossing_rate(audio)[0]
301
  features['speech_rate'] = np.mean(zcr)
302
 
 
303
  spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
304
  features['spectral_centroid_mean'] = np.mean(spectral_centroid)
305
 
 
306
  spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
307
  features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
308
 
 
317
  }
318
 
319
  # ============================================
320
+ # 5. TEXT ANALYSIS HELPERS
321
  # ============================================
322
 
323
  def validate_hindi_text(text):
324
+ """Validate if text contains Hindi/Devanagari characters"""
 
 
 
 
325
  hindi_pattern = re.compile(r'[\u0900-\u097F]')
 
 
326
  hindi_chars = len(hindi_pattern.findall(text))
327
  total_chars = len(re.findall(r'\S', text))
328
 
 
331
 
332
  hindi_ratio = hindi_chars / total_chars
333
 
 
334
  if hindi_ratio < 0.15:
335
  return False, f"Insufficient Hindi content ({hindi_ratio*100:.1f}% Hindi)", hindi_ratio
336
 
337
  return True, "Valid Hindi/Hinglish", hindi_ratio
338
 
 
 
 
 
339
  def detect_negation(text):
340
+ """Detect negation words"""
 
 
341
  negation_words = [
342
  'नहीं', 'न', 'मत', 'नही', 'ना',
343
  'not', 'no', 'never', 'neither', 'nor',
 
351
  return False
352
 
353
  def detect_crisis_keywords(text):
354
+ """Detect crisis/emergency keywords"""
 
 
355
  crisis_keywords = [
356
+ 'बचाओ', 'मदद', 'help', 'save',
357
+ 'मार', 'पीट', 'हिंसा', 'beat', 'hit', 'violence',
358
+ 'डर', 'खतरा', 'fear', 'danger',
359
+ 'मर', 'मौत', 'death', 'die',
360
+ 'छोड़', 'leave me', 'stop'
361
  ]
362
 
363
  text_lower = text.lower()
 
367
  return False
368
 
369
  def detect_mixed_emotions(text, prosodic_features):
370
+ """Detect mixed emotions"""
 
 
 
371
  text_lower = text.lower()
372
 
 
373
  if detect_crisis_keywords(text):
 
374
  return False
375
 
376
  mixed_indicators = [
 
381
  'शायद', 'maybe', 'perhaps'
382
  ]
383
 
384
+ positive_words = ['खुश', 'प्यार', 'अच्छा', 'बढ़िया', 'मज़ा', 'happy', 'love', 'good', 'nice']
385
+ negative_words = ['दुख', 'रो', 'गुस्सा', 'बुरा', 'परेशान', 'sad', 'cry', 'angry', 'bad', 'upset']
386
 
387
  has_mixed_indicators = any(ind in text_lower for ind in mixed_indicators)
388
  has_positive = any(word in text_lower for word in positive_words)
389
  has_negative = any(word in text_lower for word in negative_words)
390
 
 
 
 
 
 
391
  text_mixed = has_mixed_indicators and (has_positive and has_negative)
 
392
 
393
+ return text_mixed
394
+
395
+ # ============================================
396
+ # 6. ENHANCED SENTIMENT ANALYSIS
397
+ # ============================================
398
 
399
  def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
400
+ """Enhanced sentiment analysis"""
 
 
 
401
  sentiment_scores = {}
402
 
403
  if not raw_results or not isinstance(raw_results, list) or len(raw_results) == 0:
 
404
  return {'Negative': 0.33, 'Neutral': 0.34, 'Positive': 0.33}, 0.34, False
405
 
406
  label_mapping = {
 
422
  if sentiment not in sentiment_scores:
423
  sentiment_scores[sentiment] = 0.0
424
 
 
 
 
425
  is_crisis = detect_crisis_keywords(text)
426
  if is_crisis:
 
 
427
  sentiment_scores['Negative'] = min(0.95, sentiment_scores['Negative'] * 1.8)
428
  sentiment_scores['Neutral'] = max(0.02, sentiment_scores['Neutral'] * 0.2)
429
  sentiment_scores['Positive'] = max(0.01, sentiment_scores['Positive'] * 0.1)
430
+ is_mixed = False
431
  else:
 
432
  has_negation = detect_negation(text)
433
  if has_negation:
 
434
  temp = sentiment_scores['Positive']
435
  sentiment_scores['Positive'] = sentiment_scores['Negative']
436
  sentiment_scores['Negative'] = temp
437
 
 
438
  is_mixed = detect_mixed_emotions(text, prosodic_features)
439
  if is_mixed:
440
+ neutral_boost = 0.20
 
441
  sentiment_scores['Neutral'] = min(0.65, sentiment_scores['Neutral'] + neutral_boost)
442
  sentiment_scores['Positive'] = max(0.1, sentiment_scores['Positive'] - neutral_boost/2)
443
  sentiment_scores['Negative'] = max(0.1, sentiment_scores['Negative'] - neutral_boost/2)
 
 
 
 
 
 
 
 
 
 
 
 
444
 
 
445
  total = sum(sentiment_scores.values())
446
  if total > 0:
447
  sentiment_scores = {k: v/total for k, v in sentiment_scores.items()}
 
451
  return sentiment_scores, final_confidence, is_mixed
452
 
453
  # ============================================
454
+ # 7. MAIN PREDICTION FUNCTION
455
  # ============================================
456
 
457
def predict(audio_filepath):
    """Main prediction function - Returns JSON-parseable dict.

    Pipeline: audio preprocessing -> prosodic feature extraction -> Hindi ASR
    (Indic Conformer, RNNT decoding with CTC fallback) -> Hindi-content
    validation -> sentiment/emotion analysis. Every failure path returns a
    structured error dict instead of raising, so the Gradio JSON output is
    always well-formed.

    Args:
        audio_filepath: Path to the uploaded/recorded audio file, or None.

    Returns:
        dict with "status" == "success" and the full analysis payload, or
        "status" == "error" with "error_type" and "message" fields.
    """
    try:
        print(f"\n{'='*60}")
        print(f"🎧 Processing audio file...")

        if audio_filepath is None:
            return {
                "status": "error",
                "error_type": "no_audio",
                "message": "No audio file uploaded"
            }

        # Preprocessing
        print("🔧 Applying advanced audio preprocessing...")
        try:
            audio_tensor, sr, audio_np = advanced_preprocess_audio(audio_filepath)
            prosodic_features = extract_prosodic_features(audio_np, sr)
        except Exception as e:
            return {
                "status": "error",
                "error_type": "preprocessing_error",
                "message": str(e)
            }

        # ASR Transcription
        print("🔄 Transcribing with Indic Conformer...")
        try:
            transcription_rnnt = ASR_MODEL(audio_tensor, "hi", "rnnt")

            # Fall back to CTC decoding when RNNT yields an empty/near-empty
            # hypothesis.
            if not transcription_rnnt or len(transcription_rnnt.strip()) < 2:
                transcription_ctc = ASR_MODEL(audio_tensor, "hi", "ctc")
                transcription = transcription_ctc
            else:
                transcription = transcription_rnnt

            transcription = transcription.strip()

        except Exception as asr_error:
            return {
                "status": "error",
                "error_type": "asr_error",
                "message": str(asr_error)
            }

        # Validation
        if not transcription or len(transcription) < 2:
            return {
                "status": "error",
                "error_type": "no_speech",
                "message": "No speech detected in the audio",
                "transcription": transcription or ""
            }

        is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)

        if not is_valid:
            return {
                "status": "error",
                "error_type": "language_error",
                "message": validation_msg,
                "transcription": transcription,
                "hindi_content_percentage": round(hindi_ratio * 100, 2)
            }

        # Sentiment Analysis
        print("💭 Analyzing sentiment...")
        try:
            raw_sentiment = SENTIMENT_PIPELINE(transcription)

            sentiment_scores, final_confidence, is_mixed = enhanced_sentiment_analysis(
                transcription,
                prosodic_features,
                raw_sentiment
            )

            # Map to emotion
            emotion, secondary_emotion, emotion_confidence = map_sentiment_to_emotion(
                sentiment_scores,
                transcription,
                prosodic_features,
                is_mixed
            )

            # Run the keyword detectors once and reuse the results below
            # (previously re-scanned the transcription per output field).
            is_crisis = detect_crisis_keywords(transcription)
            has_negation = detect_negation(transcription)

            # Build structured output
            result = {
                "status": "success",
                "transcription": transcription,
                "emotion": {
                    "primary": emotion,
                    "secondary": secondary_emotion,
                    "confidence": round(emotion_confidence, 4)
                },
                "sentiment_scores": {
                    "positive": round(sentiment_scores['Positive'], 4),
                    "neutral": round(sentiment_scores['Neutral'], 4),
                    "negative": round(sentiment_scores['Negative'], 4)
                },
                "analysis": {
                    "mixed_emotions": is_mixed,
                    "hindi_content_percentage": round(hindi_ratio * 100, 2),
                    "is_crisis": is_crisis,
                    "has_negation": has_negation
                },
                "prosodic_features": {
                    "pitch_mean": round(prosodic_features['pitch_mean'], 2),
                    "pitch_std": round(prosodic_features['pitch_std'], 2),
                    "energy_mean": round(prosodic_features['energy_mean'], 4),
                    "energy_std": round(prosodic_features['energy_std'], 4),
                    "speech_rate": round(prosodic_features['speech_rate'], 4)
                }
            }

            print(f" Detected Emotion: {emotion}")
            print(f"📝 Transcription: {transcription}")
            print(f"{'='*60}\n")

            return result

        except Exception as sentiment_error:
            return {
                "status": "error",
                "error_type": "sentiment_error",
                "message": str(sentiment_error),
                "transcription": transcription
            }

    except Exception as e:
        # Last-resort catch: log the traceback and return a structured error.
        import traceback
        traceback.print_exc()
        return {
            "status": "error",
            "error_type": "system_error",
            "message": str(e)
        }
592
 
593
  # ============================================
594
+ # 8. GRADIO INTERFACE
595
  # ============================================
596
 
597
  demo = gr.Interface(
 
601
  label="🎤 Record or Upload Hindi Audio",
602
  sources=["upload", "microphone"]
603
  ),
604
+ outputs=gr.JSON(label="📊 Emotion Analysis Results (API-Ready JSON)"),
605
+ title="🎭 Hindi Speech Emotion Analysis API",
 
 
 
606
  description="""
607
+ ## 🇮🇳 Advanced Hindi/Hinglish Speech Emotion Detection
608
 
609
+ ### ✨ Features:
610
+ - **🎙️ Indic Conformer 600M** - State-of-the-art multilingual ASR
611
+ - **🧠 Emotion Detection** - Joy, Sadness, Anger, Fear, Love, Calm, etc.
612
+ - **🎵 Voice Analysis** - Analyzes tone, pitch, energy, and spectral features
 
613
  - **🌐 Hinglish Support** - Works with Hindi + English mix
614
+ - **📝 JSON Output** - Easy to parse for API integration
615
+
616
+ ### 📊 JSON Output Format:
617
+ ```json
618
+ {
619
+ "status": "success",
620
+ "transcription": "मैं बहुत खुश हूं",
621
+ "emotion": {
622
+ "primary": "joy",
623
+ "secondary": null,
624
+ "confidence": 0.8745
625
+ },
626
+ "sentiment_scores": {
627
+ "positive": 0.8745,
628
+ "neutral": 0.0923,
629
+ "negative": 0.0332
630
+ },
631
+ "analysis": {
632
+ "mixed_emotions": false,
633
+ "hindi_content_percentage": 100.0,
634
+ "is_crisis": false,
635
+ "has_negation": false
636
+ },
637
+ "prosodic_features": {
638
+ "pitch_mean": 180.45,
639
+ "pitch_std": 35.12,
640
+ "energy_mean": 0.0876,
641
+ "energy_std": 0.0234,
642
+ "speech_rate": 0.1234
643
+ }
644
+ }
645
+ ```
646
+
647
+ ### 🎯 Supported Emotions:
648
+ - **Positive**: joy, happiness, love, excitement, calm
649
+ - **Negative**: sadness, anger, fear, anxiety, disgust
650
+ - **Neutral**: neutral, confusion, mixed
651
 
652
  ### 🧪 Test Examples:
653
+ - **😊 Joy**: "मैं बहुत खुश हूं आज"
654
+ - **😢 Sadness**: "मुझे बहुत दुख हो रहा है"
655
+ - **😠 Anger**: "मुझे बहुत गुस्सा रहा है"
656
+ - **😨 Fear**: "मुझे डर लग रहा है"
657
+ - **😐 Calm**: "सब ठीक है, मैं शांत हूं"
658
+ - **❤️ Love**: "मुझे तुमसे बहुत प्यार है"
659
+
660
+ ### 💡 API Usage:
661
+ 1. Send audio file to the endpoint
662
+ 2. Receive structured JSON response
663
+ 3. Parse `emotion.primary` for the main emotion
664
+ 4. Use `transcription` for text analysis
665
+ 5. Check `analysis.mixed_emotions` for complex states
666
+
667
+ ### 🔗 Integration Examples:
668
+
669
+ **Python API Client:**
670
+ ```python
671
+ import requests
672
+
673
+ # Send audio file
674
+ with open("audio.wav", "rb") as f:
675
+ response = requests.post(
676
+ "YOUR_API_URL/predict",
677
+ files={"audio": f}
678
+ )
679
+
680
+ result = response.json()
681
+
682
+ if result["status"] == "success":
683
+ print(f"Emotion: {result['emotion']['primary']}")
684
+ print(f"Text: {result['transcription']}")
685
+ print(f"Confidence: {result['emotion']['confidence']}")
686
+ ```
687
+
688
+ **Database Storage:**
689
+ ```python
690
+ # Store in MongoDB
691
+ db.emotions.insert_one({
692
+ "user_id": user_id,
693
+ "timestamp": datetime.now(),
694
+ "emotion": result["emotion"]["primary"],
695
+ "transcription": result["transcription"],
696
+ "confidence": result["emotion"]["confidence"],
697
+ "sentiment_positive": result["sentiment_scores"]["positive"],
698
+ "is_crisis": result["analysis"]["is_crisis"]
699
+ })
700
+ ```
701
+
702
+ **React/JavaScript:**
703
+ ```javascript
704
+ const formData = new FormData();
705
+ formData.append('audio', audioBlob);
706
+
707
+ fetch('YOUR_API_URL/predict', {
708
+ method: 'POST',
709
+ body: formData
710
+ })
711
+ .then(res => res.json())
712
+ .then(data => {
713
+ if (data.status === 'success') {
714
+ console.log('Emotion:', data.emotion.primary);
715
+ console.log('Text:', data.transcription);
716
+ }
717
+ });
718
+ ```
719
  """,
 
720
  theme=gr.themes.Soft(),
721
  flagging_mode="never",
722
+ examples=[
723
+ ["examples/happy.wav"] if os.path.exists("examples/happy.wav") else None,
724
+ ] if os.path.exists("examples") else None
725
  )
726
 
727
  # ============================================
728
+ # 9. LAUNCH APP
729
  # ============================================
730
 
731
if __name__ == "__main__":
    print("🌐 Starting server...")
    # demo.launch() blocks until the server is shut down, so announce
    # readiness before launching (the message previously printed only
    # after shutdown).
    print("🎉 Hindi Emotion Analysis API is ready!")
    demo.launch()