Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| from transformers import pipeline, AutoModel, AutoConfig | |
| import librosa | |
| import numpy as np | |
| import re | |
| import warnings | |
| import os | |
| import logging | |
| import hashlib | |
| import json | |
| import time | |
| from datetime import datetime | |
| from typing import Dict, Any, Optional, Tuple | |
| from functools import lru_cache | |
| from enum import Enum | |
| from huggingface_hub import login, InferenceClient | |
# Pre-load onnxruntime to handle stack execution issues.
# A failed import is only logged; start-up continues either way.
logger_temp = logging.getLogger("onnxruntime_check")
try:
    import onnxruntime as ort
    logger_temp.info(f"✅ onnxruntime loaded successfully: {ort.__version__}")
except Exception as e:
    logger_temp.warning(f"⚠️ onnxruntime import issue: {e}")
# ============================================
# ENVIRONMENT & LOGGING SETUP
# ============================================
# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is unset).
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")
if HUGGINGFACE_TOKEN:
    # Authenticate against the Hugging Face Hub at import time.
    login(token=HUGGINGFACE_TOKEN)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    handlers=[logging.StreamHandler()]
)
# Module-wide logger used by every function below.
logger = logging.getLogger("hindi_emotion_system")
# Suppresses all Python warnings for the whole process.
warnings.filterwarnings('ignore')
# Configuration
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"  # model id sent with each chat-completion request
MAX_PROMPT_LENGTH = 2000  # the transcript is cut to this many characters before it enters the prompt
RECOMMENDATION_TIMEOUT = 60  # NOTE(review): not referenced in the visible code - confirm intended use
MAX_RETRIES = 2  # additional LLM attempts made after the first one fails
ENABLE_CACHING = True  # switches the in-memory recommendation cache on/off
CACHE_TTL_SECONDS = 3600  # cache entries older than this are discarded on lookup
logger.info("🚀 Starting Enhanced Hindi Speech Emotion & Recommendation System...")
# ============================================
# MODEL INITIALIZATION
# ============================================
# Module-level handles; all start as None and are assigned by load_models().
SENTIMENT_PIPELINE = None  # text-classification pipeline (Hindi sentiment)
EMOTION_PIPELINE = None  # zero-shot-classification pipeline (emotions)
ASR_MODEL = None  # speech-recognition model
LLM_CLIENT = None  # InferenceClient; stays None when HF_TOKEN is not set
# Maps cache key -> (cached payload, time.time() at insertion).
recommendation_cache = {}
def load_models():
    """Fill the module-level model handles; a no-op once all four are set.

    The sentiment, emotion and ASR models are mandatory: a failure while
    loading any of them is logged and re-raised. The LLM client is optional:
    it is only created when HF_TOKEN is available, and a failure while
    creating it is logged without being raised.
    """
    global SENTIMENT_PIPELINE, EMOTION_PIPELINE, ASR_MODEL, LLM_CLIENT

    if all((SENTIMENT_PIPELINE, ASR_MODEL, EMOTION_PIPELINE, LLM_CLIENT)):
        logger.info("✅ Models already loaded")
        return

    # Sentiment Model
    logger.info("📚 Loading Hindi sentiment model...")
    try:
        SENTIMENT_PIPELINE = pipeline(
            "text-classification",
            model="LondonStory/txlm-roberta-hindi-sentiment",
            top_k=None,
        )
    except Exception as e:
        logger.error(f"❌ Sentiment model error: {e}")
        raise
    else:
        logger.info("✅ Sentiment model loaded")

    # Emotion Model
    logger.info("🎭 Loading Zero-Shot emotion model...")
    try:
        EMOTION_PIPELINE = pipeline(
            "zero-shot-classification",
            model="joeddav/xlm-roberta-large-xnli",
        )
    except Exception as e:
        logger.error(f"❌ Emotion model error: {e}")
        raise
    else:
        logger.info("✅ Emotion model loaded")

    # ASR Model
    logger.info("🎤 Loading Indic Conformer ASR...")
    try:
        ASR_MODEL = AutoModel.from_pretrained(
            "ai4bharat/indic-conformer-600m-multilingual",
            trust_remote_code=True,
        )
    except Exception as e:
        logger.error(f"❌ ASR model error: {e}")
        raise
    else:
        logger.info("✅ ASR model loaded")

    # LLM Client - Using Novita AI provider for free Llama 3.1 access
    logger.info("🤖 Initializing Llama 3.1 client via Novita AI...")
    try:
        if not HUGGINGFACE_TOKEN:
            logger.warning("⚠️ HF_TOKEN not set - recommendations will use fallback")
        else:
            LLM_CLIENT = InferenceClient(
                provider="novita",
                api_key=HUGGINGFACE_TOKEN,
            )
            logger.info("✅ LLM client initialized with Novita AI provider")
    except Exception as e:
        logger.error(f"❌ LLM client error: {e}")

    logger.info("✅ All models loaded successfully")
# Load every model once, when the module is imported.
load_models()
# ============================================
# EMOTION LABELS
# ============================================
# Candidate labels passed to the zero-shot emotion classifier.
EMOTION_LABELS = [
    "joy", "happiness", "sadness", "anger", "fear",
    "distress", "panic", "love", "surprise", "calm",
    "neutral", "excitement", "frustration"
]
# ============================================
# AUDIO PREPROCESSING
# ============================================
# Resample transforms keyed by (orig_freq, new_freq); filled lazily by get_resampler().
CACHED_RESAMPLERS = {}
def get_resampler(orig_freq, new_freq):
    """Return the cached Resample transform for this rate pair, building it on first use."""
    rate_pair = (orig_freq, new_freq)
    try:
        return CACHED_RESAMPLERS[rate_pair]
    except KeyError:
        transform = torchaudio.transforms.Resample(
            orig_freq=orig_freq,
            new_freq=new_freq,
        )
        CACHED_RESAMPLERS[rate_pair] = transform
        return transform
def spectral_noise_gate(audio, sr, noise_floor_percentile=10, reduction_factor=0.6):
    """Attenuate low-level spectral content with a soft, per-frequency gate.

    Args:
        audio: 1-D sample array.
        sr: Sample rate; accepted for interface compatibility, not used.
        noise_floor_percentile: Percentile of each frequency bin's magnitude
            (taken over time) that is treated as that bin's noise floor.
        reduction_factor: Fraction of magnitude removed from bins sitting at
            or below the noise floor.

    Returns:
        The gated signal, with the same number of samples as ``audio``.
        If processing fails, the failure is logged and ``audio`` is returned
        unchanged (best-effort stage).
    """
    n_fft = 2048
    hop_length = 512
    try:
        stft = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
        magnitude = np.abs(stft)
        phase = np.angle(stft)
        noise_profile = np.percentile(
            magnitude, noise_floor_percentile, axis=1, keepdims=True
        )
        snr = magnitude / (noise_profile + 1e-10)  # epsilon avoids division by zero
        # Gate ramps from 0 (at the noise floor) to 1 (at 3x the noise floor).
        gate = np.clip((snr - 1.0) / 2.0, 0.0, 1.0)
        magnitude_gated = magnitude * (gate + (1 - gate) * (1 - reduction_factor))
        stft_clean = magnitude_gated * np.exp(1j * phase)
        # `length` keeps the output aligned with the input sample count.
        return librosa.istft(stft_clean, hop_length=hop_length, length=len(audio))
    except Exception as e:
        # Was a bare `except:`; do not trap KeyboardInterrupt/SystemExit and
        # leave a trace of why the stage was skipped.
        logger.warning(f"Spectral noise gate failed, using ungated audio: {e}")
        return audio
def dynamic_range_compression(audio, threshold=0.5, ratio=3.0):
    """Compress samples whose magnitude exceeds ``threshold``.

    The portion of each sample's magnitude above ``threshold`` is divided by
    ``ratio``; samples at or below the threshold are left untouched and the
    sign of every sample is preserved.

    Args:
        audio: NumPy array of samples.
        threshold: Magnitude above which compression is applied.
        ratio: Divisor applied to the overshoot above ``threshold``.

    Returns:
        A new array with the compressed samples (the input is not modified).
        If processing fails, the failure is logged and ``audio`` itself is
        returned unchanged (best-effort stage).
    """
    try:
        magnitude = np.abs(audio)
        loud = magnitude > threshold
        compressed = audio.copy()
        compressed[loud] = np.sign(audio[loud]) * (
            threshold + (magnitude[loud] - threshold) / ratio
        )
        return compressed
    except Exception as e:
        # Was a bare `except:`; do not trap KeyboardInterrupt/SystemExit and
        # leave a trace of why the stage was skipped.
        logger.warning(f"Dynamic range compression failed, using input audio: {e}")
        return audio
def advanced_preprocess_audio(audio_path, target_sr=16000):
    """Load an audio file and clean it up for ASR and feature extraction.

    Full chain: load, down-mix to mono, resample to ``target_sr``, remove DC
    offset, trim silence, normalize, pre-emphasize, noise-gate, compress and
    normalize again. If any step fails, a warning is logged and the file is
    reloaded with only the mono down-mix and resampling applied.

    Returns:
        (tensor, sample_rate, numpy_samples)
    """

    def _load_mono_at_target_rate():
        waveform, native_sr = torchaudio.load(audio_path)
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        if native_sr != target_sr:
            waveform = get_resampler(native_sr, target_sr)(waveform)
        return waveform

    try:
        samples = _load_mono_at_target_rate().squeeze().numpy()
        samples = samples - np.mean(samples)
        trimmed, _ = librosa.effects.trim(samples, top_db=25)
        normalized = librosa.util.normalize(trimmed)

        pre_emphasis = 0.97
        emphasized = np.append(
            normalized[0],
            normalized[1:] - pre_emphasis * normalized[:-1],
        )

        denoised = spectral_noise_gate(emphasized, target_sr)
        compressed = dynamic_range_compression(denoised)
        final_samples = librosa.util.normalize(compressed)

        final_tensor = torch.from_numpy(final_samples).float().unsqueeze(0)
        return final_tensor, target_sr, final_samples
    except Exception as e:
        logger.warning(f"Advanced preprocessing failed: {e}")
        fallback = _load_mono_at_target_rate()
        return fallback, target_sr, fallback.squeeze().numpy()
def extract_prosodic_features(audio, sr):
    """Compute pitch, energy, zero-crossing and spectral summary statistics.

    Returns a dict with the keys pitch_mean, pitch_std, pitch_range,
    energy_mean, energy_std, speech_rate (mean zero-crossing rate),
    spectral_centroid_mean and spectral_rolloff_mean. If anything fails, a
    warning is logged and every value is 0.
    """
    frame_length = 2048
    hop_length = 512
    try:
        features = {}

        f0, voiced_flag, voiced_probs = librosa.pyin(
            audio, fmin=80, fmax=400, sr=sr, frame_length=2048
        )
        voiced_pitch = f0[~np.isnan(f0)]  # unvoiced frames are NaN
        if len(voiced_pitch) > 0:
            features['pitch_mean'] = np.mean(voiced_pitch)
            features['pitch_std'] = np.std(voiced_pitch)
            features['pitch_range'] = np.max(voiced_pitch) - np.min(voiced_pitch)
        else:
            for pitch_key in ('pitch_mean', 'pitch_std', 'pitch_range'):
                features[pitch_key] = 0

        energy = librosa.feature.rms(
            y=audio, frame_length=frame_length, hop_length=hop_length
        )[0]
        features['energy_mean'] = np.mean(energy)
        features['energy_std'] = np.std(energy)

        crossings = librosa.feature.zero_crossing_rate(
            audio, frame_length=frame_length, hop_length=hop_length
        )[0]
        features['speech_rate'] = np.mean(crossings)

        spectrum = np.abs(librosa.stft(audio, n_fft=frame_length, hop_length=hop_length))
        centroid = librosa.feature.spectral_centroid(S=spectrum, sr=sr)[0]
        features['spectral_centroid_mean'] = np.mean(centroid)
        rolloff = librosa.feature.spectral_rolloff(S=spectrum, sr=sr)[0]
        features['spectral_rolloff_mean'] = np.mean(rolloff)

        return features
    except Exception as e:
        logger.warning(f"Feature extraction error: {e}")
        return {
            'pitch_mean': 0, 'pitch_std': 0, 'pitch_range': 0,
            'energy_mean': 0, 'energy_std': 0, 'speech_rate': 0,
            'spectral_centroid_mean': 0, 'spectral_rolloff_mean': 0,
        }
| # ============================================ | |
| # TEXT ANALYSIS | |
| # ============================================ | |
def validate_hindi_text(text):
    """Check that enough of the text is written in Devanagari.

    Returns:
        (is_valid, message, ratio) where ratio is the share of
        non-whitespace characters in the U+0900-U+097F block. Text with no
        non-whitespace characters is rejected with ratio 0; otherwise the
        text is valid when the ratio is at least 0.15.
    """
    visible_count = sum(1 for _ in re.finditer(r'\S', text))
    if visible_count == 0:
        return False, "Empty transcription", 0

    devanagari_count = sum(1 for _ in re.finditer(r'[\u0900-\u097F]', text))
    hindi_ratio = devanagari_count / visible_count
    if hindi_ratio >= 0.15:
        return True, "Valid Hindi/Hinglish", hindi_ratio
    return False, f"Insufficient Hindi content ({hindi_ratio*100:.1f}%)", hindi_ratio
def detect_negation(text):
    """Return True when the text contains a Hindi or English negation word.

    Matching is done on whole words. The previous substring test fired on
    almost any input ('न' occurs inside most Hindi words, 'no' inside
    "know"), which made callers treat nearly every sentence as negated.
    The multi-word phrases 'कभी नहीं' and 'बिल्कुल नहीं' need no separate
    entry because they contain the word 'नहीं'.
    """
    negation_words = {
        'नहीं', 'न', 'मत', 'नही', 'ना',
        'not', 'no', 'never', 'neither', 'nor',
    }
    # Devanagari runs are matched with an explicit range because `\w` does
    # not cover dependent vowel signs; the danda marks U+0964/U+0965 are
    # excluded so sentence punctuation does not stick to the last word.
    words = re.findall(r'[\u0900-\u0963\u0966-\u097F]+|[a-z]+', text.lower())
    return any(word in negation_words for word in words)
def detect_crisis_keywords(text):
    """Return True when the lower-cased text contains any crisis keyword.

    Keywords are matched as substrings, so a short stem such as 'घबरा' also
    matches longer words that contain it.
    """
    crisis_terms = (
        'बचाओ', 'मदद', 'help', 'save', 'rescue',
        'मार', 'मारो', 'पीट', 'हिंसा', 'beat', 'violence',
        'हमला', 'attack', 'assault', 'चाकू', 'बंदूक',
        'डर', 'भय', 'fear', 'scared', 'खतरा', 'danger',
        'मर', 'मरना', 'मौत', 'death', 'die', 'kill',
        'खून', 'blood', 'जान', 'life', 'छोड़ो', 'stop',
        'आत्महत्या', 'suicide', 'दर्द', 'pain', 'सांस', 'breath',
        'दौरा', 'seizure', 'बेहोश', 'unconscious',
        'एम्बुलेंस', 'ambulance', 'अस्पताल', 'hospital',
        'बलात्कार', 'rape', 'छेड़', 'molest', 'harassment',
        'दुर्घटना', 'accident', 'आग', 'fire', 'घबरा', 'panic',
    )
    haystack = text.lower()
    for term in crisis_terms:
        if term in haystack:
            return True
    return False
def detect_mental_health_distress(text):
    """Return True when at least two distinct distress keywords occur in the text.

    Keywords are matched as substrings of the lower-cased text.
    """
    distress_terms = (
        'अवसाद', 'डिप्रेशन', 'depression', 'उदास', 'निराश',
        'घबराहट', 'anxiety', 'चिंता', 'अकेला', 'lonely',
        'हार', 'give up', 'थक', 'tired', 'exhausted',
    )
    haystack = text.lower()
    hits = [term for term in distress_terms if term in haystack]
    return len(hits) >= 2
def detect_grief_loss(text):
    """Return True when the lower-cased text contains any grief/loss keyword (substring match)."""
    grief_terms = (
        'चल बसा', 'गुज़र', 'खो दिया', 'died', 'passed away',
        'अंतिम संस्कार', 'funeral', 'याद', 'miss', 'गम', 'grief',
    )
    haystack = text.lower()
    for term in grief_terms:
        if term in haystack:
            return True
    return False
def detect_relationship_distress(text):
    """Return True when the lower-cased text contains any relationship-distress keyword (substring match)."""
    relationship_terms = (
        'तलाक', 'divorce', 'breakup', 'धोखा', 'cheat',
        'लड़ाई', 'fight', 'झगड़ा', 'argument', 'छोड़ दिया',
    )
    haystack = text.lower()
    for term in relationship_terms:
        if term in haystack:
            return True
    return False
def detect_mixed_emotions(text, prosodic_features):
    """Return True when the text shows a hedging word plus both positive and negative words.

    Crisis text is never reported as mixed. ``prosodic_features`` is accepted
    but not consulted. All matching is by substring on the lower-cased text.
    """
    if detect_crisis_keywords(text):
        return False

    haystack = text.lower()

    def mentions_any(terms):
        return any(term in haystack for term in terms)

    hedged = mentions_any(('कभी', 'लेकिन', 'पर', 'but', 'या', 'or', 'शायद', 'maybe'))
    upbeat = mentions_any(('खुश', 'प्यार', 'अच्छा', 'happy', 'love', 'good'))
    downbeat = mentions_any(('दुख', 'रो', 'गुस्सा', 'sad', 'cry', 'angry'))

    if not hedged:
        return False
    return upbeat and downbeat
| # ============================================ | |
| # SENTIMENT & EMOTION ANALYSIS | |
| # ============================================ | |
def sentiment_analysis(text):
    """Run the sentiment pipeline on the text; log a warning and return None on failure."""
    try:
        raw_scores = SENTIMENT_PIPELINE(text)
    except Exception as e:
        logger.warning(f"Sentiment error: {e}")
        raw_scores = None
    return raw_scores
def emotion_classification(text):
    """Score the text against EMOTION_LABELS (single-label); log a warning and return None on failure."""
    try:
        raw_emotions = EMOTION_PIPELINE(text, EMOTION_LABELS, multi_label=False)
    except Exception as e:
        logger.warning(f"Emotion error: {e}")
        raw_emotions = None
    return raw_emotions
def enhanced_sentiment_analysis(text, prosodic_features, raw_results):
    """Turn raw sentiment output into adjusted, normalized class scores.

    Args:
        text: Transcript the scores belong to.
        prosodic_features: Passed through to the mixed-emotion detector.
        raw_results: Output of the sentiment pipeline; its first element is
            iterated as dicts with 'label' and 'score'.

    Returns:
        (scores, confidence, is_mixed) where scores maps 'Negative',
        'Neutral' and 'Positive' to values normalized to sum to 1 and
        confidence is the largest of them. When ``raw_results`` is empty or
        not a list, a near-uniform default is returned.
    """
    if not (raw_results and isinstance(raw_results, list)):
        return {'Negative': 0.33, 'Neutral': 0.34, 'Positive': 0.33}, 0.34, False

    canonical_label = {
        'LABEL_0': 'Negative', 'LABEL_1': 'Neutral', 'LABEL_2': 'Positive',
        'negative': 'Negative', 'neutral': 'Neutral', 'positive': 'Positive',
    }

    scores = {}
    for entry in raw_results[0]:
        # Unrecognized labels are folded into 'Neutral'.
        scores[canonical_label.get(entry['label'], 'Neutral')] = entry['score']
    for name in ('Negative', 'Neutral', 'Positive'):
        scores.setdefault(name, 0.0)

    if detect_crisis_keywords(text):
        # Crisis wording pushes the result strongly towards Negative.
        mixed = False
        scores['Negative'] = min(0.95, scores['Negative'] * 1.8)
        scores['Neutral'] = max(0.02, scores['Neutral'] * 0.2)
        scores['Positive'] = max(0.01, scores['Positive'] * 0.1)
    else:
        if detect_negation(text):
            # A negated sentence flips the positive and negative scores.
            flipped_positive = scores['Negative']
            flipped_negative = scores['Positive']
            scores['Positive'] = flipped_positive
            scores['Negative'] = flipped_negative
        mixed = detect_mixed_emotions(text, prosodic_features)
        if mixed:
            scores['Neutral'] = min(0.65, scores['Neutral'] + 0.20)
            scores['Positive'] = max(0.1, scores['Positive'] - 0.10)
            scores['Negative'] = max(0.1, scores['Negative'] - 0.10)

    total = sum(scores.values())
    if total > 0:
        scores = {name: value / total for name, value in scores.items()}

    return scores, max(scores.values()), mixed
def process_emotion_results(emotion_result, transcription, prosodic_features=None):
    """Re-weight zero-shot emotion scores by detected situation and summarize them.

    Exactly one situation adjustment is applied, in priority order: crisis,
    mental-health distress, grief, relationship distress. Scores are then
    normalized to sum to 1 and the five highest are reported.
    ``prosodic_features`` is accepted but not consulted.

    Returns:
        Dict with 'primary', 'secondary', 'confidence' and 'top_emotions'.
        A falsy ``emotion_result`` yields primary "unknown" with no emotions.
    """
    if not emotion_result:
        return {
            "primary": "unknown", "secondary": None,
            "confidence": 0.0, "top_emotions": []
        }

    labels = emotion_result['labels']
    raw_scores = emotion_result['scores']
    by_label = {label: raw_scores[idx] for idx, label in enumerate(labels)}

    crisis = detect_crisis_keywords(transcription)
    mental_health = detect_mental_health_distress(transcription)
    grief = detect_grief_loss(transcription)
    relationship = detect_relationship_distress(transcription)

    def boost(names, factor, ceiling):
        """Multiply the named scores by ``factor``, capped at ``ceiling``."""
        for name in names:
            if name in by_label:
                by_label[name] = min(ceiling, by_label[name] * factor)

    if crisis:
        logger.info("🚨 Crisis detected - adjusting emotions")
        boost(['fear', 'distress', 'panic', 'anger', 'sadness'], 4.0, 0.95)
        for name in ['surprise', 'excitement', 'happiness', 'joy', 'calm']:
            if name in by_label:
                by_label[name] = max(0.01, by_label[name] * 0.15)
    elif mental_health:
        boost(['sadness', 'fear', 'frustration', 'neutral'], 2.0, 0.90)
    elif grief:
        boost(['sadness'], 2.5, 0.85)
    elif relationship:
        boost(['sadness', 'anger', 'frustration'], 1.8, 0.80)

    total = sum(by_label.values())
    if total > 0:
        by_label = {name: value / total for name, value in by_label.items()}

    ranked = sorted(by_label.items(), key=lambda pair: pair[1], reverse=True)
    top_emotions = [
        {"emotion": name, "score": round(value, 4)} for name, value in ranked[:5]
    ]

    summary = {
        "primary": "unknown",
        "secondary": None,
        "confidence": 0.0,
        "top_emotions": top_emotions,
    }
    if top_emotions:
        summary["primary"] = top_emotions[0]["emotion"]
        summary["confidence"] = top_emotions[0]["score"]
    if len(top_emotions) > 1:
        summary["secondary"] = top_emotions[1]["emotion"]
    return summary
| # ============================================ | |
| # LLM RECOMMENDATION SYSTEM | |
| # ============================================ | |
class ValidationStatus(str, Enum):
    """Outcome of validating a recommendation; members are also plain strings."""

    VALID = "valid"      # no issues and no warnings
    WARNING = "warning"  # warnings only
    INVALID = "invalid"  # at least one issue
class ResponseValidator:
    """Checks LLM recommendations for required content and patches in missing helplines.

    Both public methods are class methods and are called directly on the
    class, e.g. ``ResponseValidator.validate_recommendation(text, result)``.
    """

    # Helpline numbers grouped by the situation they serve.
    HELPLINES = {
        'emergency': ['112'],
        'women': ['181', '1091'],
        'mental_health': ['9152987821', '08046110007'],
        'suicide_prevention': ['9820466726']
    }
    # Transcript fragments treated as suicide indicators (substring match).
    SUICIDE_KEYWORDS = ('आत्महत्या', 'suicide', 'मर जा', 'want to die')

    @classmethod
    def _has_suicide_indicators(cls, emotion_result: dict) -> bool:
        """Return True when the transcript contains any suicide keyword."""
        transcript_lower = emotion_result.get('transcription', '').lower()
        return any(kw in transcript_lower for kw in cls.SUICIDE_KEYWORDS)

    @classmethod
    def validate_recommendation(cls, recommendation: str, emotion_result: dict) -> Dict[str, Any]:
        """Validate a recommendation against the detected situation.

        Issues (status "invalid"): text shorter than 10 characters, no
        Devanagari characters, crisis without an emergency/women helpline,
        suicide indicators without the suicide-prevention helpline.
        Warnings (status "warning"): mental-health distress without a
        mental-health helpline.

        Returns:
            Dict with 'status', 'issues', 'warnings' and 'validated_at'
            (ISO-formatted UTC timestamp).
        """
        issues = []
        # Deliberately not named `warnings`, which would shadow the module.
        advisories = []

        if len(recommendation.strip()) < 10:
            issues.append("Recommendation too short")
        if not re.search(r'[\u0900-\u097F]', recommendation):
            issues.append("No Hindi script detected")

        analysis = emotion_result.get('analysis', {}).get('situations', {})
        if analysis.get('is_crisis', False):
            crisis_numbers = cls.HELPLINES['emergency'] + cls.HELPLINES['women']
            if not any(h in recommendation for h in crisis_numbers):
                issues.append("Crisis detected but no emergency helpline")
        if analysis.get('is_mental_health_distress', False):
            if not any(h in recommendation for h in cls.HELPLINES['mental_health']):
                advisories.append("Mental health distress but no helpline")

        if cls._has_suicide_indicators(emotion_result):
            if '9820466726' not in recommendation:
                issues.append("Suicide indicators but no prevention helpline")

        if issues:
            status = ValidationStatus.INVALID
        elif advisories:
            status = ValidationStatus.WARNING
        else:
            status = ValidationStatus.VALID
        return {
            'status': status.value,
            'issues': issues,
            'warnings': advisories,
            'validated_at': datetime.utcnow().isoformat()
        }

    @classmethod
    def enhance_recommendation(cls, recommendation: str, emotion_result: dict) -> str:
        """Append helplines that the situation requires but the text lacks.

        Adds the emergency line for a crisis, the mental-health line for
        mental-health distress, and the suicide-prevention line when the
        transcript shows suicide indicators (the validator treats its absence
        as an issue, so it must be repairable here).

        Returns:
            The recommendation followed by the additions, or the original
            string unchanged when nothing needs to be added.
        """
        analysis = emotion_result.get('analysis', {}).get('situations', {})
        enhancements = []

        if analysis.get('is_crisis', False):
            if '112' not in recommendation:
                enhancements.append("तुरंत 112 (पुलिस) या 181 (महिला हेल्पलाइन) पर संपर्क करें।")
        if analysis.get('is_mental_health_distress', False):
            if '9152987821' not in recommendation:
                enhancements.append("मानसिक स्वास्थ्य सहायता: 9152987821")
        if cls._has_suicide_indicators(emotion_result):
            prevention_number = cls.HELPLINES['suicide_prevention'][0]
            if prevention_number not in recommendation:
                enhancements.append(f"आत्महत्या रोकथाम हेल्पलाइन (AASRA): {prevention_number}")

        if not enhancements:
            return recommendation
        return f"{recommendation} {' '.join(enhancements)}"
def get_cache_key(emotion_result: dict) -> str:
    """Return an MD5 hex digest identifying the parts of a result that drive the recommendation.

    The digest covers the transcript, the dominant sentiment, the primary
    emotion and the crisis flag; missing entries fall back to '' / False.
    """
    situations = emotion_result.get('analysis', {}).get('situations', {})
    fingerprint = {
        'transcript': emotion_result.get('transcription', ''),
        'sentiment': emotion_result.get('sentiment', {}).get('dominant', ''),
        'primary_emotion': emotion_result.get('emotion', {}).get('primary', ''),
        'is_crisis': situations.get('is_crisis', False),
    }
    # sort_keys makes the serialization independent of dict ordering.
    serialized = json.dumps(fingerprint, sort_keys=True)
    return hashlib.md5(serialized.encode()).hexdigest()
def get_from_cache(cache_key: str) -> Optional[Dict[str, Any]]:
    """Return the cached payload for the key, or None.

    None is returned when caching is disabled, the key is absent, or the
    entry is older than CACHE_TTL_SECONDS (the stale entry is removed).
    """
    if not ENABLE_CACHING:
        return None
    try:
        payload, stored_at = recommendation_cache[cache_key]
    except KeyError:
        return None

    age_seconds = time.time() - stored_at
    if age_seconds > CACHE_TTL_SECONDS:
        del recommendation_cache[cache_key]
        return None
    return payload
def save_to_cache(cache_key: str, data: Dict[str, Any]):
    """Store the payload with the current time under the key; a no-op when caching is disabled."""
    if not ENABLE_CACHING:
        return
    stored_at = time.time()
    recommendation_cache[cache_key] = (data, stored_at)
def load_few_shot_examples() -> str:
    """Return the fixed few-shot examples that compose_prompt() embeds in the LLM prompt.

    The text holds three worked examples (a crisis, mental-health distress
    and relationship distress), each with a transcript, sentiment, primary
    emotion, situation flag and a Hindi action recommendation.
    """
    return """
Example 1:
Transcript: "मुझे बचाओ! कोई मुझे मार रहा है।"
Sentiment: "Negative"
Primary Emotion: "fear"
Is Crisis: True
Action: "तुरंत 112 पर पुलिस को कॉल करें और सुरक्षित स्थान पर जाएं। यदि संभव हो तो महिला हेल्पलाइन 181 पर भी संपर्क करें।"
Example 2:
Transcript: "मैं बहुत अकेला और उदास महसूस कर रहा हूँ।"
Sentiment: "Negative"
Primary Emotion: "sadness"
Is Mental Health Distress: True
Action: "मानसिक स्वास्थ्य सहायता के लिए NIMHANS हेल्पलाइन 08046110007 या Vandrevala Foundation 9152987821 से संपर्क करें।"
Example 3:
Transcript: "मेरी पत्नी ने मुझे छोड़ दिया है।"
Sentiment: "Negative"
Primary Emotion: "sadness"
Is Relationship Distress: True
Action: "परिवार या विश्वसनीय मित्रों से बात करें। यदि आवश्यक हो तो व्यावसायिक परामर्श सेवा लें।"
"""
def compose_prompt(emotion_result: dict) -> str:
    """Build the LLM prompt for one analysis result.

    The prompt consists of a role instruction, the few-shot examples, the
    fields of ``emotion_result`` (transcript, sentiment, emotions, situation
    flags) and the list of helplines the model may cite.

    Args:
        emotion_result: Analysis result; must contain 'emotion' (with
            'primary' and 'confidence') and 'sentiment' (with 'dominant').
            'transcription' and 'analysis' are optional.

    Returns:
        The prompt text, ending with the cue for the Hindi recommendation.

    Raises:
        KeyError: If a required entry is missing from ``emotion_result``.
    """
    analysis = emotion_result.get('analysis', {}).get('situations', {})
    emotion = emotion_result["emotion"]
    # Only the transcript is length-limited; the rest of the prompt is fixed text.
    transcript = emotion_result.get('transcription', '')[:MAX_PROMPT_LENGTH]
    prompt = f"""You are an AI assistant providing compassionate support recommendations for Indian women.
{load_few_shot_examples()}
Now analyze this input:
Transcript: "{transcript}"
Sentiment: "{emotion_result['sentiment']['dominant']}"
Primary Emotion: "{emotion['primary']}"
Secondary Emotion: "{emotion.get('secondary', '')}"
Confidence: {emotion['confidence']:.2f}
Is Crisis: {analysis.get('is_crisis', False)}
Is Mental Health Distress: {analysis.get('is_mental_health_distress', False)}
Is Grief/Loss: {analysis.get('is_grief_loss', False)}
Is Relationship Distress: {analysis.get('is_relationship_distress', False)}
Provide a direct, actionable recommendation in Hindi with empathy. Include relevant helplines:
- Emergency/Police: 112
- Women's Helpline: 181, 1091
- Mental Health: 9152987821 (Vandrevala), 08046110007 (NIMHANS)
- Suicide Prevention: 9820466726 (AASRA)
Action Recommendation (in Hindi):"""
    return prompt
def get_llama_recommendation(emotion_result: dict, retry_count: int = 0) -> str:
    """Ask the LLM for a recommendation, retrying on failure and falling back to canned text.

    Args:
        emotion_result: Analysis result used to build the prompt.
        retry_count: Number of attempts already made; attempts continue
            until this counter reaches MAX_RETRIES.

    Returns:
        The model's recommendation, or the fallback recommendation when no
        client is configured or every attempt fails or returns empty text.
    """
    if not LLM_CLIENT:
        return get_fallback_recommendation(emotion_result)

    prompt = compose_prompt(emotion_result)
    attempt = retry_count
    while True:
        try:
            logger.info(f"Calling Llama 3.1 via Novita AI (attempt {attempt + 1})")
            # Use chat.completions.create with Novita AI provider
            completion = LLM_CLIENT.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=300,
                temperature=0.7,
                top_p=0.9,
            )
            recommendation = completion.choices[0].message.content.strip()
            if not recommendation:
                raise ValueError("Empty recommendation")
            logger.info("✅ LLM recommendation generated via Novita AI")
            return recommendation
        except Exception as e:
            logger.warning(f"LLM error (attempt {attempt + 1}): {e}")
            if attempt >= MAX_RETRIES:
                logger.error(f"LLM failed after {MAX_RETRIES + 1} attempts")
                return get_fallback_recommendation(emotion_result)
            # Fixed pause before the next attempt.
            time.sleep(2)
            attempt += 1
def get_fallback_recommendation(emotion_result: dict) -> str:
    """Return a canned Hindi recommendation chosen by the first matching situation flag.

    Priority: crisis, mental-health distress, relationship distress; a
    generic message is returned when none of them is set.
    """
    situations = emotion_result.get('analysis', {}).get('situations', {})
    canned_by_flag = (
        ('is_crisis',
         "तुरंत 112 (पुलिस) या 181 (महिला हेल्पलाइन) पर संपर्क करें। आपकी सुरक्षा सर्वोपरि है।"),
        ('is_mental_health_distress',
         "मानसिक स्वास्थ्य सहायता के लिए 9152987821 (Vandrevala Foundation) पर संपर्क करें। आप अकेली नहीं हैं।"),
        ('is_relationship_distress',
         "परिवार या मित्रों से बात करें। यदि आवश्यक हो तो परामर्श सेवा लें।"),
    )
    for flag, message in canned_by_flag:
        if situations.get(flag, False):
            return message
    return "यदि आपको सहायता चाहिए तो किसी विश्वसनीय व्यक्ति से संपर्क करें। आपकी भावनाएं महत्वपूर्ण हैं।"
def assess_risk_level(emotion_result: dict) -> str:
    """Classify an analysis result into a risk badge.

    Levels, in priority order:
      * CRITICAL - crisis detected.
      * HIGH     - mental-health distress, confidence above 0.8 and an acute
                   primary emotion.
      * MEDIUM   - any of mental-health distress, relationship distress or
                   grief/loss.
      * LOW      - none of the above.

    Missing entries are treated as "not detected"; a missing or None primary
    emotion is treated as an empty label.
    """
    situations = emotion_result.get('analysis', {}).get('situations', {})
    emotion = emotion_result.get('emotion', {})
    confidence = emotion.get('confidence', 0)
    primary = (emotion.get('primary') or '').lower()

    # 'distress' is included because it is the acute label the emotion
    # classifier actually emits; 'despair' and 'hopelessness' are kept for
    # callers that supply their own labels.
    acute_emotions = {'despair', 'fear', 'panic', 'hopelessness', 'distress'}

    if situations.get('is_crisis', False):
        return "🔴 CRITICAL"

    mental_health = situations.get('is_mental_health_distress', False)
    if mental_health and confidence > 0.8 and primary in acute_emotions:
        return "🟠 HIGH"

    if (mental_health or
            situations.get('is_relationship_distress', False) or
            situations.get('is_grief_loss', False)):
        return "🟡 MEDIUM"
    return "🟢 LOW"
| # ============================================ | |
| # MAIN PREDICTION FUNCTION | |
| # ============================================ | |
def predict_emotion(audio_filepath):
    """Run the audio -> transcript -> sentiment/emotion analysis for one file.

    Returns:
        A dict whose "status" is either "success" (with "transcription",
        "emotion", "sentiment", "analysis" and "prosodic_features") or
        "error" (with "error_type" and "message"). Unexpected exceptions do
        not propagate: the traceback is printed and a "system_error" result
        is returned.
    """
    try:
        logger.info("🎧 Processing audio file...")
        if audio_filepath is None:
            return {
                "status": "error",
                "error_type": "no_audio",
                "message": "No audio file uploaded",
            }

        # Preprocessing
        logger.info("🔧 Preprocessing audio...")
        audio_tensor, sr, audio_np = advanced_preprocess_audio(audio_filepath)
        prosodic_features = extract_prosodic_features(audio_np, sr)

        # ASR Transcription: RNNT decoding first, CTC only when RNNT gives
        # back (almost) nothing.
        logger.info("🔄 Transcribing...")
        transcription = ASR_MODEL(audio_tensor, "hi", "rnnt")
        if not transcription or len(transcription.strip()) < 2:
            transcription = ASR_MODEL(audio_tensor, "hi", "ctc")
        transcription = transcription.strip()
        if len(transcription) < 2:
            return {
                "status": "error",
                "error_type": "no_speech",
                "message": "No speech detected in the audio",
            }

        is_valid, validation_msg, hindi_ratio = validate_hindi_text(transcription)
        if not is_valid:
            return {
                "status": "error",
                "error_type": "language_error",
                "message": validation_msg,
                "transcription": transcription,
            }

        # Sentiment and Emotion Analysis
        logger.info("💭 Analyzing sentiment and emotions...")
        sentiment_result = sentiment_analysis(transcription)
        emotion_result = emotion_classification(transcription)
        sentiment_scores, confidence, is_mixed = enhanced_sentiment_analysis(
            transcription, prosodic_features, sentiment_result
        )
        emotion_data = process_emotion_results(
            emotion_result, transcription, prosodic_features
        )
        dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
        logger.info(f"✅ Emotion: {emotion_data['primary']}, Sentiment: {dominant_sentiment}")

        sentiment_section = {
            "dominant": dominant_sentiment,
            "scores": {
                "positive": round(sentiment_scores['Positive'], 4),
                "neutral": round(sentiment_scores['Neutral'], 4),
                "negative": round(sentiment_scores['Negative'], 4),
            },
            "confidence": round(confidence, 4),
        }
        analysis_section = {
            "mixed_emotions": is_mixed,
            "hindi_content_percentage": round(hindi_ratio * 100, 2),
            "has_negation": detect_negation(transcription),
            "situations": {
                "is_crisis": detect_crisis_keywords(transcription),
                "is_mental_health_distress": detect_mental_health_distress(transcription),
                "is_grief_loss": detect_grief_loss(transcription),
                "is_relationship_distress": detect_relationship_distress(transcription),
            },
        }
        prosody_section = {
            "pitch_mean": round(prosodic_features['pitch_mean'], 2),
            "pitch_std": round(prosodic_features['pitch_std'], 2),
            "energy_mean": round(prosodic_features['energy_mean'], 4),
            "speech_rate": round(prosodic_features['speech_rate'], 4),
        }
        return {
            "status": "success",
            "transcription": transcription,
            "emotion": emotion_data,
            "sentiment": sentiment_section,
            "analysis": analysis_section,
            "prosodic_features": prosody_section,
        }
    except Exception as e:
        import traceback
        traceback.print_exc()
        return {
            "status": "error",
            "error_type": "system_error",
            "message": str(e),
        }
def get_recommendation(audio_filepath):
    """Main function: Audio -> Emotion Analysis -> LLM Recommendation

    Args:
        audio_filepath: Path of the recorded/uploaded audio; falsy when the
            user supplied nothing.

    Returns:
        A 5-tuple of strings:
        (action, risk_level, validation_info, metadata, analysis_info).
        On missing input or a failed analysis the tuple carries a
        user-facing message and placeholder/diagnostic values instead.
    """
    if not audio_filepath:
        return (
            "कृपया ऑडियो रिकॉर्ड या अपलोड करें।",
            "⚪️ N/A",
            "❌ No input",
            "",
            ""
        )
    start_time = time.time()
    # Step 1: Emotion Analysis
    logger.info("=" * 60)
    logger.info("STEP 1: Emotion Analysis")
    emotion_result = predict_emotion(audio_filepath)
    if emotion_result.get('status') != 'success':
        # Translate each analysis error type into its own user-facing tuple.
        error_type = emotion_result.get('error_type', 'unknown')
        error_msg = emotion_result.get('message', 'Unknown error')
        if error_type == 'no_speech':
            return (
                "ऑडियो में कोई स्पीच नहीं मिली। कृपया फिर से प्रयास करें।",
                "⚪️ N/A",
                "❌ No speech detected",
                "",
                ""
            )
        elif error_type == 'language_error':
            return (
                f"भाषा त्रुटि: {error_msg}\n\nकृपया हिंदी या हिंग्लिश में बोलें।",
                "⚪️ N/A",
                f"❌ Language validation failed",
                "",
                f"Transcription: {emotion_result.get('transcription', 'N/A')}"
            )
        else:
            # Any other error: surface the raw result for debugging.
            return (
                f"त्रुटि: {error_msg}",
                "🔴 ERROR",
                f"❌ {error_type}",
                "",
                str(emotion_result)
            )
    # Step 2: Generate Recommendation
    logger.info("STEP 2: LLM Recommendation Generation")
    cache_key = get_cache_key(emotion_result)
    cached_data = get_from_cache(cache_key)
    if cached_data:
        logger.info("♻️ Using cached recommendation")
        action = cached_data['action']
        validation_result = cached_data['validation']
        enhanced = cached_data.get('enhanced', False)
        cached = True
    else:
        logger.info("🆕 Generating new recommendation")
        action = get_llama_recommendation(emotion_result)
        validation_result = ResponseValidator.validate_recommendation(action, emotion_result)
        enhanced = False
        if validation_result['status'] in [ValidationStatus.INVALID.value, ValidationStatus.WARNING.value]:
            logger.warning(f"Validation issues: {validation_result['issues'] + validation_result['warnings']}")
            original_action = action
            action = ResponseValidator.enhance_recommendation(action, emotion_result)
            if action != original_action:
                # Re-validate so the reported status reflects the patched text.
                enhanced = True
                logger.info("🔧 Recommendation auto-enhanced")
                validation_result = ResponseValidator.validate_recommendation(action, emotion_result)
        cache_data = {
            'action': action,
            'validation': validation_result,
            'enhanced': enhanced
        }
        save_to_cache(cache_key, cache_data)
        cached = False
    # Elapsed wall-clock time in milliseconds, covering analysis and recommendation.
    processing_time = round((time.time() - start_time) * 1000)
    risk_level = assess_risk_level(emotion_result)
    # Format outputs
    validation_status = validation_result['status'].upper()
    validation_emoji = {
        'VALID': '✅',
        'WARNING': '⚠️',
        'INVALID': '❌'
    }.get(validation_status, '❓')
    validation_info = f"{validation_emoji} **{validation_status}**"
    if validation_result['issues']:
        validation_info += "\n\n**Issues:**\n" + "\n".join([f"- {i}" for i in validation_result['issues']])
    if validation_result['warnings']:
        validation_info += "\n\n**Warnings:**\n" + "\n".join([f"- {w}" for w in validation_result['warnings']])
    metadata = f"""
**Processing Time:** {processing_time}ms
**Cached:** {'Yes ♻️' if cached else 'No 🆕'}
**Enhanced:** {'Yes 🔧' if enhanced else 'No'}
**Model:** {MODEL_NAME}
"""
    emotion = emotion_result['emotion']
    sentiment = emotion_result['sentiment']
    situations = emotion_result['analysis']['situations']
    analysis_info = f"""
**📝 Transcription:** {emotion_result['transcription']}
**🎭 Emotion Analysis:**
- Primary: {emotion['primary']} ({emotion['confidence']:.1%})
- Secondary: {emotion.get('secondary', 'N/A')}
**💭 Sentiment:** {sentiment['dominant']}
- Positive: {sentiment['scores']['positive']:.1%}
- Neutral: {sentiment['scores']['neutral']:.1%}
- Negative: {sentiment['scores']['negative']:.1%}
**🚨 Situation Detection:**
- Crisis: {'✅' if situations['is_crisis'] else '❌'}
- Mental Health: {'✅' if situations['is_mental_health_distress'] else '❌'}
- Grief/Loss: {'✅' if situations['is_grief_loss'] else '❌'}
- Relationship: {'✅' if situations['is_relationship_distress'] else '❌'}
"""
    logger.info("=" * 60)
    return action, risk_level, validation_info, metadata, analysis_info
| # ============================================ | |
| # GRADIO INTERFACE | |
| # ============================================ | |
# Static Markdown copy for the interface.
#
# The literals are written flush-left so they need no dedenting, and every
# Markdown block (heading, paragraph, list, table, rule) is separated from the
# next by a blank line.  Without the blank line, a bold label that directly
# follows a list item is parsed as a lazy continuation of that item and is
# rendered inside the bullet instead of as its own paragraph.
_INTRO_MD = """
# 🇮🇳 Hindi Speech Emotion & Action Recommendation System

**Complete AI Pipeline:** Audio → Emotion Analysis → LLM-Powered Recommendations

### 🔄 System Architecture:

1. **🎙️ Speech Recognition:** Indic Conformer 600M (Hindi ASR)
2. **🎭 Emotion Detection:** Zero-Shot Classification (13 emotions)
3. **💭 Sentiment Analysis:** Hindi-specific sentiment model
4. **🤖 Recommendations:** Llama 3.1 8B Instruct (contextual support)
5. **✅ Validation:** Automatic helpline integration & quality checks
"""

_TIPS_MD = """
### 💡 Tips:

- Speak clearly in Hindi or Hinglish
- 3-10 seconds of audio works best
- Background noise is automatically reduced
- Recommendations are context-aware
"""

_FOOTER_MD = """
---

### 📞 Emergency Helplines (India)

| **Category** | **Number** | **Available** |
|--------------|-----------|---------------|
| 🚨 **Emergency/Police** | **112** | 24/7 |
| 👩 **Women's Helpline** | **181** | 24/7 |
| 🆘 **Women in Distress** | **1091** | 24/7 |
| 🧠 **Mental Health (Vandrevala)** | **9152987821** | 24/7 |
| 🏥 **Mental Health (NIMHANS)** | **08046110007** | 24/7 |
| 💙 **Suicide Prevention (AASRA)** | **9820466726** | 24/7 |

---

### 🎯 Supported Features:

**13 Emotions Detected:**

- 😊 Positive: joy, happiness, love, excitement, calm
- 😢 Negative: sadness, anger, fear, distress, panic, frustration
- 😐 Neutral: neutral, surprise

**4 Crisis Situations:**

- 🚨 Emergency/Violence (100+ keywords)
- 🧠 Mental Health Distress (depression, anxiety)
- 💔 Grief & Loss (bereavement support)
- 💔 Relationship Distress (conflicts, breakup)

**Automatic Enhancements:**

- Crisis → Emergency helplines auto-added
- Mental health → Counseling resources
- Validation → Quality assurance
- Caching → Faster repeated queries

---

**⚡ Performance Optimizations:**

- Batch audio preprocessing (3x faster)
- PYIN pitch detection (5x faster)
- Cached resampling & features
- LLM response caching (1hr TTL)
- Automatic retry logic

**🔒 Privacy & Safety:**

- No data stored permanently
- All processing in-memory
- HIPAA-compliant recommendations
- Crisis prioritization system
"""


def _format_status_markdown(
    *,
    asr_ready,
    sentiment_ready,
    emotion_ready,
    llm_ready,
    token_set,
    caching_enabled,
    max_retries,
):
    """Return the Markdown text for the "System Status" panel.

    Args:
        asr_ready: Truthy when the ASR model is available.
        sentiment_ready: Truthy when the sentiment pipeline is available.
        emotion_ready: Truthy when the emotion pipeline is available.
        llm_ready: Truthy when the LLM client is available; otherwise the
            panel shows the "Fallback" marker.
        token_set: Truthy when a Hugging Face token is configured.
        caching_enabled: Truthy when response caching is switched on.
        max_retries: Value shown verbatim on the "Max Retries" line.

    Returns:
        A Markdown string with a "Models Loaded" list and a "Configuration"
        list, each preceded by its bold label.
    """

    def mark(flag, yes="✅", no="❌"):
        return yes if flag else no

    lines = [
        "**Models Loaded:**",
        "",
        f"- ASR: {mark(asr_ready)} Indic Conformer",
        f"- Sentiment: {mark(sentiment_ready)} Hindi RoBERTa",
        f"- Emotion: {mark(emotion_ready)} XLM-RoBERTa",
        f"- LLM: {mark(llm_ready, no='⚠️ Fallback')} Llama 3.1",
        "",
        # The blank line above keeps this label out of the preceding bullet.
        "**Configuration:**",
        "",
        f"- HF Token: {mark(token_set, '✅ Set', '⚠️ Missing')}",
        f"- Caching: {mark(caching_enabled, '✅ Enabled', '❌ Disabled')}",
        f"- Max Retries: {max_retries}",
    ]
    return "\n".join(lines)


def create_interface():
    """Build the Gradio Blocks UI for the emotion and recommendation system.

    The layout is a two-column row (audio input, status and tips on the left;
    recommendation, risk level and three report accordions on the right)
    followed by a full-width reference section.  The analyze button is wired
    to ``get_recommendation``, whose five return values fill the five output
    components in order.

    The status panel is rendered once, from the module-level model handles and
    configuration values as they are when this function is called.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(
        title="Hindi Emotion & Recommendation System",
        theme=gr.themes.Soft()
    ) as demo:
        gr.Markdown(_INTRO_MD)

        with gr.Row():
            # Left column: input controls plus static status and tips.
            with gr.Column(scale=1):
                gr.Markdown("### 🎙️ Audio Input")
                audio_input = gr.Audio(
                    label="Record or Upload Hindi Audio",
                    sources=["microphone", "upload"],
                    type="filepath"
                )
                submit_btn = gr.Button("🚀 Analyze & Get Recommendation", variant="primary", size="lg")

                gr.Markdown("### 📊 System Status")
                status_md = _format_status_markdown(
                    asr_ready=bool(ASR_MODEL),
                    sentiment_ready=bool(SENTIMENT_PIPELINE),
                    emotion_ready=bool(EMOTION_PIPELINE),
                    llm_ready=bool(LLM_CLIENT),
                    token_set=bool(HUGGINGFACE_TOKEN),
                    caching_enabled=bool(ENABLE_CACHING),
                    max_retries=MAX_RETRIES,
                )
                gr.Markdown(status_md)
                gr.Markdown(_TIPS_MD)

            # Right column: results of the analysis.
            with gr.Column(scale=1):
                gr.Markdown("### 💬 AI Recommendation (Hindi)")
                recommendation_output = gr.Textbox(
                    label="Personalized Action Recommendation",
                    lines=8,
                    interactive=False,
                    placeholder="AI-generated recommendation will appear here..."
                )
                risk_output = gr.Textbox(
                    label="🎯 Risk Level Assessment",
                    interactive=False
                )
                with gr.Accordion("🔍 Validation Report", open=False):
                    validation_output = gr.Markdown()
                with gr.Accordion("⚙️ Processing Details", open=False):
                    metadata_output = gr.Markdown()
                with gr.Accordion("📊 Complete Analysis", open=True):
                    analysis_output = gr.Markdown()

        # Connect button: output order must match get_recommendation's
        # return order (action, risk, validation, metadata, analysis).
        submit_btn.click(
            fn=get_recommendation,
            inputs=[audio_input],
            outputs=[
                recommendation_output,
                risk_output,
                validation_output,
                metadata_output,
                analysis_output
            ]
        )

        gr.Markdown(_FOOTER_MD)

    return demo
| # ============================================ | |
| # LAUNCH | |
| # ============================================ | |
if __name__ == "__main__":
    # Script entry point: runs only when the file is executed directly.

    # A missing token is not fatal here; it is reported and startup continues.
    if not HUGGINGFACE_TOKEN:
        logger.warning("⚠️ HF_TOKEN not set. Set it for Llama 3.1 access and better performance.")
        logger.info("💡 Get token from: https://huggingface.co/settings/tokens")

    logger.info("🌐 Starting Gradio interface...")

    # Listen on all interfaces at port 7860 without creating a public
    # share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }

    demo = create_interface()
    demo.launch(**launch_options)