JustNikunj committed on
Commit
9e93862
·
verified ·
1 Parent(s): 6a78c4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -110
app.py CHANGED
@@ -1,153 +1,156 @@
1
  import gradio as gr
2
  import torch
3
- import torchaudio
4
- from transformers import AutoModelForCTC, AutoProcessor, pipeline
5
- from pydub import AudioSegment
6
- import numpy as np
7
  import librosa
8
- import io
9
- import tempfile
 
10
 
11
- # Load ASR model and processor for Hindi speech recognition
12
- print("Loading ASR model...")
13
  try:
14
- # Try to load the Hindi model with language modeling
15
- from transformers import Wav2Vec2ProcessorWithLM
16
- asr_processor = Wav2Vec2ProcessorWithLM.from_pretrained("ai4bharat/indicwav2vec-hindi")
17
- asr_model = AutoModelForCTC.from_pretrained("ai4bharat/indicwav2vec-hindi")
18
- print("Loaded Hindi model with language modeling")
 
19
  except Exception as e:
20
- print(f"Failed to load Hindi model with LM: {e}")
21
- print("Falling back to basic processor...")
22
- # Fallback to basic processor
23
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
24
- asr_processor = Wav2Vec2Processor.from_pretrained("ai4bharat/indicwav2vec-hindi")
25
- asr_model = Wav2Vec2ForCTC.from_pretrained("ai4bharat/indicwav2vec-hindi")
26
- print("Loaded Hindi model with basic processor")
27
-
28
- # Load sentiment analysis pipeline for Hindi text
29
- print("Loading sentiment analysis model...")
30
- sentiment_pipeline = pipeline(
31
- "text-classification",
32
- model="LondonStory/txlm-roberta-hindi-sentiment",
33
- return_all_scores=True
34
- )
35
 
36
- # Move models to appropriate device (CPU for free Hugging Face Space)
37
- device = "cuda" if torch.cuda.is_available() else "cpu"
38
- asr_model.to(device)
39
- print(f"Models loaded on device: {device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def predict(audio_filepath):
42
  """
43
- Main prediction function that processes Hindi audio and returns sentiment analysis.
44
-
45
- Args:
46
- audio_filepath: Path to the uploaded audio file
47
-
48
- Returns:
49
- Dictionary with sentiment labels and confidence scores
50
  """
51
  try:
52
- # Load and preprocess audio
53
- print(f"Processing audio file: {audio_filepath}")
54
-
55
- # Load audio using librosa and resample to 16kHz as required by the ASR model
56
- audio_array, sample_rate = librosa.load(audio_filepath, sr=16000)
57
-
58
- # Ensure audio is in the correct format
59
- if len(audio_array.shape) > 1:
60
- audio_array = np.mean(audio_array, axis=1)
61
-
62
- # Process audio with ASR processor
63
- inputs = asr_processor(
64
- audio_array,
65
- sampling_rate=16000,
66
- return_tensors="pt",
67
- padding=True
68
- )
69
-
70
- # Move inputs to device
71
- inputs = {k: v.to(device) for k, v in inputs.items()}
72
-
73
- # Transcribe audio to Hindi text
74
- with torch.no_grad():
75
- logits = asr_model(**inputs).logits
76
-
77
- # Get predicted token IDs
78
- predicted_ids = torch.argmax(logits, dim=-1)
79
-
80
- # Decode the transcription
81
- transcription = asr_processor.batch_decode(predicted_ids)[0]
82
-
83
- print(f"Transcribed text: {transcription}")
84
-
85
- # Handle empty transcription
86
- if not transcription.strip():
87
- print("Empty transcription detected")
88
- return {"No Speech Detected": 1.0}
89
 
90
- # Perform sentiment analysis on the transcribed text
91
- sentiment_results = sentiment_pipeline(transcription)
 
92
 
93
- # Format results for Gradio
94
- result_dict = {}
95
- for result in sentiment_results[0]:
96
- label = result['label']
97
- score = result['score']
98
- result_dict[label] = float(score)
99
 
100
- # Add transcription info (but not as a score since Gradio Label expects numbers)
101
- print(f"Successfully processed. Transcription: {transcription}")
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- return result_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  except Exception as e:
106
- print(f"Error processing audio: {str(e)}")
107
- # Return a properly formatted error response for Gradio
108
  return {"Processing Error": 1.0}
109
 
110
  # Create Gradio interface
111
  demo = gr.Interface(
112
  fn=predict,
113
  inputs=gr.Audio(
114
- type="filepath",
115
- label="Upload Hindi Speech",
116
  sources=["upload", "microphone"]
117
  ),
118
  outputs=gr.Label(
119
- label="Sentiment Analysis Result",
120
- num_top_classes=3
121
  ),
122
- title="🎤 Hindi Speech Sentiment Analysis",
123
  description="""
124
- ### Upload or record Hindi audio to analyze sentiment
125
 
126
- This app performs the following steps:
127
- 1. **Speech Recognition**: Converts your Hindi speech to text using AI4Bharat's IndicWav2Vec model
128
- 2. **Sentiment Analysis**: Analyzes the emotional tone using a specialized Hindi sentiment model
129
 
130
- **Instructions**:
131
- - Upload an audio file or record directly using the microphone
132
- - Speak clearly in Hindi for best results
133
- - The results show sentiment confidence scores
134
- - Check the logs below to see the transcribed text
 
135
 
136
- **Supported sentiments**: Positive, Negative, and Neutral with confidence scores
 
 
 
137
 
138
- **Test phrases**: Try "मैं बहुत खुश हूं" (positive) or "मुझे यह पसंद नहीं है" (negative)
 
 
 
 
139
  """,
140
  examples=None,
141
  theme=gr.themes.Soft(),
142
- allow_flagging="never"
143
  )
144
 
145
  # Launch the app
146
  if __name__ == "__main__":
147
- # Launch with share=True for public access, queue for handling multiple requests
148
  demo.launch(
149
- share=False, # Set to True if you want a public link for testing
150
- server_name="0.0.0.0", # Required for Hugging Face Spaces
151
- server_port=7860, # Default port for Hugging Face Spaces
152
  show_error=True
153
  )
 
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import pipeline
 
 
 
4
  import librosa
5
+ import numpy as np
6
+
7
+ print("🚀 Starting Hindi Speech Sentiment Analysis App...")
8
 
9
+ # Load sentiment analysis model
10
+ print("📚 Loading sentiment analysis model...")
11
  try:
12
+ sentiment_pipeline = pipeline(
13
+ "text-classification",
14
+ model="LondonStory/txlm-roberta-hindi-sentiment",
15
+ top_k=None
16
+ )
17
+ print("✅ Sentiment model loaded successfully")
18
  except Exception as e:
19
+ print(f" Error loading sentiment model: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # Use a simpler, more reliable ASR approach with Whisper
22
+ print("🎤 Loading Whisper ASR model...")
23
+ try:
24
+ # Use OpenAI Whisper for more reliable transcription
25
+ asr_pipeline = pipeline(
26
+ "automatic-speech-recognition",
27
+ model="openai/whisper-small",
28
+ chunk_length_s=30,
29
+ device="cpu"
30
+ )
31
+ print("✅ Whisper ASR model loaded successfully")
32
+ except Exception as e:
33
+ print(f"❌ Error loading Whisper model: {e}")
34
+ # Fallback to a basic English-only wav2vec2 model (facebook/wav2vec2-base-960h is not multilingual, so Hindi transcripts will be poor)
35
+ try:
36
+ asr_pipeline = pipeline(
37
+ "automatic-speech-recognition",
38
+ model="facebook/wav2vec2-base-960h",
39
+ device="cpu"
40
+ )
41
+ print("✅ Fallback ASR model loaded successfully")
42
+ except Exception as e2:
43
+ print(f"❌ Error loading fallback model: {e2}")
44
 
45
def predict(audio_filepath):
    """
    Transcribe an audio file and classify the sentiment of the transcript.

    The audio is passed to the module-level ``asr_pipeline``; the resulting
    text is passed to the module-level ``sentiment_pipeline``.

    Args:
        audio_filepath: Path to the audio file supplied by the Gradio audio
            input, or ``None`` when nothing was recorded or uploaded.

    Returns:
        A dict mapping each sentiment label to its score as a ``float``.
        When a stage cannot complete, a single-entry dict is returned whose
        key names the failure ("⚠️ No Audio", "No Speech", "ASR Error",
        "Sentiment Error" or "Processing Error") with a score of 1.0, so the
        Gradio ``Label`` output always receives a label -> number mapping.
    """
    try:
        print(f"\n{'='*50}")
        print(f"🎧 Processing new audio file...")

        if audio_filepath is None:
            print("❌ No audio file provided")
            return {"⚠️ No Audio": 1.0}

        print(f"📂 File path: {audio_filepath}")

        # Transcribe audio using Whisper
        print("🔄 Transcribing audio with Whisper...")
        try:
            asr_output = asr_pipeline(audio_filepath)
            transcription = asr_output["text"].strip()
            print(f"📝 Whisper transcription: '{transcription}'")

            # Handle empty transcription
            if not transcription:
                print("⚠️ Empty transcription from Whisper")
                return {"No Speech": 1.0}

        except Exception as asr_error:
            print(f"❌ Whisper ASR Error: {asr_error}")
            return {"ASR Error": 1.0}

        # Perform sentiment analysis
        print("💭 Analyzing sentiment with LondonStory model...")
        try:
            sentiment_results = sentiment_pipeline(transcription)
            print(f"📊 Raw sentiment results: {sentiment_results}")

            # The text-classification pipeline may hand back a single dict,
            # a flat list of {label, score} dicts, or (for a single string
            # input when top_k is configured on the pipeline rather than on
            # the call) that list wrapped in an outer list. Normalise to a
            # flat list so that indexing by 'label' does not fail on the
            # nested shape.
            scores = sentiment_results
            if isinstance(scores, dict):
                scores = [scores]
            elif scores and isinstance(scores[0], (list, tuple)):
                scores = scores[0]

            # Format results for Gradio
            result_dict = {
                entry['label']: float(entry['score']) for entry in scores
            }

            # Log success details
            print(f"✅ SUCCESS! Processing completed")
            print(f"📝 Final transcription: '{transcription}'")
            for label, score in result_dict.items():
                print(f"📊 {label}: {score:.3f}")
            print(f"{'='*50}\n")

            return result_dict

        except Exception as sentiment_error:
            print(f"❌ Sentiment Analysis Error: {sentiment_error}")
            return {"Sentiment Error": 1.0}

    except Exception as e:
        # Last-resort boundary: the UI must always receive a valid mapping.
        print(f" General Error: {str(e)}")
        return {"Processing Error": 1.0}
104
 
105
  # Create Gradio interface
106
  demo = gr.Interface(
107
  fn=predict,
108
  inputs=gr.Audio(
109
+ type="filepath",
110
+ label="🎤 Record or Upload Hindi Audio",
111
  sources=["upload", "microphone"]
112
  ),
113
  outputs=gr.Label(
114
+ label="🎭 Sentiment Analysis Results",
115
+ num_top_classes=5
116
  ),
117
+ title="🎤 Hindi Speech Sentiment Analysis (Whisper + AI)",
118
  description="""
119
+ ## 🇮🇳 Analyze sentiment from Hindi speech using Whisper AI
120
 
121
+ ### 🔄 How it works:
122
+ 1. **🎤 Whisper ASR** Converts your Hindi speech to Devanagari text
123
+ 2. **💭 LondonStory AI** Analyzes sentiment with confidence scores
124
 
125
+ ### 🧪 Test Phrases (speak clearly):
126
+ - **😊 Happy**: "मैं बहुत खुश हूं" *(Main bahut khush hun)*
127
+ - **😠 Sad**: "मुझे दुख है" *(Mujhe dukh hai)*
128
+ - **😐 Neutral**: "यह ठीक है" *(Yeh theek hai)*
129
+ - **❤️ Love**: "मुझे यह पसंद है" *(Mujhe yeh pasand hai)*
130
+ - **👎 Dislike**: "यह अच्छा नहीं है" *(Yeh accha nahi hai)*
131
 
132
+ ### 📋 Instructions:
133
+ 1. Click the microphone to record or upload an audio file
134
+ 2. Speak clearly in Hindi for 3-5 seconds
135
+ 3. Click Submit and check results + logs below
136
 
137
+ ### 🔍 Features:
138
+ - **Powered by OpenAI Whisper** for accurate Hindi transcription
139
+ - **Specialized Hindi sentiment model** for emotion analysis
140
+ - **Real-time processing** with detailed logging
141
+ - **Supports various Hindi accents** and speaking styles
142
  """,
143
  examples=None,
144
  theme=gr.themes.Soft(),
145
+ flagging_mode="never"
146
  )
147
 
148
  # Launch the app
149
  if __name__ == "__main__":
150
+ print("🌐 Starting server...")
151
  demo.launch(
152
+ server_name="0.0.0.0",
153
+ server_port=7860,
 
154
  show_error=True
155
  )
156
+ print("🎉 Whisper + Hindi Sentiment Analysis App is ready!")