KavyaBansal committed on
Commit 5faa186 · verified
1 Parent(s): c7e790a

Update app.py

Files changed (1)
  1. app.py +44 -66
app.py CHANGED
@@ -12,7 +12,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {DEVICE}")
 
 class EmotionAwareTranscriber:
-    def __init__(self, model_size="base"):  # Fixed method name from _init_ to __init__
+    def __init__(self, model_size="base"):
         print("Initializing models...")
 
         # Initialize Whisper
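A side note on the comment removed in this hunk: the earlier bug it documented (a constructor written as _init_ with single underscores) is easy to miss, because Python treats such a method as an ordinary one and never calls it on construction. A minimal standalone sketch, with hypothetical class names not taken from app.py:

class Broken:
    def _init_(self, model_size="base"):  # single underscores: never called automatically
        self.model_size = model_size

class Fixed:
    def __init__(self, model_size="base"):  # dunder name: invoked on Fixed(...)
        self.model_size = model_size

b = Broken()
print(hasattr(b, "model_size"))  # False; _init_ was never run
f = Fixed()
print(f.model_size)              # "base"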
@@ -157,37 +157,26 @@ class EmotionAwareTranscriber:
         "audio": None
     }
 
-# Add installation code for Google Colab
-def install_dependencies():
-    print("Installing required packages...")
-    import subprocess
-
-    # Install required packages
-    subprocess.run(["pip", "install", "gradio", "torch", "transformers", "librosa", "gtts", "numpy"])
-
-    # Check if ffmpeg is installed, and install if needed
-    try:
-        import ffmpeg
-    except ImportError:
-        print("Installing ffmpeg...")
-        subprocess.run(["apt-get", "update", "-qq"])
-        subprocess.run(["apt-get", "install", "-y", "-qq", "ffmpeg"])
-
-    print("Dependencies installed successfully.")
+# Initialize the transcriber first
+transcriber = EmotionAwareTranscriber()
 
-# Initialize the transcriber
-process_audio_wrapper.last_audio = None  # Initialize the class attribute
+# Define a global variable to store the last audio file path
+last_audio_file = None
 
+# Define the process_audio_wrapper function AFTER initializing the variable
 def process_audio_wrapper(audio_path, style):
+    global last_audio_file
+
     result = transcriber.process_audio(audio_path, style)
 
     # Clean up previous audio files
-    if hasattr(process_audio_wrapper, "last_audio") and process_audio_wrapper.last_audio:
+    if last_audio_file and os.path.exists(last_audio_file):
         try:
-            os.unlink(process_audio_wrapper.last_audio)
-        except:
-            pass
-    process_audio_wrapper.last_audio = result["audio"]
+            os.unlink(last_audio_file)
+        except Exception as e:
+            print(f"Error cleaning up audio file: {e}")
+
+    last_audio_file = result["audio"]
 
     return (
         result["transcription"],
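The hunk above swaps function-attribute state for a module-level global guarded by os.path.exists. A minimal self-contained sketch of the same cleanup pattern, assuming a hypothetical save_response helper rather than app.py's actual processing code:

import os
import tempfile

last_audio_file = None  # path of the previous response file, if any

def save_response(data: bytes) -> str:
    """Write a new temp file, removing the one left by the previous call."""
    global last_audio_file
    if last_audio_file and os.path.exists(last_audio_file):
        try:
            os.unlink(last_audio_file)
        except OSError as e:
            print(f"Error cleaning up audio file: {e}")
    fd, path = tempfile.mkstemp(suffix=".mp3")
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    last_audio_file = path
    return path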
@@ -196,46 +185,35 @@ def process_audio_wrapper(audio_path, style):
         result["audio"] if result["audio"] else None
     )
 
-# Main execution with proper checks for Colab
-if __name__ == "__main__":  # Fixed double underscore
-    # Check if running in Colab
-    try:
-        import google.colab
-        IN_COLAB = True
-    except:
-        IN_COLAB = False
-
-    if IN_COLAB:
-        install_dependencies()
-
-    # Initialize transcriber after dependencies are installed
-    transcriber = EmotionAwareTranscriber()
-
-    # Gradio interface
-    with gr.Blocks(title="Emotion-Aware Audio Transcriber") as demo:
-        gr.Markdown("# 🎤 Emotion-Aware Audio Transcriber")
-        gr.Markdown("Upload an audio file to get a transcription with emotional analysis and response")
-
-        with gr.Row():
-            audio_input = gr.Audio(label="Upload Audio", type="filepath")
-            style_selector = gr.Radio(
-                ["motivational", "calm", "energetic", "angry"],
-                label="Response Style",
-                value="motivational"
-            )
-            submit_btn = gr.Button("Process", variant="primary")
-
-        with gr.Column():
-            transcription_output = gr.Textbox(label="Transcription")
-            emotion_output = gr.Textbox(label="Detected Emotion")
-            response_output = gr.Textbox(label="Generated Response")
-            audio_output = gr.Audio(label="Spoken Response")
-
-        submit_btn.click(
-            fn=process_audio_wrapper,
-            inputs=[audio_input, style_selector],
-            outputs=[transcription_output, emotion_output, response_output, audio_output]
-        )
-
-    # Launch with share=True for Colab to generate a public URL
-    demo.launch(debug=True, share=True)
+# Gradio interface
+with gr.Blocks(title="Emotion-Aware Audio Transcriber") as demo:
+    gr.Markdown("# 🎤 Emotion-Aware Audio Transcriber")
+    gr.Markdown("Upload an audio file to get a transcription with emotional analysis and response")
+
+    with gr.Row():
+        audio_input = gr.Audio(label="Upload Audio", type="filepath")
+        style_selector = gr.Radio(
+            ["motivational", "calm", "energetic", "angry"],
+            label="Response Style",
+            value="motivational"
+        )
+        submit_btn = gr.Button("Process", variant="primary")
+
+    with gr.Column():
+        transcription_output = gr.Textbox(label="Transcription")
+        emotion_output = gr.Textbox(label="Detected Emotion")
+        response_output = gr.Textbox(label="Generated Response")
+        audio_output = gr.Audio(label="Spoken Response")
+
+    submit_btn.click(
+        fn=process_audio_wrapper,
+        inputs=[audio_input, style_selector],
+        outputs=[transcription_output, emotion_output, response_output, audio_output]
+    )
+
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()
+else:
+    # This part is crucial for HuggingFace Spaces deployment
+    app = demo
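The tail of the new version follows a dual entry-point convention: launch a local server when the file is executed directly, and expose the Blocks object under the name app when the module is imported by a host process instead. A minimal sketch of that pattern in isolation (the placeholder UI is hypothetical):

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("Hello")

if __name__ == "__main__":
    demo.launch()  # local development server
else:
    app = demo     # picked up by the importing host instead of launched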
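One detail of the event wiring above that the new code relies on: a Button's click event maps the values the handler returns positionally onto its outputs list, which is why process_audio_wrapper returns a four-element tuple for four output components. A minimal sketch with hypothetical components and handler:

import gradio as gr

def handler(text, choice):
    # the two returned values map onto [out1, out2] in order
    return text.upper(), f"style: {choice}"

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    sel = gr.Radio(["a", "b"], value="a", label="Choice")
    out1 = gr.Textbox(label="First output")
    out2 = gr.Textbox(label="Second output")
    btn = gr.Button("Go")
    btn.click(fn=handler, inputs=[inp, sel], outputs=[out1, out2])
# demo.launch() omitted here; see the entry-point sketch above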