Spaces:

omar1232
/

Advanced_Audio_Visualizer

Sleeping

omar1232 committed on Apr 23, 2025

Commit

d498a93

verified ·

1 Parent(s): 9fea2cb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,17 +7,22 @@ import os
 # Process audio and transcribe
 def process_audio(audio_input):
-    recognizer = sr.Recognizer()  # Correct usage of 'sr' as the module alias
-    if isinstance(audio_input, tuple):  # Recorded audio (sample_rate, numpy_array)
-        sample_rate, audio_data = audio_input  # Rename 'sr' to 'sample_rate'
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
             AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
-            audio_file_path = temp_file.name
-    else:  # Uploaded audio file
-        audio_file_path = audio_input
-    with sr.AudioFile(audio_file_path) as source:  # Now 'sr' is correctly defined
         audio = recognizer.record(source)
         try:
             transcription = recognizer.recognize_google(audio)
@@ -37,8 +42,8 @@ def process_audio(audio_input):
         text_file.write(transcription)
         text_file_path = text_file.name
-    # Clean up temporary audio file (if created)
-    if isinstance(audio_input, tuple) and os.path.exists(audio_file_path):
         os.remove(audio_file_path)
     return language, transcription, text_file_path

 # Process audio and transcribe
 def process_audio(audio_input):
+    recognizer = sr.Recognizer()
+    # Convert all audio inputs to WAV format
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        if isinstance(audio_input, tuple):  # Recorded audio (sample_rate, numpy_array)
+            sample_rate, audio_data = audio_input
             AudioSegment(audio_data, sample_rate=sample_rate, frame_rate=sample_rate, channels=1).export(temp_file.name, format="wav")
+        else:  # Uploaded audio file
+            # Load the uploaded audio file and convert it to WAV
+            audio = AudioSegment.from_file(audio_input)
+            audio = audio.set_channels(1)  # Convert to mono for consistency
+            audio.export(temp_file.name, format="wav")
+        audio_file_path = temp_file.name
+    # Transcribe the WAV file
+    with sr.AudioFile(audio_file_path) as source:
         audio = recognizer.record(source)
         try:
             transcription = recognizer.recognize_google(audio)
         text_file.write(transcription)
         text_file_path = text_file.name
+    # Clean up temporary WAV file
+    if os.path.exists(audio_file_path):
         os.remove(audio_file_path)
     return language, transcription, text_file_path