Spaces:

maria355
/

VoiceVision-Creative-AI

Sleeping

App Files Files Community

maria355 committed on Sep 14, 2025

Commit

aba5f52

verified ·

1 Parent(s): dfa3356

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -22

app.py CHANGED Viewed

@@ -110,41 +110,33 @@ def transcribe_audio(audio_file):
     recognizer = sr.Recognizer()
-    # Adjust for ambient noise
-    recognizer.energy_threshold = 300
-    recognizer.dynamic_energy_threshold = True
-    recognizer.pause_threshold = 0.8
     try:
         # Handle different audio file types
         audio_path = str(audio_file)
         # Load and process audio file
         with sr.AudioFile(audio_path) as source:
-            # Adjust for ambient noise
-            recognizer.adjust_for_ambient_noise(source, duration=0.5)
             audio = recognizer.record(source)
-        # Try Google Speech Recognition first (free tier)
         try:
             text = recognizer.recognize_google(audio, language='en-US')
-            return text
-        except sr.RequestError:
-            # Fallback to offline recognition if available
-            try:
-                text = recognizer.recognize_sphinx(audio)
                 return text
-            except (sr.RequestError, sr.UnknownValueError):
-                pass
-        return "Could not transcribe the audio. Please try speaking more clearly."
-    except sr.UnknownValueError:
-        return "Could not understand the audio. Please speak more clearly."
-    except sr.RequestError as e:
-        return f"Speech recognition service error: {str(e)}"
     except Exception as e:
-        return f"Error processing audio: {str(e)}"
 def enhance_prompt_with_gemini(text):
     """Enhance the prompt using Gemini API for better results"""

     recognizer = sr.Recognizer()
     try:
         # Handle different audio file types
         audio_path = str(audio_file)
         # Load and process audio file
         with sr.AudioFile(audio_path) as source:
+            # Adjust for ambient noise if possible
+            try:
+                recognizer.adjust_for_ambient_noise(source, duration=0.2)
+            except:
+                pass  # Skip if adjustment fails
             audio = recognizer.record(source)
+        # Try Google Speech Recognition (free tier)
         try:
             text = recognizer.recognize_google(audio, language='en-US')
+            if text.strip():
                 return text
+            else:
+                return "No speech detected in the audio"
+        except sr.UnknownValueError:
+            return "Could not understand the audio. Please speak more clearly and try again."
+        except sr.RequestError as e:
+            return f"Speech recognition service temporarily unavailable: {str(e)}"
     except Exception as e:
+        return f"Error processing audio file: {str(e)}. Please check your audio format."
 def enhance_prompt_with_gemini(text):
     """Enhance the prompt using Gemini API for better results"""