Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -110,41 +110,33 @@ def transcribe_audio(audio_file):
|
|
| 110 |
|
| 111 |
recognizer = sr.Recognizer()
|
| 112 |
|
| 113 |
-
# Adjust for ambient noise
|
| 114 |
-
recognizer.energy_threshold = 300
|
| 115 |
-
recognizer.dynamic_energy_threshold = True
|
| 116 |
-
recognizer.pause_threshold = 0.8
|
| 117 |
-
|
| 118 |
try:
|
| 119 |
# Handle different audio file types
|
| 120 |
audio_path = str(audio_file)
|
| 121 |
|
| 122 |
# Load and process audio file
|
| 123 |
with sr.AudioFile(audio_path) as source:
|
| 124 |
-
# Adjust for ambient noise
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
audio = recognizer.record(source)
|
| 127 |
|
| 128 |
-
# Try Google Speech Recognition
|
| 129 |
try:
|
| 130 |
text = recognizer.recognize_google(audio, language='en-US')
|
| 131 |
-
|
| 132 |
-
except sr.RequestError:
|
| 133 |
-
# Fallback to offline recognition if available
|
| 134 |
-
try:
|
| 135 |
-
text = recognizer.recognize_sphinx(audio)
|
| 136 |
return text
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
except sr.UnknownValueError:
|
| 143 |
-
return "Could not understand the audio. Please speak more clearly."
|
| 144 |
-
except sr.RequestError as e:
|
| 145 |
-
return f"Speech recognition service error: {str(e)}"
|
| 146 |
except Exception as e:
|
| 147 |
-
return f"Error processing audio: {str(e)}"
|
| 148 |
|
| 149 |
def enhance_prompt_with_gemini(text):
|
| 150 |
"""Enhance the prompt using Gemini API for better results"""
|
|
|
|
| 110 |
|
| 111 |
recognizer = sr.Recognizer()
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
try:
|
| 114 |
# Handle different audio file types
|
| 115 |
audio_path = str(audio_file)
|
| 116 |
|
| 117 |
# Load and process audio file
|
| 118 |
with sr.AudioFile(audio_path) as source:
|
| 119 |
+
# Adjust for ambient noise if possible
|
| 120 |
+
try:
|
| 121 |
+
recognizer.adjust_for_ambient_noise(source, duration=0.2)
|
| 122 |
+
except:
|
| 123 |
+
pass # Skip if adjustment fails
|
| 124 |
audio = recognizer.record(source)
|
| 125 |
|
| 126 |
+
# Try Google Speech Recognition (free tier)
|
| 127 |
try:
|
| 128 |
text = recognizer.recognize_google(audio, language='en-US')
|
| 129 |
+
if text.strip():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
return text
|
| 131 |
+
else:
|
| 132 |
+
return "No speech detected in the audio"
|
| 133 |
+
except sr.UnknownValueError:
|
| 134 |
+
return "Could not understand the audio. Please speak more clearly and try again."
|
| 135 |
+
except sr.RequestError as e:
|
| 136 |
+
return f"Speech recognition service temporarily unavailable: {str(e)}"
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
except Exception as e:
|
| 139 |
+
return f"Error processing audio file: {str(e)}. Please check your audio format."
|
| 140 |
|
| 141 |
def enhance_prompt_with_gemini(text):
|
| 142 |
"""Enhance the prompt using Gemini API for better results"""
|