maria355 committed on
Commit
aba5f52
·
verified ·
1 Parent(s): dfa3356

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -22
app.py CHANGED
@@ -110,41 +110,33 @@ def transcribe_audio(audio_file):
110
 
111
  recognizer = sr.Recognizer()
112
 
113
- # Adjust for ambient noise
114
- recognizer.energy_threshold = 300
115
- recognizer.dynamic_energy_threshold = True
116
- recognizer.pause_threshold = 0.8
117
-
118
  try:
119
  # Handle different audio file types
120
  audio_path = str(audio_file)
121
 
122
  # Load and process audio file
123
  with sr.AudioFile(audio_path) as source:
124
- # Adjust for ambient noise
125
- recognizer.adjust_for_ambient_noise(source, duration=0.5)
 
 
 
126
  audio = recognizer.record(source)
127
 
128
- # Try Google Speech Recognition first (free tier)
129
  try:
130
  text = recognizer.recognize_google(audio, language='en-US')
131
- return text
132
- except sr.RequestError:
133
- # Fallback to offline recognition if available
134
- try:
135
- text = recognizer.recognize_sphinx(audio)
136
  return text
137
- except (sr.RequestError, sr.UnknownValueError):
138
- pass
139
-
140
- return "Could not transcribe the audio. Please try speaking more clearly."
 
 
141
 
142
- except sr.UnknownValueError:
143
- return "Could not understand the audio. Please speak more clearly."
144
- except sr.RequestError as e:
145
- return f"Speech recognition service error: {str(e)}"
146
  except Exception as e:
147
- return f"Error processing audio: {str(e)}"
148
 
149
  def enhance_prompt_with_gemini(text):
150
  """Enhance the prompt using Gemini API for better results"""
 
110
 
111
  recognizer = sr.Recognizer()
112
 
 
 
 
 
 
113
  try:
114
  # Handle different audio file types
115
  audio_path = str(audio_file)
116
 
117
  # Load and process audio file
118
  with sr.AudioFile(audio_path) as source:
119
+ # Adjust for ambient noise if possible
120
+ try:
121
+ recognizer.adjust_for_ambient_noise(source, duration=0.2)
122
+ except:
123
+ pass # Skip if adjustment fails
124
  audio = recognizer.record(source)
125
 
126
+ # Try Google Speech Recognition (free tier)
127
  try:
128
  text = recognizer.recognize_google(audio, language='en-US')
129
+ if text.strip():
 
 
 
 
130
  return text
131
+ else:
132
+ return "No speech detected in the audio"
133
+ except sr.UnknownValueError:
134
+ return "Could not understand the audio. Please speak more clearly and try again."
135
+ except sr.RequestError as e:
136
+ return f"Speech recognition service temporarily unavailable: {str(e)}"
137
 
 
 
 
 
138
  except Exception as e:
139
+ return f"Error processing audio file: {str(e)}. Please check your audio format."
140
 
141
  def enhance_prompt_with_gemini(text):
142
  """Enhance the prompt using Gemini API for better results"""