Chia Woon Yap committed on
Commit
15bfc80
·
verified ·
1 Parent(s): a712245

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -2
app.py CHANGED
@@ -264,13 +264,41 @@ def process_document(file):
264
  return f"Error processing document: {str(e)}"
265
 
266
  # Function to handle speech-to-text conversion
 
 
 
 
 
 
 
 
 
 
 
267
  def transcribe_audio(audio):
268
  sr, y = audio
269
  if y.ndim > 1:
270
  y = y.mean(axis=1)
271
  y = y.astype(np.float32)
272
- y /= np.max(np.abs(y))
273
- return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
  # Clear chat history function
276
  def clear_chat_history():
 
264
  return f"Error processing document: {str(e)}"
265
 
266
  # Function to handle speech-to-text conversion
267
+
268
+ #def transcribe_audio(audio):
269
+ # sr, y = audio
270
+ # if y.ndim > 1:
271
+ # y = y.mean(axis=1)
272
+ # y = y.astype(np.float32)
273
+ # y /= np.max(np.abs(y))
274
+ # return transcriber({"sampling_rate": sr, "raw": y})["text"]
275
+
276
+ # Quick fixes: guard normalization against silent input and use a more accurate model.
277
+
278
  def transcribe_audio(audio):
279
  sr, y = audio
280
  if y.ndim > 1:
281
  y = y.mean(axis=1)
282
  y = y.astype(np.float32)
283
+
284
+ # Improved normalization
285
+ max_val = np.max(np.abs(y))
286
+ if max_val > 0:
287
+ y /= max_val
288
+
289
+ # Use better model
290
+ better_transcriber = pipeline(
291
+ "automatic-speech-recognition",
292
+ model="openai/whisper-small.en", # More accurate
293
+ chunk_length_s=30
294
+ )
295
+
296
+ return better_transcriber({"sampling_rate": sr, "raw": y})["text"]
297
+
298
+ # The rest of the file is unchanged.
299
+
300
+
301
+
302
 
303
  # Clear chat history function
304
  def clear_chat_history():