Chia Woon Yap committed on
Commit
6e1c282
·
verified ·
1 Parent(s): 4d2fad2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -21
app.py CHANGED
@@ -265,39 +265,39 @@ def process_document(file):
265
 
266
  # Function to handle speech-to-text conversion
267
 
268
- #def transcribe_audio(audio):
269
- sr, y = audio
270
- if y.ndim > 1:
271
- y = y.mean(axis=1)
272
- y = y.astype(np.float32)
273
- y /= np.max(np.abs(y))
274
- return transcriber({"sampling_rate": sr, "raw": y})["text"]
275
-
276
- """
277
- #Quick Fixes You Can Try First:
278
-
279
  #def transcribe_audio(audio):
280
  # sr, y = audio
281
  # if y.ndim > 1:
282
  # y = y.mean(axis=1)
283
  # y = y.astype(np.float32)
 
 
 
 
 
 
 
 
 
 
 
284
 
285
  # Improved normalization
286
- # max_val = np.max(np.abs(y))
287
- # if max_val > 0:
288
- # y /= max_val
289
 
290
  # Use better model
291
- # better_transcriber = pipeline(
292
- # "automatic-speech-recognition",
293
- # model="openai/whisper-small.en", # More accurate
294
- # chunk_length_s=30
295
- # )
296
 
297
- # return better_transcriber({"sampling_rate": sr, "raw": y})["text"]
298
 
299
  # the remaining is the same
300
- """
301
 
302
 
303
 
 
265
 
266
  # Function to handle speech-to-text conversion
267
 
 
 
 
 
 
 
 
 
 
 
 
268
  #def transcribe_audio(audio):
269
  # sr, y = audio
270
  # if y.ndim > 1:
271
  # y = y.mean(axis=1)
272
  # y = y.astype(np.float32)
273
+ # y /= np.max(np.abs(y))
274
+ # return transcriber({"sampling_rate": sr, "raw": y})["text"]
275
+
276
+
277
+ #Quick Fixes You Can Try First:
278
+
279
# Lazily-built speech-recognition pipeline shared by every call to
# transcribe_audio, so the model is loaded once instead of per request.
_WHISPER_PIPELINE = None


def _get_whisper_pipeline():
    """Return the shared Whisper ASR pipeline, constructing it on first use."""
    global _WHISPER_PIPELINE
    if _WHISPER_PIPELINE is None:
        _WHISPER_PIPELINE = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small.en",  # More accurate
            chunk_length_s=30,
        )
    return _WHISPER_PIPELINE


def transcribe_audio(audio):
    """Convert recorded audio to text with the Whisper speech-recognition model.

    Args:
        audio: A ``(sampling_rate, samples)`` pair where ``samples`` is a
            NumPy array, or ``None`` when no recording was supplied.
            Arrays with more than one dimension are averaged over axis 1
            (presumably the channel axis -- confirm against the caller).

    Returns:
        The transcribed text, or an empty string when there is no audio
        to transcribe (``audio`` is ``None`` or the sample array is empty).
    """
    # Nothing recorded: return early instead of failing on tuple unpacking.
    if audio is None:
        return ""

    sr, y = audio

    # np.max raises ValueError on a zero-size array, so bail out first.
    if y.size == 0:
        return ""

    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Improved normalization: scale the peak to 1.0, but skip all-zero
    # (silent) input to avoid dividing by zero.
    max_val = np.max(np.abs(y))
    if max_val > 0:
        y /= max_val

    # Reuse the cached pipeline rather than reloading the model every call.
    return _get_whisper_pipeline()({"sampling_rate": sr, "raw": y})["text"]
298
 
299
  # the remaining is the same
300
+
301
 
302
 
303