Chia Woon Yap committed on
Commit
19a0f6f
·
verified ·
1 Parent(s): 165d756

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -55
app.py CHANGED
@@ -299,80 +299,42 @@ def process_document(file):
299
 
300
 
301
  # Load model at startup
302
- #realtime_transcriber = load_realtime_whisper()
303
-
304
- #def transcribe_audio(audio):
305
- # """Real-time optimized transcription"""
306
- # if audio is None:
307
- # return ""
308
-
309
- # sr, y = audio
310
-
311
- # Quick preprocessing
312
- # if y.ndim > 1:
313
- # y = y.mean(axis=1) # Convert to mono
314
-
315
- # y = y.astype(np.float32)
316
- # max_val = np.max(np.abs(y))
317
- # if max_val > 0:
318
- # y = y / max_val
319
- #
320
- # try:
321
- # # Use real-time transcriber with optimized settings
322
- # result = realtime_transcriber(
323
- # {"sampling_rate": sr, "raw": y},
324
- # generate_kwargs={
325
- # "language": "english",
326
- # "task": "transcribe",
327
- # "temperature": 0.0, # More deterministic
328
- # "no_repeat_ngram_size": 2, # Reduce repetitions
329
- # }
330
- # )
331
- # return result["text"]
332
- # except Exception as e:
333
- # print(f"Transcription error: {e}")
334
- # return "Could not transcribe audio. Please try again."
335
-
336
-
337
-
338
-
339
- #Common Issue 1: Audio Format Problems
340
  def transcribe_audio(audio):
341
- """Fixed version - handles audio format issues"""
342
  if audio is None:
343
  return "Please record audio first"
344
 
345
  try:
346
  sr, y = audio
347
 
348
- # FIX: Handle different audio formats from Gradio
349
- if isinstance(y, np.ndarray):
350
- # Standard numpy array format
351
- if y.ndim > 1:
352
- y = y.mean(axis=1) # Stereo to mono
353
- y = y.astype(np.float32)
354
-
355
- # Normalize volume
356
- if np.max(np.abs(y)) > 0:
357
- y = y / np.max(np.abs(y))
358
- else:
359
- return "Unsupported audio format"
360
 
361
- # FIX: Use a more reliable approach
362
  transcriber = pipeline(
363
  "automatic-speech-recognition",
364
  model="openai/whisper-base.en"
365
  )
366
 
367
- # FIX: Ensure proper input format
368
  result = transcriber({"sampling_rate": sr, "raw": y})
369
  text = result["text"].strip()
370
 
371
- return text if text else "I heard audio but no clear speech. Try speaking louder."
372
 
373
  except Exception as e:
374
- return f"Please try again - {str(e)}"
375
 
 
 
 
 
376
 
377
 
378
 
 
299
 
300
 
301
  # Load model at startup
302
+ # Function to handle speech-to-text conversion
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  def transcribe_audio(audio):
304
+ """Simple working transcription"""
305
  if audio is None:
306
  return "Please record audio first"
307
 
308
  try:
309
  sr, y = audio
310
 
311
+ # Basic preprocessing
312
+ if y.ndim > 1:
313
+ y = y.mean(axis=1) # Convert to mono
314
+
315
+ y = y.astype(np.float32)
316
+ max_val = np.max(np.abs(y))
317
+ if max_val > 0:
318
+ y = y / max_val
 
 
 
 
319
 
320
+ # Simple pipeline call
321
  transcriber = pipeline(
322
  "automatic-speech-recognition",
323
  model="openai/whisper-base.en"
324
  )
325
 
 
326
  result = transcriber({"sampling_rate": sr, "raw": y})
327
  text = result["text"].strip()
328
 
329
+ return text if text else "No clear speech detected. Try speaking louder."
330
 
331
  except Exception as e:
332
+ return f"Recording error: {str(e)}"
333
 
334
+ # Clear chat history function
335
+ def clear_chat_history():
336
+ chat_memory.clear()
337
+ return [], None
338
 
339
 
340