Chia Woon Yap committed on
Commit
232c58d
·
verified ·
1 Parent(s): f2bf26d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -43
app.py CHANGED
@@ -123,7 +123,7 @@ def retrieve_documents(query):
123
 
124
  # Function to convert tuple format to message format
125
  def convert_to_message_format(chat_history):
126
- """Convert from [(user, bot)] format to [{"role": "user", "content": user}, {"role": "assistant", "content": bot}] format"""
127
  message_format = []
128
  for user_msg, bot_msg in chat_history:
129
  message_format.append({"role": "user", "content": user_msg})
@@ -132,7 +132,7 @@ def convert_to_message_format(chat_history):
132
 
133
  # Function to convert message format to tuple format for processing
134
  def convert_to_tuple_format(chat_history):
135
- """Convert from message format back to tuple format for processing"""
136
  tuple_format = []
137
  for i in range(0, len(chat_history), 2):
138
  if i+1 < len(chat_history):
@@ -278,57 +278,62 @@ def process_document(file):
278
  """
279
  # Real-time Whisper setup - cache the model
280
  #@gr.cache_resource
281
- def load_realtime_whisper():
282
- """Load optimized Whisper model for real-time transcription"""
283
- device = "cuda" if torch.cuda.is_available() else "cpu"
284
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
285
 
286
- # Use tiny model for real-time speed
287
- realtime_transcriber = pipeline(
288
- "automatic-speech-recognition",
289
- model="openai/whisper-tiny.en",
290
- device=device,
291
- torch_dtype=torch_dtype,
292
- )
293
 
294
- return realtime_transcriber
 
 
 
 
 
295
 
296
  # Load model at startup
297
- realtime_transcriber = load_realtime_whisper()
298
 
299
- def transcribe_audio(audio):
300
- """Real-time optimized transcription"""
301
- if audio is None:
302
- return ""
303
 
304
- sr, y = audio
305
 
306
  # Quick preprocessing
307
- if y.ndim > 1:
308
- y = y.mean(axis=1) # Convert to mono
309
-
310
- y = y.astype(np.float32)
311
- max_val = np.max(np.abs(y))
312
- if max_val > 0:
313
- y = y / max_val
314
 
315
- try:
316
- # Use real-time transcriber with optimized settings
317
- result = realtime_transcriber(
318
- {"sampling_rate": sr, "raw": y},
319
- generate_kwargs={
320
- "language": "english",
321
- "task": "transcribe",
322
- "temperature": 0.0, # More deterministic
323
- "no_repeat_ngram_size": 2, # Reduce repetitions
324
- }
325
- )
326
- return result["text"]
327
- except Exception as e:
328
- print(f"Transcription error: {e}")
329
- return "Could not transcribe audio. Please try again."
 
 
 
 
 
 
330
 
331
- """
332
 
333
 
334
  #Common Issue 1: Audio Format Problems
 
123
 
124
  # Function to convert tuple format to message format
125
  def convert_to_message_format(chat_history):
126
+ #Convert from [(user, bot)] format to [{"role": "user", "content": user}, {"role": "assistant", "content": bot}] format
127
  message_format = []
128
  for user_msg, bot_msg in chat_history:
129
  message_format.append({"role": "user", "content": user_msg})
 
132
 
133
  # Function to convert message format to tuple format for processing
134
  def convert_to_tuple_format(chat_history):
135
+ #Convert from message format back to tuple format for processing
136
  tuple_format = []
137
  for i in range(0, len(chat_history), 2):
138
  if i+1 < len(chat_history):
 
278
  """
279
  # Real-time Whisper setup - cache the model
280
  #@gr.cache_resource
281
+ #def load_realtime_whisper():
282
+ # """Load optimized Whisper model for real-time transcription"""
283
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
284
+ # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
285
 
286
+ # # Use tiny model for real-time speed
287
+ # realtime_transcriber = pipeline(
288
+ # "automatic-speech-recognition",
289
+ # model="openai/whisper-tiny.en",
290
+ # device=device,
291
+ # torch_dtype=torch_dtype,
292
+ # )
293
 
294
+ # return realtime_transcriber
295
+
296
+
297
+
298
+
299
+
300
 
301
  # Load model at startup
302
+ #realtime_transcriber = load_realtime_whisper()
303
 
304
+ #def transcribe_audio(audio):
305
+ # """Real-time optimized transcription"""
306
+ # if audio is None:
307
+ # return ""
308
 
309
+ # sr, y = audio
310
 
311
  # Quick preprocessing
312
+ # if y.ndim > 1:
313
+ # y = y.mean(axis=1) # Convert to mono
 
 
 
 
 
314
 
315
+ # y = y.astype(np.float32)
316
+ # max_val = np.max(np.abs(y))
317
+ # if max_val > 0:
318
+ # y = y / max_val
319
+ #
320
+ # try:
321
+ # # Use real-time transcriber with optimized settings
322
+ # result = realtime_transcriber(
323
+ # {"sampling_rate": sr, "raw": y},
324
+ # generate_kwargs={
325
+ # "language": "english",
326
+ # "task": "transcribe",
327
+ # "temperature": 0.0, # More deterministic
328
+ # "no_repeat_ngram_size": 2, # Reduce repetitions
329
+ # }
330
+ # )
331
+ # return result["text"]
332
+ # except Exception as e:
333
+ # print(f"Transcription error: {e}")
334
+ # return "Could not transcribe audio. Please try again."
335
+
336
 
 
337
 
338
 
339
  #Common Issue 1: Audio Format Problems