DreamStream-1 committed on
Commit
0f50d71
·
verified ·
1 Parent(s): 6b9dfec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -24
app.py CHANGED
@@ -10,6 +10,7 @@ import time
10
  from dotenv import load_dotenv
11
  from gtts import gTTS
12
  import io
 
13
 
14
  # Load environment variables
15
  load_dotenv()
@@ -379,10 +380,20 @@ def process_voice_note(audio_file, history):
379
  if audio_file is None:
380
  return "Please record or upload an audio file.", history, "", None, None
381
  try:
 
382
  # If audio_file is a string (filepath), open it as a file
383
  if isinstance(audio_file, str):
384
  with open(audio_file, "rb") as f:
385
  transcript = rag.transcribe_audio(f)
 
 
 
 
 
 
 
 
 
386
  else:
387
  transcript = rag.transcribe_audio(audio_file)
388
  if not transcript or not str(transcript).strip():
@@ -414,37 +425,34 @@ with gr.Blocks(css=custom_css, title="Document Q&A System") as demo:
414
  audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
415
  tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
416
 
417
- with gr.Row():
418
- with gr.Column(scale=1, min_width=350):
419
- with gr.Group(elem_classes="compact-box"):
420
- gr.Markdown("<div class='section-title'>Document Q&A Controls</div>")
421
  file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
422
  mic_btn = gr.Button("🎤 Record Voice", elem_classes="audio-btn")
423
  audio_input
424
  send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
425
  reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
426
  file_output
427
- file_input.change(process_file, file_input, file_output)
428
- def reset_all():
429
- rag.thread_id = None
430
- return "", [], "", None, None
431
- reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
432
- def show_audio():
433
- return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
434
- mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
435
- def hide_audio():
436
- return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
437
- send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
438
- send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
439
- tts_output
440
- with gr.Column(scale=3, min_width=500):
441
- with gr.Group(elem_classes="compact-box"):
442
  chatbot
443
- with gr.Row():
444
- question
445
- send_btn = gr.Button("Send", elem_classes="send-btn")
446
- send_btn.click(process_question, [question, chatbot], [question, chatbot, question, audio_input])
447
- question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
 
 
 
 
 
 
 
 
 
 
 
448
 
449
  # Add JavaScript for audio handling
450
  demo.load(
 
10
  from dotenv import load_dotenv
11
  from gtts import gTTS
12
  import io
13
+ import numpy as np
14
 
15
  # Load environment variables
16
  load_dotenv()
 
380
  if audio_file is None:
381
  return "Please record or upload an audio file.", history, "", None, None
382
  try:
383
+ transcript = None
384
  # If audio_file is a string (filepath), open it as a file
385
  if isinstance(audio_file, str):
386
  with open(audio_file, "rb") as f:
387
  transcript = rag.transcribe_audio(f)
388
+ # If audio_file is a tuple (sample_rate, np.ndarray), save as temp WAV and open
389
+ elif isinstance(audio_file, tuple) and isinstance(audio_file[1], np.ndarray):
390
+ import soundfile as sf
391
+ sample_rate, audio_data = audio_file
392
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
393
+ sf.write(tmp.name, audio_data, sample_rate)
394
+ tmp.flush()
395
+ with open(tmp.name, "rb") as f:
396
+ transcript = rag.transcribe_audio(f)
397
  else:
398
  transcript = rag.transcribe_audio(audio_file)
399
  if not transcript or not str(transcript).strip():
 
425
  audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
426
  tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
427
 
428
+ with gr.Group(elem_classes="compact-box"):
429
+ gr.Markdown("<div class='section-title'>Document Q&A</div>")
430
+ with gr.Row():
431
+ with gr.Column(scale=1, min_width=350):
432
  file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
433
  mic_btn = gr.Button("🎤 Record Voice", elem_classes="audio-btn")
434
  audio_input
435
  send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
436
  reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
437
  file_output
438
+ with gr.Column(scale=3, min_width=500):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  chatbot
440
+ with gr.Row():
441
+ question
442
+ file_input.change(process_file, file_input, file_output)
443
+ def reset_all():
444
+ rag.thread_id = None
445
+ return "", [], "", None, None
446
+ reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
447
+ def show_audio():
448
+ return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
449
+ mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
450
+ def hide_audio():
451
+ return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
452
+ send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
453
+ send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
454
+ question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
455
+ tts_output
456
 
457
  # Add JavaScript for audio handling
458
  demo.load(