Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import time
|
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from gtts import gTTS
|
| 12 |
import io
|
|
|
|
| 13 |
|
| 14 |
# Load environment variables
|
| 15 |
load_dotenv()
|
|
@@ -379,10 +380,20 @@ def process_voice_note(audio_file, history):
|
|
| 379 |
if audio_file is None:
|
| 380 |
return "Please record or upload an audio file.", history, "", None, None
|
| 381 |
try:
|
|
|
|
| 382 |
# If audio_file is a string (filepath), open it as a file
|
| 383 |
if isinstance(audio_file, str):
|
| 384 |
with open(audio_file, "rb") as f:
|
| 385 |
transcript = rag.transcribe_audio(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
else:
|
| 387 |
transcript = rag.transcribe_audio(audio_file)
|
| 388 |
if not transcript or not str(transcript).strip():
|
|
@@ -414,37 +425,34 @@ with gr.Blocks(css=custom_css, title="Document Q&A System") as demo:
|
|
| 414 |
audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
|
| 415 |
tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
|
| 416 |
|
| 417 |
-
with gr.
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
|
| 422 |
mic_btn = gr.Button("🎤 Record Voice", elem_classes="audio-btn")
|
| 423 |
audio_input
|
| 424 |
send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
|
| 425 |
reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
|
| 426 |
file_output
|
| 427 |
-
|
| 428 |
-
def reset_all():
|
| 429 |
-
rag.thread_id = None
|
| 430 |
-
return "", [], "", None, None
|
| 431 |
-
reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
|
| 432 |
-
def show_audio():
|
| 433 |
-
return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
|
| 434 |
-
mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
|
| 435 |
-
def hide_audio():
|
| 436 |
-
return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
|
| 437 |
-
send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
|
| 438 |
-
send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
|
| 439 |
-
tts_output
|
| 440 |
-
with gr.Column(scale=3, min_width=500):
|
| 441 |
-
with gr.Group(elem_classes="compact-box"):
|
| 442 |
chatbot
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
# Add JavaScript for audio handling
|
| 450 |
demo.load(
|
|
|
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from gtts import gTTS
|
| 12 |
import io
|
| 13 |
+
import numpy as np
|
| 14 |
|
| 15 |
# Load environment variables
|
| 16 |
load_dotenv()
|
|
|
|
| 380 |
if audio_file is None:
|
| 381 |
return "Please record or upload an audio file.", history, "", None, None
|
| 382 |
try:
|
| 383 |
+
transcript = None
|
| 384 |
# If audio_file is a string (filepath), open it as a file
|
| 385 |
if isinstance(audio_file, str):
|
| 386 |
with open(audio_file, "rb") as f:
|
| 387 |
transcript = rag.transcribe_audio(f)
|
| 388 |
+
# If audio_file is a tuple (sample_rate, np.ndarray), save as temp WAV and open
|
| 389 |
+
elif isinstance(audio_file, tuple) and isinstance(audio_file[1], np.ndarray):
|
| 390 |
+
import soundfile as sf
|
| 391 |
+
sample_rate, audio_data = audio_file
|
| 392 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
| 393 |
+
sf.write(tmp.name, audio_data, sample_rate)
|
| 394 |
+
tmp.flush()
|
| 395 |
+
with open(tmp.name, "rb") as f:
|
| 396 |
+
transcript = rag.transcribe_audio(f)
|
| 397 |
else:
|
| 398 |
transcript = rag.transcribe_audio(audio_file)
|
| 399 |
if not transcript or not str(transcript).strip():
|
|
|
|
| 425 |
audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
|
| 426 |
tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
|
| 427 |
|
| 428 |
+
with gr.Group(elem_classes="compact-box"):
|
| 429 |
+
gr.Markdown("<div class='section-title'>Document Q&A</div>")
|
| 430 |
+
with gr.Row():
|
| 431 |
+
with gr.Column(scale=1, min_width=350):
|
| 432 |
file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
|
| 433 |
mic_btn = gr.Button("🎤 Record Voice", elem_classes="audio-btn")
|
| 434 |
audio_input
|
| 435 |
send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
|
| 436 |
reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
|
| 437 |
file_output
|
| 438 |
+
with gr.Column(scale=3, min_width=500):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
chatbot
|
| 440 |
+
with gr.Row():
|
| 441 |
+
question
|
| 442 |
+
file_input.change(process_file, file_input, file_output)
|
| 443 |
+
def reset_all():
|
| 444 |
+
rag.thread_id = None
|
| 445 |
+
return "", [], "", None, None
|
| 446 |
+
reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
|
| 447 |
+
def show_audio():
|
| 448 |
+
return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
|
| 449 |
+
mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
|
| 450 |
+
def hide_audio():
|
| 451 |
+
return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
|
| 452 |
+
send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
|
| 453 |
+
send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
|
| 454 |
+
question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
|
| 455 |
+
tts_output
|
| 456 |
|
| 457 |
# Add JavaScript for audio handling
|
| 458 |
demo.load(
|