"""Gradio chat UI accepting typed or recorded messages, with spoken replies for voice input."""

import gradio as gr

from chat import response_ai
from voice import speak_text, transcribe_audio

# Stylesheet for the recorder box and the visually hidden reply player.
# Every fragment starts with a single space so the concatenation reproduces
# the single-spaced stylesheet text exactly.
CSS = (
    " #record-box {"
    " background: rgb(39, 39, 42) !important;"
    " border-radius: 8px !important;"
    " padding: 8px !important;"
    " }"
    " #record-box #mic {"
    " width: 100% !important;"
    " background: transparent !important;"
    " }"
    " #record-box #mic .wrap,"
    " #record-box #mic .form,"
    " #record-box #mic .empty,"
    " #record-box #mic .audio-container {"
    " background: transparent !important;"
    " }"
    " #record-box #mic .empty,"
    " #record-box #mic .audio-container {"
    " min-height: 0 !important;"
    " }"
    " #reply-audio {"
    " position: absolute !important;"
    " width: 1px !important;"
    " height: 1px !important;"
    " overflow: hidden !important;"
    " opacity: 0 !important;"
    " pointer-events: none !important;"
    " } "
)

# Client-side hook: locate the media element inside #reply-audio, rewind it
# and start playback, waiting for 'loadeddata' when it is not yet ready.
# Fragments are joined the same way as in CSS above.
RESET_AUDIO_JS = (
    " () => {"
    " const app = document.querySelector('gradio-app');"
    " const root = (app && app.shadowRoot) ? app.shadowRoot : document;"
    " const c = root.querySelector('#reply-audio');"
    " if (!c) return;"
    " const m = c.querySelector('audio, video');"
    " if (!m) return;"
    " const play = () => {"
    " try { m.currentTime = 0; } catch (e) {}"
    " const p = m.play();"
    " if (p && p.catch) p.catch(() => {});"
    " };"
    " if (m.readyState >= 2) play();"
    " else m.addEventListener('loadeddata', play, { once: true });"
    " } "
)


def chat_message(role: str, text: str) -> dict:
    """Build one chat entry: a role plus a single structured text block."""
    text_block = {"type": "text", "text": text}
    return {"role": role, "content": [text_block]}


def voice_transcribe(audio, history):
    """Voice step 1: turn the recording into a user message and reset the mic.

    Returns ``(messages, None)``; the ``None`` is the value sent back to the
    mic component. A transcription failure is appended as an assistant
    "Error: ..." entry. A missing recording or an empty transcript leaves the
    conversation unchanged.
    """
    messages = history if history else []
    if audio is None:
        return messages, None

    try:
        heard = transcribe_audio(audio)
    except Exception as exc:
        failure = chat_message("assistant", f"Error: {exc}")
        return messages + [failure], None

    if heard:
        return messages + [chat_message("user", heard)], None
    return messages, None


def voice_reply(history):
    """Voice step 2: generate the reply and its speech audio.

    Generator yielding exactly one ``(messages, audio_path)`` pair. Nothing is
    generated unless the latest entry is a user message. If either the reply
    or the speech synthesis raises, an assistant "Error: ..." entry is yielded
    with no audio.
    """
    messages = history or []
    awaiting_reply = bool(messages) and messages[-1]["role"] == "user"
    if not awaiting_reply:
        yield messages, None
        return

    try:
        answer = response_ai(messages)
        speech_file = speak_text(answer)
    except Exception as exc:
        outcome = messages + [chat_message("assistant", f"Error: {exc}")], None
    else:
        outcome = messages + [chat_message("assistant", answer)], speech_file
    yield outcome


def add_user_message(message, history):
    """Text step 1: append the typed message and blank the textbox.

    Returns ``(messages, textbox_value)``. Empty or whitespace-only input
    leaves both the conversation and the textbox content as they were.
    """
    messages = history or []
    cleaned = message.strip() if message else ""
    if not cleaned:
        return messages, message
    return messages + [chat_message("user", cleaned)], ""


def bot_reply(history):
    """Text step 2: generate the assistant reply for the chat only (no TTS).

    Generator yielding exactly one updated message list. Nothing is generated
    unless the latest entry is a user message; a failure is reported as an
    assistant "Error: ..." entry.
    """
    messages = history or []
    awaiting_reply = bool(messages) and messages[-1]["role"] == "user"
    if not awaiting_reply:
        yield messages
        return

    try:
        answer = response_ai(messages)
    except Exception as exc:
        updated = messages + [chat_message("assistant", f"Error: {exc}")]
    else:
        updated = messages + [chat_message("assistant", answer)]
    yield updated


def create_demo(on_load=None) -> gr.Blocks:
    """Assemble the chat interface and wire up its events.

    Args:
        on_load: Optional callable registered through ``demo.load`` with no
            inputs or outputs; skipped when ``None``.

    Returns:
        The configured ``gr.Blocks`` app, with its queue set up.
    """
    with gr.Blocks(title="Kush Digital Twin — Voice") as demo:
        chat_view = gr.Chatbot(show_label=False, height=320, autoscroll=True)
        message_box = gr.Textbox(
            show_label=False,
            placeholder="Hey I'm Kush, Type a message or record a message to start the conversation",
            container=False,
        )
        with gr.Column(elem_id="record-box"):
            recorder = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_label=False,
                container=False,
                elem_id="mic",
            )
        reply_player = gr.Audio(
            autoplay=True,
            interactive=False,
            elem_id="reply-audio",
            buttons=[],
        )

        # Voice path: show the transcript first, then produce reply + audio.
        transcribed = recorder.stop_recording(
            fn=voice_transcribe,
            inputs=[recorder, chat_view],
            outputs=[chat_view, recorder],
            queue=False,
        )
        transcribed.then(
            fn=voice_reply,
            inputs=chat_view,
            outputs=[chat_view, reply_player],
        )

        # Text path: echo the typed message first, then produce the reply.
        submitted = message_box.submit(
            fn=add_user_message,
            inputs=[message_box, chat_view],
            outputs=[chat_view, message_box],
            queue=False,
        )
        submitted.then(fn=bot_reply, inputs=chat_view, outputs=chat_view)

        # Run the client-side replay hook whenever the reply audio changes.
        reply_player.change(fn=None, inputs=None, outputs=None, js=RESET_AUDIO_JS)

        if on_load is not None:
            demo.load(fn=on_load, inputs=None, outputs=None, queue=False)

    demo.queue(default_concurrency_limit=1)
    # NOTE(review): stores the stylesheet on a private Blocks attribute;
    # presumably read by Gradio at launch time — confirm against the
    # installed Gradio version.
    demo._deprecated_css = CSS
    return demo