Spaces:
Running
Running
| """ | |
app.py — Telecalling Agent — Gradio 6 UI

Layout
──────
┌──────────────────────────────────────────────────────┐
│  AI Telecalling Agent                [status badge]  │
├───────────────────────┬──────────────────────────────┤
│  LIVE CALL            │  EXTRACTED DATA              │
│  ┌────────────────┐   │  [intent markdown table]     │
│  │ Audio stream   │   │                              │
│  └────────────────┘   ├──────────────────────────────┤
│  [Start] [End]        │  AGENT RESPONSE              │
│  ┌────────────────┐   │  [spoken response box]       │
│  │ Transcript     │   │                              │
│  └────────────────┘   │  BOOKING CONFIRMED           │
│                       │  [booking details box]       │
├───────────────────────┴──────────────────────────────┤
│  CALL LOG                                            │
│  [dataframe — recent calls]                          │
└──────────────────────────────────────────────────────┘
| """ | |
| import logging | |
| import os | |
| import json | |
| import gradio as gr | |
| import numpy as np | |
| from pipeline.transcriber import get_transcriber | |
| from pipeline.intent_parser import get_intent_parser | |
| from pipeline.evaluater import get_evaluator | |
| from config import APP_TITLE, APP_DESCRIPTION, SERVER_PORT, SERVER_NAME | |
| from pipeline.orchestrator import CallSession, PipelineUpdate | |
| from db import init_db | |
# Load the optional HuggingFace config file and export its token as HF_TOKEN
# early in module import. This step is strictly best-effort: any problem with
# the file leaves whatever HF_TOKEN is already in the environment untouched.
hf_token = ""
try:
    with open("hf_config.json", "r", encoding="utf-8") as f:
        hf_cfg = json.load(f)
    hf_token = hf_cfg.get("huggingface", {}).get("hub", {}).get("token", "")
except (OSError, ValueError, AttributeError):
    # OSError        -> file missing or unreadable
    # ValueError     -> invalid JSON (json.JSONDecodeError) or undecodable bytes
    # AttributeError -> JSON root / "huggingface" / "hub" is not an object
    #                   (e.g. null), so the .get() chain cannot be followed
    hf_token = ""

# Ignore empty values, non-string values (os.environ only accepts str) and the
# literal "${HF_TOKEN}" placeholder, i.e. a template that was never filled in.
if isinstance(hf_token, str) and hf_token and hf_token != "${HF_TOKEN}":
    os.environ["HF_TOKEN"] = hf_token
# Root logging: INFO level, timestamped "[LEVEL] logger-name: message" lines.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

# Third-party loggers that are too chatty at INFO; raise them to WARNING.
for _noisy_logger_name in (
    "httpx",
    "huggingface_hub",
    "transformers.modeling_utils",
):
    logging.getLogger(_noisy_logger_name).setLevel(logging.WARNING)

# Initialize the database once, at import time.
init_db()
# ── CSS ───────────────────────────────────────────────────────────────────────
# Custom stylesheet handed to gr.Blocks(css=CSS) in build_app().
# The "#..." selectors match the elem_id values assigned to components there
# (status-badge, agent-box, booking-box, transcript-box, vad-dot, intent-panel,
# call-log); the ".call-btn-*" selectors match the elem_classes of the buttons.
CSS = """
/* Global */
.gradio-container { font-family: 'Inter', sans-serif; max-width: 1200px; }
/* Status badge */
#status-badge textarea {
font-size: 0.9rem;
font-weight: 600;
text-align: center;
border-radius: 20px;
padding: 4px 12px;
background: #f0fdf4;
border: 1px solid #86efac;
color: #166534;
}
/* Agent response */
#agent-box textarea {
font-size: 1.05rem;
font-style: italic;
background: #eff6ff;
border: 1px solid #93c5fd;
border-radius: 8px;
color: #1e3a5f;
min-height: 80px;
}
/* Booking confirmed */
#booking-box textarea {
background: #f0fdf4;
border: 1px solid #4ade80;
border-radius: 8px;
color: #14532d;
font-weight: 500;
}
/* Transcript */
#transcript-box textarea {
font-family: monospace;
font-size: 0.85rem;
background: #1e1e2e;
color: #cdd6f4;
border-radius: 8px;
min-height: 180px;
}
/* VAD indicator dot */
#vad-dot {
text-align: center;
font-size: 1.2rem;
}
/* Call buttons */
.call-btn-start { background: #16a34a !important; color: white !important; }
.call-btn-end { background: #dc2626 !important; color: white !important; }
.call-btn-reset { background: #6b7280 !important; color: white !important; }
/* Intent table inside Markdown */
#intent-panel table { width: 100%; border-collapse: collapse; font-size: 0.88rem; }
#intent-panel th, #intent-panel td {
padding: 5px 10px;
border: 1px solid #e2e8f0;
text-align: left;
}
#intent-panel tr:nth-child(even) { background: #f8fafc; }
/* Call log */
#call-log { font-size: 0.82rem; }
"""
# ── UI helpers ────────────────────────────────────────────────────────────────
| def _format_transcript(lines: list[str]) -> str: | |
| if not lines: | |
| return "(waiting for speechβ¦)" | |
| return "\n".join(f"[{i+1}] {l}" for i, l in enumerate(lines)) | |
| def _format_booking(info: dict | None) -> str: | |
| if not info: | |
| return "" | |
| return ( | |
| f"β Booking #{info['booking_id']} confirmed!\n" | |
| f" π {info['date']} π {info['time']} " | |
| f"({info['duration']} min)\n" | |
| f" π€ {info['caller']} π {info['type'].replace('_', ' ').title()}" | |
| ) | |
| def _call_log_rows(records: list[dict]) -> list[list]: | |
| rows = [] | |
| for r in records: | |
| ts = r.get("timestamp", "")[:16].replace("T", " ") | |
| rows.append([ | |
| r.get("id", ""), | |
| ts, | |
| r.get("caller_name") or "β", | |
| r.get("intent") or "β", | |
| r.get("decision") or "β", | |
| r.get("status") or "β", | |
| ]) | |
| return rows | |
# ── Gradio App ────────────────────────────────────────────────────────────────
def build_app() -> gr.Blocks:
    """Build the Gradio Blocks UI and wire up its event handlers.

    The page has a header with a status badge, a two-column main row (left:
    microphone stream, call buttons, live transcript; right: extracted data,
    agent response, booking status) and a call-log table underneath.

    Every handler receives the per-session ``CallSession`` held in a
    ``gr.State`` (created lazily on first use) and returns it back followed by
    one value per display component, produced by ``_unpack``.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is expected to launch it.
    """
    # NOTE(review): the module docstring targets Gradio 6, whose docs describe
    # `css`/`theme` as launch() options — confirm they are still honoured when
    # passed to gr.Blocks() on the pinned Gradio version.
    with gr.Blocks(css=CSS, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
        # Per-session state: starts as None, replaced by a CallSession on the
        # first event (see _get_or_create_session).
        session_state = gr.State(value=None)

        # Header
        gr.Markdown(f"# {APP_TITLE}\n_{APP_DESCRIPTION}_")
        status_badge = gr.Textbox(
            value="π’ Ready β press Start Call",
            label="",
            interactive=False,
            elem_id="status-badge",
        )

        # Main row
        with gr.Row():
            # Left column: call controls + transcript
            with gr.Column(scale=1):
                gr.Markdown("### π€ Live Call")
                audio_input = gr.Audio(
                    sources=["microphone"],
                    streaming=True,
                    type="numpy",
                    label="Microphone input",
                    interactive=True,
                    elem_id="audio-input",
                )
                gr.Markdown(
                    "_Tip: click the microphone widget to grant browser permission, then speak. "
                    "The call will start automatically on the first live audio input, or you can press π Start Call._"
                )
                vad_dot = gr.Markdown("β« _mic idle_", elem_id="vad-dot")
                with gr.Row():
                    btn_start = gr.Button(
                        "π Start Call",
                        variant="primary",
                        elem_classes=["call-btn-start"],
                    )
                    btn_end = gr.Button(
                        "π΅ End Call",
                        variant="stop",
                        elem_classes=["call-btn-end"],
                    )
                    btn_reset = gr.Button(
                        "π Reset",
                        variant="secondary",
                        elem_classes=["call-btn-reset"],
                    )
                transcript_box = gr.Textbox(
                    label="π Live Transcript",
                    value="(waiting for speechβ¦)",
                    lines=8,
                    max_lines=20,
                    interactive=False,
                    elem_id="transcript-box",
                )

            # Right column: intent + agent response + booking
            with gr.Column(scale=1):
                gr.Markdown("### π Extracted Data")
                intent_panel = gr.Markdown(
                    "_No data yet β waiting for first utteranceβ¦_",
                    elem_id="intent-panel",
                )
                gr.Markdown("### π€ Agent Response")
                agent_box = gr.Textbox(
                    value="",
                    label="",
                    lines=3,
                    interactive=False,
                    elem_id="agent-box",
                    placeholder="Agent will respond hereβ¦",
                )
                # Hidden until a booking is confirmed (see _unpack).
                booking_box = gr.Textbox(
                    value="",
                    label="π Booking Status",
                    lines=3,
                    interactive=False,
                    elem_id="booking-box",
                    visible=False,
                )

        # Call log
        gr.Markdown("### π Call Log")
        call_log_table = gr.Dataframe(
            headers=["ID", "Timestamp", "Caller", "Intent", "Decision", "Status"],
            datatype=["number", "str", "str", "str", "str", "str"],
            value=[],
            interactive=False,
            elem_id="call-log",
            row_count=(5, "dynamic"),
        )

        def _unpack(u: PipelineUpdate):
            """Turn a PipelineUpdate into values ordered exactly like ALL_OUTPUTS."""
            vad_label = "π΄ _Speakingβ¦_" if u.vad_speaking else "β« _mic idle_"
            booking_text = _format_booking(u.booking_confirmed)
            return (
                u.status,                                # status_badge
                vad_label,                               # vad_dot
                _format_transcript(u.transcript_lines),  # transcript_box
                u.intent_md,                             # intent_panel
                u.agent_response,                        # agent_box
                # One combined update sets text and visibility together, so
                # booking_box appears only once in the outputs list.
                gr.update(value=booking_text, visible=bool(booking_text)),
                _call_log_rows(u.call_log),              # call_log_table
            )

        # Display components, in the order _unpack returns their values.
        ALL_OUTPUTS = [
            status_badge,
            vad_dot,
            transcript_box,
            intent_panel,
            agent_box,
            booking_box,
            call_log_table,
        ]
        # Every handler also returns the session object as its first value.
        EVENT_OUTPUTS = [session_state] + ALL_OUTPUTS

        def _get_or_create_session(state):
            """Return the existing CallSession, creating one if state is None."""
            if state is None:
                state = CallSession()
            return state

        # Button callbacks
        def on_start(state):
            """Start a call on this session and refresh all panels."""
            state = _get_or_create_session(state)
            update = state.start_call()
            return (state, *_unpack(update))

        def on_end(state):
            """End the current call on this session and refresh all panels."""
            state = _get_or_create_session(state)
            update = state.end_call()
            return (state, *_unpack(update))

        def on_reset(state):
            """Reset this session and refresh all panels."""
            state = _get_or_create_session(state)
            update = state.reset()
            return (state, *_unpack(update))

        btn_start.click(on_start, inputs=[session_state], outputs=EVENT_OUTPUTS)
        btn_end.click(on_end, inputs=[session_state], outputs=EVENT_OUTPUTS)
        btn_reset.click(on_reset, inputs=[session_state], outputs=EVENT_OUTPUTS)

        def on_audio_stream(audio_chunk, state):
            """Handle one streamed microphone chunk.

            Called by Gradio every ``stream_every`` seconds (0.5 s below) while
            the mic is active.

            audio_chunk: ``(sample_rate: int, data: np.ndarray)`` or ``None``.
            """
            state = _get_or_create_session(state)

            # First live audio on an idle session starts the call implicitly.
            if audio_chunk is not None and not state.call_active:
                logger.info("Auto-starting call session on first live audio input.")
                state.start_call()

            # Nothing to process (no chunk, or the call is still not active):
            # just re-emit the session's current view.
            if audio_chunk is None or not state.call_active:
                return (state, *_unpack(state._build_update()))

            sample_rate, audio_np = audio_chunk
            # Ensure float32 mono: cast, then average the channels of 2-D input.
            # NOTE(review): the cast does not rescale integer PCM samples —
            # confirm process_audio_chunk expects that value range.
            audio_np = np.array(audio_np, dtype=np.float32)
            if audio_np.ndim == 2:
                audio_np = audio_np.mean(axis=1)

            update = state.process_audio_chunk(sample_rate, audio_np)
            return (state, *_unpack(update))

        audio_input.stream(
            fn=on_audio_stream,
            inputs=[audio_input, session_state],
            outputs=EVENT_OUTPUTS,
            stream_every=0.5,  # seconds: half-second chunks
            time_limit=3600,   # allow up to 1-hour calls
        )

    return demo
# ── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    logger.info("Starting Gradio app; prefetching deployed ASR model if needed...")

    # Warm up the ASR model before serving. A failure here is logged and
    # otherwise ignored so the app still starts (the message notes that
    # loading then happens lazily).
    try:
        transcriber = get_transcriber()
        transcriber.prefetch()
    except Exception as exc:
        logger.error(
            "ASR prefetch failed at startup; continuing with lazy loading: %s",
            exc,
        )

    # Build the UI, then serve it on the host/port taken from config.
    launch_options = {
        "server_name": SERVER_NAME,
        "server_port": SERVER_PORT,
        "show_error": True,
    }
    app = build_app()
    app.launch(**launch_options)