Spaces:

build-small-hackathon
/

TeleAgent

Running

844d508 15 days ago

15.8 kB

	"""
	app.py — Telecalling Agent — Gradio 6 UI

	Layout
	──────
	┌─────────────────────────────────────────────────────┐
	│  📞 AI Telecalling Agent            [status badge]  │
	├──────────────────────┬──────────────────────────────┤
	│  🎤 LIVE CALL        │  📋 EXTRACTED DATA           │
	│  ┌────────────────┐  │  [intent markdown table]     │
	│  │  Audio stream  │  │                              │
	│  └────────────────┘  ├──────────────────────────────┤
	│  [Start]  [End]      │  🤖 AGENT RESPONSE           │
	│  ┌────────────────┐  │  [spoken response box]       │
	│  │  Transcript    │  │                              │
	│  └────────────────┘  │  ✅ BOOKING CONFIRMED        │
	│                      │  [booking details box]       │
	├──────────────────────┴──────────────────────────────┤
	│  📁 CALL LOG                                        │
	│  [dataframe — recent calls]                         │
	└─────────────────────────────────────────────────────┘
	"""

	import logging
	import os
	import json

	import gradio as gr
	import numpy as np
	from pipeline.transcriber import get_transcriber
	from pipeline.intent_parser import get_intent_parser
	from pipeline.evaluater import get_evaluator

	from config import APP_TITLE, APP_DESCRIPTION, SERVER_PORT, SERVER_NAME
	from pipeline.orchestrator import CallSession, PipelineUpdate
	from db import init_db

	# Load HuggingFace config and set token early.
	# The file is optional: when it is missing or not valid JSON we fall back to
	# whatever HF_TOKEN is already present in the environment.
	try:
	    with open("hf_config.json", "r", encoding="utf-8") as f:
	        hf_cfg = json.load(f)
	except (FileNotFoundError, json.JSONDecodeError):
	    hf_cfg = {}

	# Walk huggingface → hub → token defensively: valid JSON of an unexpected
	# shape (a list, a null section, …) must not crash startup.
	hf_token = hf_cfg
	for _key in ("huggingface", "hub", "token"):
	    hf_token = hf_token.get(_key) if isinstance(hf_token, dict) else None
	if not isinstance(hf_token, str):
	    hf_token = ""

	# "${HF_TOKEN}" is the unexpanded template placeholder — never export it.
	if hf_token and hf_token != "${HF_TOKEN}":
	    os.environ["HF_TOKEN"] = hf_token

	# Root logging: INFO-level records with timestamp, level and logger name.
	logging.basicConfig(
	    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Suppress verbose logs from HuggingFace hub and its HTTP client.
	for _noisy_logger in ("httpx", "huggingface_hub", "transformers.modeling_utils"):
	    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

	# Initialize database on startup
	init_db()


	# ── CSS ───────────────────────────────────────────────────────────────────────

	# Custom stylesheet handed to gr.Blocks(css=CSS) in build_app().
	# The #id selectors match the elem_id values given to components there
	# (status-badge, agent-box, booking-box, transcript-box, vad-dot,
	# intent-panel, call-log); the .call-btn-* selectors match the elem_classes
	# of the Start / End / Reset buttons.
	CSS = """
	/* Global */
	.gradio-container { font-family: 'Inter', sans-serif; max-width: 1200px; }

	/* Status badge */
	#status-badge textarea {
	font-size: 0.9rem;
	font-weight: 600;
	text-align: center;
	border-radius: 20px;
	padding: 4px 12px;
	background: #f0fdf4;
	border: 1px solid #86efac;
	color: #166534;
	}

	/* Agent response */
	#agent-box textarea {
	font-size: 1.05rem;
	font-style: italic;
	background: #eff6ff;
	border: 1px solid #93c5fd;
	border-radius: 8px;
	color: #1e3a5f;
	min-height: 80px;
	}

	/* Booking confirmed */
	#booking-box textarea {
	background: #f0fdf4;
	border: 1px solid #4ade80;
	border-radius: 8px;
	color: #14532d;
	font-weight: 500;
	}

	/* Transcript */
	#transcript-box textarea {
	font-family: monospace;
	font-size: 0.85rem;
	background: #1e1e2e;
	color: #cdd6f4;
	border-radius: 8px;
	min-height: 180px;
	}

	/* VAD indicator dot */
	#vad-dot {
	text-align: center;
	font-size: 1.2rem;
	}

	/* Call buttons */
	.call-btn-start { background: #16a34a !important; color: white !important; }
	.call-btn-end { background: #dc2626 !important; color: white !important; }
	.call-btn-reset { background: #6b7280 !important; color: white !important; }

	/* Intent table inside Markdown */
	#intent-panel table { width: 100%; border-collapse: collapse; font-size: 0.88rem; }
	#intent-panel th, #intent-panel td {
	padding: 5px 10px;
	border: 1px solid #e2e8f0;
	text-align: left;
	}
	#intent-panel tr:nth-child(even) { background: #f8fafc; }

	/* Call log */
	#call-log { font-size: 0.82rem; }
	"""


	# ── UI helpers ────────────────────────────────────────────────────────────────

	def _format_transcript(lines: list[str]) -> str:
	if not lines:
	return "(waiting for speech…)"
	return "\n".join(f"[{i+1}] {l}" for i, l in enumerate(lines))


	def _format_booking(info: dict \| None) -> str:
	if not info:
	return ""
	return (
	f"✅ Booking #{info['booking_id']} confirmed!\n"
	f" 📅 {info['date']} 🕐 {info['time']} "
	f"({info['duration']} min)\n"
	f" 👤 {info['caller']} 📞 {info['type'].replace('_', ' ').title()}"
	)


	def _call_log_rows(records: list[dict]) -> list[list]:
	rows = []
	for r in records:
	ts = r.get("timestamp", "")[:16].replace("T", " ")
	rows.append([
	r.get("id", ""),
	ts,
	r.get("caller_name") or "—",
	r.get("intent") or "—",
	r.get("decision") or "—",
	r.get("status") or "—",
	])
	return rows


	# ── Gradio App ────────────────────────────────────────────────────────────────

	def build_app() -> gr.Blocks:
	    """Build the telecalling-agent UI and wire up its callbacks.

	    The layout is a status badge, a two-column main row (live call controls
	    and transcript on the left; extracted intent, agent response and booking
	    status on the right) and a call-log table. Every callback takes the
	    per-session ``CallSession`` held in a ``gr.State`` and returns it together
	    with fresh values for all display components.

	    Returns:
	        The assembled ``gr.Blocks`` app (not yet launched).
	    """

	    with gr.Blocks(css=CSS, title=APP_TITLE, theme=gr.themes.Soft()) as demo:

	        # ── Per-session state ──────────────────────────────────────────
	        # gr.State holds one CallSession object per browser tab.
	        session_state = gr.State(value=None)

	        # ── Header ─────────────────────────────────────────────────────
	        gr.Markdown(f"# {APP_TITLE}\n_{APP_DESCRIPTION}_")

	        status_badge = gr.Textbox(
	            value="🟢 Ready — press Start Call",
	            label="",
	            interactive=False,
	            elem_id="status-badge",
	        )

	        # ── Main row ───────────────────────────────────────────────────
	        with gr.Row():

	            # ── Left column: call controls + transcript ────────────────
	            with gr.Column(scale=1):
	                gr.Markdown("### 🎤 Live Call")

	                audio_input = gr.Audio(
	                    sources=["microphone"],
	                    streaming=True,
	                    type="numpy",
	                    label="Microphone input",
	                    interactive=True,
	                    elem_id="audio-input",
	                )

	                gr.Markdown(
	                    "_Tip: click the microphone widget to grant browser permission, then speak. "
	                    "The call will start automatically on the first live audio input, or you can press 📞 Start Call._"
	                )

	                vad_dot = gr.Markdown("⚫ _mic idle_", elem_id="vad-dot")

	                with gr.Row():
	                    btn_start = gr.Button(
	                        "📞 Start Call", variant="primary",
	                        elem_classes=["call-btn-start"],
	                    )
	                    btn_end = gr.Button(
	                        "📵 End Call", variant="stop",
	                        elem_classes=["call-btn-end"],
	                    )
	                    btn_reset = gr.Button(
	                        "🔄 Reset", variant="secondary",
	                        elem_classes=["call-btn-reset"],
	                    )

	                transcript_box = gr.Textbox(
	                    label="📝 Live Transcript",
	                    value="(waiting for speech…)",
	                    lines=8,
	                    max_lines=20,
	                    interactive=False,
	                    elem_id="transcript-box",
	                )

	            # ── Right column: intent + agent response + booking ────────
	            with gr.Column(scale=1):
	                gr.Markdown("### 📋 Extracted Data")

	                intent_panel = gr.Markdown(
	                    "_No data yet — waiting for first utterance…_",
	                    elem_id="intent-panel",
	                )

	                gr.Markdown("### 🤖 Agent Response")

	                agent_box = gr.Textbox(
	                    value="",
	                    label="",
	                    lines=3,
	                    interactive=False,
	                    elem_id="agent-box",
	                    placeholder="Agent will respond here…",
	                )

	                # Hidden until a booking is confirmed (see _unpack).
	                booking_box = gr.Textbox(
	                    value="",
	                    label="📅 Booking Status",
	                    lines=3,
	                    interactive=False,
	                    elem_id="booking-box",
	                    visible=False,
	                )

	        # ── Call log ───────────────────────────────────────────────────
	        gr.Markdown("### 📁 Call Log")

	        call_log_table = gr.Dataframe(
	            headers=["ID", "Timestamp", "Caller", "Intent", "Decision", "Status"],
	            datatype=["number", "str", "str", "str", "str", "str"],
	            value=[],
	            interactive=False,
	            elem_id="call-log",
	            row_count=(5, "dynamic"),
	        )

	        # ── Helper: unpack PipelineUpdate → tuple of component values ──
	        def _unpack(u: PipelineUpdate):
	            """Return values in the exact order of ALL_OUTPUTS below."""
	            vad_label = "🔴 _Speaking…_" if u.vad_speaking else "⚫ _mic idle_"
	            booking_text = _format_booking(u.booking_confirmed)
	            return (
	                u.status,                                # status_badge
	                vad_label,                               # vad_dot
	                _format_transcript(u.transcript_lines),  # transcript_box
	                u.intent_md,                             # intent_panel
	                u.agent_response,                        # agent_box
	                # One update carries both text and visibility, so booking_box
	                # appears only once in the outputs list; the box is shown
	                # only when there is booking text to display.
	                gr.update(value=booking_text, visible=bool(booking_text)),
	                _call_log_rows(u.call_log),              # call_log_table
	            )

	        # ── All output components in one list (matches _unpack order) ──
	        ALL_OUTPUTS = [
	            status_badge,
	            vad_dot,
	            transcript_box,
	            intent_panel,
	            agent_box,
	            booking_box,
	            call_log_table,
	        ]

	        # ── Session factory ────────────────────────────────────────────
	        def _get_or_create_session(state):
	            """Return the existing CallSession, creating one on first use."""
	            if state is None:
	                state = CallSession()
	            return state

	        # ── Button callbacks ───────────────────────────────────────────
	        # Each returns the session first (written back to session_state),
	        # followed by the unpacked display values.

	        def on_start(state):
	            state = _get_or_create_session(state)
	            update = state.start_call()
	            return (state, *_unpack(update))

	        def on_end(state):
	            state = _get_or_create_session(state)
	            update = state.end_call()
	            return (state, *_unpack(update))

	        def on_reset(state):
	            state = _get_or_create_session(state)
	            update = state.reset()
	            return (state, *_unpack(update))

	        BTN_OUTPUTS = [session_state] + ALL_OUTPUTS

	        btn_start.click(on_start, inputs=[session_state], outputs=BTN_OUTPUTS)
	        btn_end.click(on_end, inputs=[session_state], outputs=BTN_OUTPUTS)
	        btn_reset.click(on_reset, inputs=[session_state], outputs=BTN_OUTPUTS)

	        # ── Audio streaming callback ───────────────────────────────────
	        # Fires every `stream_every` seconds with (sample_rate, np.ndarray).
	        # We pass the current session state in and get it back (updated).

	        def on_audio_stream(audio_chunk, state):
	            """
	            Called by Gradio every 0.5 s while the mic is active.
	            audio_chunk: (sample_rate: int, data: np.ndarray) | None
	            """
	            state = _get_or_create_session(state)

	            # NOTE(review): any chunk that arrives while no call is active
	            # starts one — presumably including chunks that keep streaming
	            # after "End Call" was pressed. Confirm that is intended.
	            if audio_chunk is not None and not state.call_active:
	                logger.info("Auto-starting call session on first live audio input.")
	                state.start_call()

	            # Nothing to process: no audio in this tick, or the call is still
	            # inactive. Re-emit the current state unchanged.
	            if audio_chunk is None or not state.call_active:
	                u = state._build_update()
	                return (state, *_unpack(u))

	            sample_rate, audio_np = audio_chunk

	            # Ensure float32 mono.
	            # NOTE(review): the cast does not rescale integer PCM samples —
	            # confirm process_audio_chunk expects the raw amplitude range.
	            audio_np = np.array(audio_np, dtype=np.float32)
	            if audio_np.ndim == 2:
	                # 2-D input is averaged over its second axis to get mono.
	                audio_np = audio_np.mean(axis=1)

	            update = state.process_audio_chunk(sample_rate, audio_np)
	            return (state, *_unpack(update))

	        audio_input.stream(
	            fn=on_audio_stream,
	            inputs=[audio_input, session_state],
	            outputs=[session_state] + ALL_OUTPUTS,
	            stream_every=0.5,   # seconds — half-second chunks
	            time_limit=3600,    # allow up to 1-hour calls
	        )

	    return demo


	# ── Entry point ───────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	    logger.info("Starting Gradio app; prefetching deployed ASR model if needed...")

	    # Warm up the ASR model before serving. A failure here is logged and
	    # startup carries on regardless.
	    try:
	        transcriber = get_transcriber()
	        transcriber.prefetch()
	    except Exception as exc:
	        logger.error(
	            "ASR prefetch failed at startup; continuing with lazy loading: %s",
	            exc,
	        )

	    app = build_app()
	    launch_options = {
	        "server_name": SERVER_NAME,
	        "server_port": SERVER_PORT,
	        "show_error": True,
	    }
	    app.launch(**launch_options)