Spaces:

NinjainPJs
/

VoiceVault

Running

App Files Files Community

VoiceVault / ui /tabs /ask_tab.py

NinjainPJs

Initial release: VoiceVault v1.0.0 — Voice-First RAG Knowledge Agent

85f900d 3 months ago

raw

history blame contribute delete

10.3 kB

	"""
	ui.tabs.ask_tab
	===============
	Tab 1 — Ask VoiceVault

	Full voice-to-answer pipeline:
	1. Microphone → Whisper ASR → editable transcript
	2. KB multi-selector + Ask button
	3. HybridRetriever search → LLM answer (via AnswerChain)
	4. Chatbot history display
	5. Citation panel (collapsible)
	6. Web Speech API TTS speak button
	7. Conversation history via gr.State
	"""

	from __future__ import annotations

	import hashlib
	import logging
	import uuid
	from pathlib import Path
	from typing import Optional

	import gradio as gr

	from ui.components.citation_panel import format_citations_markdown

	logger = logging.getLogger(__name__)

	# ------------------------------------------------------------------ #
	# Public Builder #
	# ------------------------------------------------------------------ #


	def build_ask_tab(
	    transcriber,
	    answer_chain,
	    kb_manager,
	    db_path: Path,
	) -> None:
	    """
	    Render the "Ask VoiceVault" tab and connect its event handlers.

	    Components are created in display order: a two-column row (question
	    inputs on the left, answer output on the right), followed by the hidden
	    state and trigger components used by the speak button.

	    NOTE(review): no Blocks/Tab context is opened here, so this is presumably
	    called from inside one owned by the caller — confirm.

	    Args:
	        transcriber: Forwarded to ``_make_transcribe_fn``; converts a recorded
	            audio file into transcript text.
	        answer_chain: Forwarded to ``_make_query_fn``; produces the answer.
	        kb_manager: Supplies the knowledge-base names shown in the selector.
	        db_path: Database path forwarded to the query handler for query logging.

	    Returns:
	        None. The tab exists only as Gradio components and event bindings.
	    """
	    empty_citations = "No citations yet."

	    # Conversation turns passed back to the query handler on every Ask.
	    history_state: gr.State = gr.State([])

	    # ── Layout ─────────────────────────────────────────────────────────
	    gr.Markdown("## 🎙️ Ask VoiceVault")
	    gr.Markdown("Record audio or type your question, select knowledge bases, then click Ask.")

	    with gr.Row():
	        # Input side: microphone, editable question, KB picker, Ask button.
	        with gr.Column(scale=2):
	            audio_input = gr.Audio(
	                type="filepath",
	                sources=["microphone"],
	                label="🎤 Speak your question",
	                interactive=True,
	            )
	            transcript_box = gr.Textbox(
	                lines=2,
	                label="Question (editable — modify transcript if needed)",
	                placeholder="Record audio above, or type your question here...",
	                interactive=True,
	            )

	            with gr.Row():
	                kb_selector = gr.Dropdown(
	                    choices=_get_kb_choices(kb_manager),
	                    label="Knowledge Bases to query",
	                    multiselect=True,
	                    interactive=True,
	                    scale=5,
	                )
	                refresh_btn = gr.Button("🔄", variant="secondary", scale=0, min_width=48)

	            ask_btn = gr.Button("🔍 Ask VoiceVault", size="lg", variant="primary")

	        # Output side: chat transcript, collapsible citations, action buttons.
	        with gr.Column(scale=3):
	            chatbot = gr.Chatbot(
	                value=[],
	                label="Answer",
	                height=430,
	            )
	            with gr.Accordion("📋 Citations", open=False):
	                citations_md = gr.Markdown(empty_citations)

	            with gr.Row():
	                speak_btn = gr.Button("🔊 Speak Answer", scale=2, variant="secondary")
	                clear_btn = gr.Button("🗑️ Clear Chat", scale=1, variant="secondary")

	    # Text of the most recent answer, kept so the speak button can replay it.
	    last_answer_state: gr.State = gr.State("")

	    # ── Event Handlers ──────────────────────────────────────────────────
	    transcribe_handler = _make_transcribe_fn(transcriber)
	    query_handler = _make_query_fn(answer_chain, db_path)

	    # 1. Recording finished → fill the question box with the transcript.
	    audio_input.stop_recording(
	        fn=transcribe_handler,
	        inputs=[audio_input],
	        outputs=[transcript_box],
	    )

	    # 2. Re-read the KB names so the selector shows the current list.
	    refresh_btn.click(
	        fn=lambda: gr.update(choices=_get_kb_choices(kb_manager)),
	        outputs=[kb_selector],
	    )

	    # 3. Ask → run the query handler and update chat, citations and state.
	    ask_btn.click(
	        fn=query_handler,
	        inputs=[transcript_box, kb_selector, history_state, chatbot],
	        outputs=[chatbot, citations_md, history_state, last_answer_state],
	    )

	    # 4. Clear → reset the same four outputs to their initial values.
	    clear_btn.click(
	        fn=lambda: ([], empty_citations, [], ""),
	        outputs=[chatbot, citations_md, history_state, last_answer_state],
	    )

	    # 5. Speak → copy the last answer into a hidden textbox identified by
	    #    elem_id "vv-tts-trigger".
	    speak_output = gr.Textbox(elem_id="vv-tts-trigger", visible=False)
	    speak_btn.click(
	        fn=lambda answer: answer,
	        inputs=[last_answer_state],
	        outputs=[speak_output],
	    )


	# ------------------------------------------------------------------ #
	# Handler Factories #
	# ------------------------------------------------------------------ #


	def _make_transcribe_fn(transcriber):
	    """
	    Return an audio-transcription handler bound to ``transcriber``.

	    Args:
	        transcriber: Object exposing ``transcribe(path)`` whose result has
	            ``transcript`` and ``query_type`` attributes.

	    Returns:
	        A callable taking the recorded audio file path and returning the
	        text to place in the question box.
	    """

	    def _transcribe(audio_path: Optional[str]) -> str:
	        """
	        Transcribe the recording at ``audio_path``.

	        Returns an empty string when there is no recording (``None`` or an
	        empty path). Any failure is logged and reported inline as
	        ``"[Transcription error: ...]"`` instead of being raised, so the
	        UI event never crashes.
	        """
	        # No recording available: nothing to transcribe.
	        if not audio_path:
	            return ""
	        try:
	            result = transcriber.transcribe(Path(audio_path))
	            logger.info("Transcribed: '%s' (%s)", result.transcript[:60], result.query_type)
	            return result.transcript
	        except Exception as exc:
	            # UI boundary: surface the problem in the textbox rather than raise.
	            logger.warning("Transcription failed: %s", exc)
	            return f"[Transcription error: {exc}]"

	    return _transcribe


	def _make_query_fn(answer_chain, db_path: Path):
	    """
	    Return a query handler bound to ``answer_chain`` and ``db_path``.

	    Args:
	        answer_chain: Object exposing ``generate(...)``; its result is read
	            for ``answer``, ``citations``, ``is_refusal``, ``confidence_level``,
	            ``model_used``, ``latency_ms`` and ``tokens_used``.
	        db_path: Database path handed to ``log_query`` for each answered query.

	    Returns:
	        A callable suitable as a Gradio event handler (see ``_query``).
	    """

	    def _query(
	        query: str,
	        kb_names: list[str],
	        history: list[tuple[str, str]],
	        chatbot: list[dict],
	    ) -> tuple[list[dict], str, list[tuple[str, str]], str]:
	        """
	        Run the full RAG pipeline for a single query.

	        Args:
	            query: Question text as typed or transcribed.
	            kb_names: Names of the knowledge bases to search.
	            history: Previous ``(question, answer)`` turns.
	            chatbot: Current chat messages as role/content dicts.

	        Returns:
	            (updated_chatbot, citations_markdown, updated_history, tts_text)

	        Never raises: pipeline failures are logged and shown as an
	        assistant message in the chat.
	        """
	        # Defensive: treat a missing (None) value as an empty conversation and
	        # work on copies so the caller's lists are never mutated.
	        history = list(history or [])
	        chatbot = list(chatbot or [])

	        if not query or not query.strip():
	            return chatbot, "Please enter or speak a question.", history, ""

	        if not kb_names:
	            new_chatbot = _append_chat(chatbot, query, "⚠️ Please select at least one Knowledge Base.")
	            return new_chatbot, "No Knowledge Base selected.", history, ""

	        try:
	            # Imported on first use; an import failure is reported through
	            # the same error path as any other pipeline failure.
	            from voicevault.asr.query_preprocessor import QueryPreprocessor
	            from voicevault.retrieval.context_builder import ContextBuilder
	            from voicevault.retrieval.hybrid_retriever import HybridRetriever
	            from voicevault.storage import sqlite_store as db_mod
	            from voicevault.tts.web_speech import prepare_for_tts

	            # Pre-process the query text; fall back to the raw text when the
	            # preprocessor yields nothing.
	            pq = QueryPreprocessor().process(query)
	            search_query = pq.processed_query or query

	            # Retrieve + build context
	            retriever = HybridRetriever(kb_names=list(kb_names))
	            results = retriever.retrieve(search_query)
	            context, citation_map = ContextBuilder().build(results)

	            # Generate answer
	            generation = answer_chain.generate(
	                query=search_query,
	                context=context,
	                citation_map=citation_map,
	                history=history,
	                query_type=pq.query_type,
	            )

	            # Log the query. The raw query is passed only as a SHA-256 digest,
	            # but the processed query text is passed alongside it.
	            # NOTE(review): ASR and retrieval latencies are hard-coded to 0 and
	            # the total equals the LLM latency alone; session_id is a fresh
	            # UUID on every call, so entries cannot be grouped per session.
	            try:
	                db_mod.log_query(
	                    db_path=db_path,
	                    log_id=str(uuid.uuid4()),
	                    session_id=str(uuid.uuid4()),
	                    kb_names=list(kb_names),
	                    voice_query_hash=hashlib.sha256(query.encode()).hexdigest(),
	                    processed_query=pq.processed_query,
	                    query_type=pq.query_type,
	                    answer_length=len(generation.answer),
	                    citation_count=len(generation.citations),
	                    latency_asr_ms=0,
	                    latency_ret_ms=0,
	                    latency_llm_ms=generation.latency_ms,
	                    total_latency_ms=generation.latency_ms,
	                    groq_tokens_used=generation.tokens_used,
	                )
	            except Exception as log_exc:
	                # Logging is best-effort and must not fail the user's query.
	                logger.warning("Query logging failed (non-critical): %s", log_exc)

	            # Update conversation state
	            new_history = history + [(query, generation.answer)]
	            new_chatbot = _append_chat(chatbot, query, generation.answer)
	            citations_text = format_citations_markdown(generation.citations)
	            tts_text = prepare_for_tts(generation.answer, generation.is_refusal)

	            confidence_badge = {
	                "high": "🟢", "medium": "🟡", "low": "🔴"
	            }.get(generation.confidence_level, "⚪")
	            logger.info(
	                "Query answered | confidence: %s %s | model: %s | citations: %d",
	                confidence_badge, generation.confidence_level,
	                generation.model_used, len(generation.citations),
	            )

	            return new_chatbot, citations_text, new_history, tts_text

	        except Exception as exc:
	            # UI boundary: record the traceback and show the failure in chat.
	            logger.exception("Query pipeline failed")
	            error_answer = f"⚠️ An error occurred: {exc}"
	            new_chatbot = _append_chat(chatbot, query, error_answer)
	            return new_chatbot, "Error during query.", history, ""

	    return _query


	# ------------------------------------------------------------------ #
	# Helpers #
	# ------------------------------------------------------------------ #


	def _get_kb_choices(kb_manager) -> list[str]:
	    """
	    Return the names of the knowledge bases known to ``kb_manager``.

	    Args:
	        kb_manager: Object exposing ``list_kbs()``; each returned item is
	            read for its ``kb_name`` attribute.

	    Returns:
	        The KB names in the order the manager lists them, or an empty list
	        when listing fails.
	    """
	    try:
	        return [kb.kb_name for kb in kb_manager.list_kbs()]
	    except Exception:
	        # Best-effort for the UI: an empty selector is better than a crash,
	        # but the failure is logged so it does not go unnoticed.
	        logger.warning("Could not list knowledge bases; showing none.", exc_info=True)
	        return []


	def _append_chat(
	    chatbot: Optional[list[dict]],
	    user_msg: str,
	    assistant_msg: str,
	) -> list[dict]:
	    """
	    Return a new chat history with one user/assistant exchange appended.

	    Args:
	        chatbot: Existing messages as ``{"role", "content"}`` dicts; ``None``
	            is treated as an empty conversation.
	        user_msg: Text of the user's message.
	        assistant_msg: Text of the assistant's reply.

	    Returns:
	        A new list; the list passed in is left unmodified.
	    """
	    return [
	        *(chatbot or []),
	        {"role": "user", "content": user_msg},
	        {"role": "assistant", "content": assistant_msg},
	    ]