# VoiceVault/ui/tabs/ask_tab.py
# Uploaded by NinjainPJs
# Initial release: VoiceVault v1.0.0 — Voice-First RAG Knowledge Agent
# Commit 85f900d
"""
ui.tabs.ask_tab
===============
Tab 1 — Ask VoiceVault
Full voice-to-answer pipeline:
1. Microphone → Whisper ASR → editable transcript
2. KB multi-selector + Ask button
3. HybridRetriever search → LLM answer (via AnswerChain)
4. Chatbot history display
5. Citation panel (collapsible)
6. Web Speech API TTS speak button
7. Conversation history via gr.State
"""
from __future__ import annotations
import hashlib
import logging
import uuid
from pathlib import Path
from typing import Optional
import gradio as gr
from ui.components.citation_panel import format_citations_markdown
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------ #
# Public Builder #
# ------------------------------------------------------------------ #
def build_ask_tab(
    transcriber,
    answer_chain,
    kb_manager,
    db_path: Path,
) -> None:
    """
    Render the "Ask VoiceVault" tab and connect its event handlers.

    The tab has two columns: inputs on the left (microphone, editable
    question box, knowledge-base selector, Ask button) and outputs on the
    right (chat view, collapsible citations, Speak / Clear buttons).
    Components are created in the ambient Gradio context, so this is
    presumably called from inside an enclosing ``gr.Blocks``/``gr.Tab``.

    Args:
        transcriber: WhisperTranscriber instance (lazy-loads model).
        answer_chain: AnswerChain instance (Groq → Gemini fallback).
        kb_manager: KBManager for listing available KBs.
        db_path: Path to the central SQLite database (for query logging).
    """
    empty_citations = "*No citations yet.*"

    def _refresh_kb_dropdown():
        """Ask the KB manager for its current names and update the dropdown."""
        return gr.update(choices=_get_kb_choices(kb_manager))

    def _reset_conversation():
        """Blank the chat view, citation panel, history and pending TTS text."""
        return ([], empty_citations, [], "")

    def _forward_answer(answer):
        """Copy the stored answer text into the hidden TTS trigger textbox."""
        return answer

    # Per-session conversation history, fed back into the answer chain.
    conversation_state = gr.State([])

    # ── Layout ─────────────────────────────────────────────────────────
    gr.Markdown("## 🎙️ Ask VoiceVault")
    gr.Markdown("*Record audio or type your question, select knowledge bases, then click Ask.*")

    with gr.Row():
        # Left column — input controls
        with gr.Column(scale=2):
            mic = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎤 Speak your question",
                interactive=True,
            )
            question_box = gr.Textbox(
                lines=2,
                label="Question (editable — modify transcript if needed)",
                placeholder="Record audio above, or type your question here...",
                interactive=True,
            )
            with gr.Row():
                kb_dropdown = gr.Dropdown(
                    choices=_get_kb_choices(kb_manager),
                    label="Knowledge Bases to query",
                    multiselect=True,
                    interactive=True,
                    scale=5,
                )
                reload_kbs_btn = gr.Button("🔄", variant="secondary", scale=0, min_width=48)
            submit_btn = gr.Button("🔍 Ask VoiceVault", size="lg", variant="primary")

        # Right column — output
        with gr.Column(scale=3):
            chat_view = gr.Chatbot(
                value=[],
                label="Answer",
                height=430,
            )
            with gr.Accordion("📋 Citations", open=False):
                citations_view = gr.Markdown(empty_citations)
            with gr.Row():
                speak_btn = gr.Button("🔊 Speak Answer", scale=2, variant="secondary")
                reset_btn = gr.Button("🗑️ Clear Chat", scale=1, variant="secondary")

    # Holds the TTS-ready text of the most recent answer.
    tts_text_state = gr.State("")

    # ── Event Handlers ──────────────────────────────────────────────────
    # 1. Recording finished → fill the question box with the transcript.
    mic.stop_recording(
        fn=_make_transcribe_fn(transcriber),
        inputs=[mic],
        outputs=[question_box],
    )

    # 2. Reload the list of selectable knowledge bases.
    reload_kbs_btn.click(
        fn=_refresh_kb_dropdown,
        outputs=[kb_dropdown],
    )

    # 3. Ask → retrieval + generation pipeline.
    submit_btn.click(
        fn=_make_query_fn(answer_chain, db_path),
        inputs=[question_box, kb_dropdown, conversation_state, chat_view],
        outputs=[chat_view, citations_view, conversation_state, tts_text_state],
    )

    # 4. Clear → reset every output and both state holders.
    reset_btn.click(
        fn=_reset_conversation,
        outputs=[chat_view, citations_view, conversation_state, tts_text_state],
    )

    # 5. Speak → write the answer into a hidden textbox with a fixed element
    #    id; presumably front-end JS watches it to drive the Web Speech API.
    tts_trigger = gr.Textbox(visible=False, elem_id="vv-tts-trigger")
    speak_btn.click(
        fn=_forward_answer,
        inputs=[tts_text_state],
        outputs=[tts_trigger],
    )
# ------------------------------------------------------------------ #
# Handler Factories #
# ------------------------------------------------------------------ #
def _make_transcribe_fn(transcriber):
    """
    Build the event handler that turns a recorded clip into question text.

    Args:
        transcriber: Object exposing ``transcribe(Path)``; the returned result
            must provide ``transcript`` and ``query_type`` attributes.

    Returns:
        A callable ``(audio_path) -> str`` that closes over ``transcriber``.
    """

    def _transcribe(audio_path: Optional[str]) -> str:
        """
        Transcribe the audio file at ``audio_path``.

        Returns:
            ``""`` when there is nothing to transcribe, the transcript on
            success, or ``"[Transcription error: ...]"`` if transcription
            raises (the error is shown in the textbox instead of propagating).
        """
        # No recording (None) or an empty path: nothing to do. An empty
        # string must not reach Path(), which would turn it into ".".
        if not audio_path:
            return ""
        try:
            result = transcriber.transcribe(Path(audio_path))
            logger.info("Transcribed: '%s' (%s)", result.transcript[:60], result.query_type)
            return result.transcript
        except Exception as exc:
            # UI boundary: report the failure to the user rather than raising.
            logger.warning("Transcription failed: %s", exc)
            return f"[Transcription error: {exc}]"

    return _transcribe
def _make_query_fn(answer_chain, db_path: Path):
"""Return a query handler that captures `answer_chain` and `db_path` in closure."""
def _query(
query: str,
kb_names: list[str],
history: list[tuple[str, str]],
chatbot: list[dict],
) -> tuple[list[dict], str, list[tuple[str, str]], str]:
"""
Run the full RAG pipeline for a single query.
Returns:
(updated_chatbot, citations_markdown, updated_history, tts_text)
"""
if not query or not query.strip():
return chatbot, "*Please enter or speak a question.*", history, ""
if not kb_names:
new_chatbot = _append_chat(chatbot, query, "⚠️ Please select at least one Knowledge Base.")
return new_chatbot, "*No Knowledge Base selected.*", history, ""
try:
from voicevault.asr.query_preprocessor import QueryPreprocessor
from voicevault.retrieval.context_builder import ContextBuilder
from voicevault.retrieval.hybrid_retriever import HybridRetriever
from voicevault.storage import sqlite_store as db_mod
from voicevault.tts.web_speech import prepare_for_tts
# Pre-process the query text
pq = QueryPreprocessor().process(query)
search_query = pq.processed_query or query
# Retrieve + build context
retriever = HybridRetriever(kb_names=list(kb_names))
results = retriever.retrieve(search_query)
context, citation_map = ContextBuilder().build(results)
# Generate answer
generation = answer_chain.generate(
query=search_query,
context=context,
citation_map=citation_map,
history=history,
query_type=pq.query_type,
)
# Log the query (SHA-256 hash only — no raw query stored)
try:
db_mod.log_query(
db_path=db_path,
log_id=str(uuid.uuid4()),
session_id=str(uuid.uuid4()),
kb_names=list(kb_names),
voice_query_hash=hashlib.sha256(query.encode()).hexdigest(),
processed_query=pq.processed_query,
query_type=pq.query_type,
answer_length=len(generation.answer),
citation_count=len(generation.citations),
latency_asr_ms=0,
latency_ret_ms=0,
latency_llm_ms=generation.latency_ms,
total_latency_ms=generation.latency_ms,
groq_tokens_used=generation.tokens_used,
)
except Exception as log_exc:
logger.warning("Query logging failed (non-critical): %s", log_exc)
# Update conversation state
new_history = list(history) + [(query, generation.answer)]
new_chatbot = _append_chat(chatbot, query, generation.answer)
citations_text = format_citations_markdown(generation.citations)
tts_text = prepare_for_tts(generation.answer, generation.is_refusal)
confidence_badge = {
"high": "🟢", "medium": "🟡", "low": "🔴"
}.get(generation.confidence_level, "⚪")
logger.info(
"Query answered | confidence: %s %s | model: %s | citations: %d",
confidence_badge, generation.confidence_level,
generation.model_used, len(generation.citations),
)
return new_chatbot, citations_text, new_history, tts_text
except Exception as exc:
logger.exception("Query pipeline failed")
error_answer = f"⚠️ An error occurred: {exc}"
new_chatbot = _append_chat(chatbot, query, error_answer)
return new_chatbot, "*Error during query.*", history, ""
return _query
# ------------------------------------------------------------------ #
# Helpers #
# ------------------------------------------------------------------ #
def _get_kb_choices(kb_manager) -> list[str]:
    """
    Return the names of the knowledge bases reported by ``kb_manager``.

    Best-effort: if listing fails for any reason the failure is logged and an
    empty list is returned, so the selector can still be rendered.

    Args:
        kb_manager: Object exposing ``list_kbs()``; each returned item must
            provide a ``kb_name`` attribute.

    Returns:
        KB names in the order the manager returned them, or ``[]`` on error.
    """
    try:
        return [kb.kb_name for kb in kb_manager.list_kbs()]
    except Exception as exc:
        # Keep the UI usable, but do not hide the failure from the logs.
        logger.warning("Could not list knowledge bases: %s", exc)
        return []
def _append_chat(
chatbot: list[dict],
user_msg: str,
assistant_msg: str,
) -> list[dict]:
"""Append a user + assistant message pair to the chatbot history."""
return list(chatbot) + [
{"role": "user", "content": user_msg},
{"role": "assistant", "content": assistant_msg},
]