""" ui.tabs.ask_tab =============== Tab 1 — Ask VoiceVault Full voice-to-answer pipeline: 1. Microphone → Whisper ASR → editable transcript 2. KB multi-selector + Ask button 3. HybridRetriever search → LLM answer (via AnswerChain) 4. Chatbot history display 5. Citation panel (collapsible) 6. Web Speech API TTS speak button 7. Conversation history via gr.State """ from __future__ import annotations import hashlib import logging import uuid from pathlib import Path from typing import Optional import gradio as gr from ui.components.citation_panel import format_citations_markdown logger = logging.getLogger(__name__) # ------------------------------------------------------------------ # # Public Builder # # ------------------------------------------------------------------ # def build_ask_tab( transcriber, answer_chain, kb_manager, db_path: Path, ) -> None: """ Build and render the Ask VoiceVault tab. Args: transcriber: WhisperTranscriber instance (lazy-loads model). answer_chain: AnswerChain instance (Groq → Gemini fallback). kb_manager: KBManager for listing available KBs. db_path: Path to the central SQLite database (for query logging). """ history_state: gr.State = gr.State([]) # ── Layout ───────────────────────────────────────────────────────── gr.Markdown("## 🎙️ Ask VoiceVault") gr.Markdown("*Record audio or type your question, select knowledge bases, then click Ask.*") with gr.Row(): # Left column — input controls with gr.Column(scale=2): audio_input = gr.Audio( label="🎤 Speak your question", sources=["microphone"], type="filepath", interactive=True, ) transcript_box = gr.Textbox( label="Question (editable — modify transcript if needed)", placeholder="Record audio above, or type your question here...", interactive=True, lines=2, ) with gr.Row(): kb_selector = gr.Dropdown( label="Knowledge Bases to query", choices=_get_kb_choices(kb_manager), multiselect=True, interactive=True, scale=5, ) refresh_btn = gr.Button("🔄", scale=0, min_width=48, variant="secondary") ask_btn = gr.Button("🔍 Ask VoiceVault", variant="primary", size="lg") # Right column — output with gr.Column(scale=3): chatbot = gr.Chatbot( label="Answer", height=430, value=[], ) with gr.Accordion("📋 Citations", open=False): citations_md = gr.Markdown("*No citations yet.*") with gr.Row(): speak_btn = gr.Button("🔊 Speak Answer", variant="secondary", scale=2) clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1) # Hidden state for last answer (for TTS) last_answer_state: gr.State = gr.State("") # ── Event Handlers ────────────────────────────────────────────────── # 1. Audio recorded → transcribe audio_input.stop_recording( fn=_make_transcribe_fn(transcriber), inputs=[audio_input], outputs=[transcript_box], ) # 2. Refresh KB list refresh_btn.click( fn=lambda: gr.update(choices=_get_kb_choices(kb_manager)), outputs=[kb_selector], ) # 3. Ask button → full pipeline ask_btn.click( fn=_make_query_fn(answer_chain, db_path), inputs=[transcript_box, kb_selector, history_state, chatbot], outputs=[chatbot, citations_md, history_state, last_answer_state], ) # 4. Clear chat clear_btn.click( fn=lambda: ([], "*No citations yet.*", [], ""), outputs=[chatbot, citations_md, history_state, last_answer_state], ) # 5. Speak answer — passes text to JS via a hidden gr.Textbox trigger speak_output = gr.Textbox(visible=False, elem_id="vv-tts-trigger") speak_btn.click( fn=lambda answer: answer, inputs=[last_answer_state], outputs=[speak_output], ) # ------------------------------------------------------------------ # # Handler Factories # # ------------------------------------------------------------------ # def _make_transcribe_fn(transcriber): """Return an audio transcription handler that captures `transcriber` in closure.""" def _transcribe(audio_path: Optional[str]) -> str: if audio_path is None: return "" try: from voicevault.asr.whisper_transcriber import WhisperTranscriberError result = transcriber.transcribe(Path(audio_path)) logger.info("Transcribed: '%s' (%s)", result.transcript[:60], result.query_type) return result.transcript except Exception as exc: logger.warning("Transcription failed: %s", exc) return f"[Transcription error: {exc}]" return _transcribe def _make_query_fn(answer_chain, db_path: Path): """Return a query handler that captures `answer_chain` and `db_path` in closure.""" def _query( query: str, kb_names: list[str], history: list[tuple[str, str]], chatbot: list[dict], ) -> tuple[list[dict], str, list[tuple[str, str]], str]: """ Run the full RAG pipeline for a single query. Returns: (updated_chatbot, citations_markdown, updated_history, tts_text) """ if not query or not query.strip(): return chatbot, "*Please enter or speak a question.*", history, "" if not kb_names: new_chatbot = _append_chat(chatbot, query, "⚠️ Please select at least one Knowledge Base.") return new_chatbot, "*No Knowledge Base selected.*", history, "" try: from voicevault.asr.query_preprocessor import QueryPreprocessor from voicevault.retrieval.context_builder import ContextBuilder from voicevault.retrieval.hybrid_retriever import HybridRetriever from voicevault.storage import sqlite_store as db_mod from voicevault.tts.web_speech import prepare_for_tts # Pre-process the query text pq = QueryPreprocessor().process(query) search_query = pq.processed_query or query # Retrieve + build context retriever = HybridRetriever(kb_names=list(kb_names)) results = retriever.retrieve(search_query) context, citation_map = ContextBuilder().build(results) # Generate answer generation = answer_chain.generate( query=search_query, context=context, citation_map=citation_map, history=history, query_type=pq.query_type, ) # Log the query (SHA-256 hash only — no raw query stored) try: db_mod.log_query( db_path=db_path, log_id=str(uuid.uuid4()), session_id=str(uuid.uuid4()), kb_names=list(kb_names), voice_query_hash=hashlib.sha256(query.encode()).hexdigest(), processed_query=pq.processed_query, query_type=pq.query_type, answer_length=len(generation.answer), citation_count=len(generation.citations), latency_asr_ms=0, latency_ret_ms=0, latency_llm_ms=generation.latency_ms, total_latency_ms=generation.latency_ms, groq_tokens_used=generation.tokens_used, ) except Exception as log_exc: logger.warning("Query logging failed (non-critical): %s", log_exc) # Update conversation state new_history = list(history) + [(query, generation.answer)] new_chatbot = _append_chat(chatbot, query, generation.answer) citations_text = format_citations_markdown(generation.citations) tts_text = prepare_for_tts(generation.answer, generation.is_refusal) confidence_badge = { "high": "🟢", "medium": "🟡", "low": "🔴" }.get(generation.confidence_level, "⚪") logger.info( "Query answered | confidence: %s %s | model: %s | citations: %d", confidence_badge, generation.confidence_level, generation.model_used, len(generation.citations), ) return new_chatbot, citations_text, new_history, tts_text except Exception as exc: logger.exception("Query pipeline failed") error_answer = f"⚠️ An error occurred: {exc}" new_chatbot = _append_chat(chatbot, query, error_answer) return new_chatbot, "*Error during query.*", history, "" return _query # ------------------------------------------------------------------ # # Helpers # # ------------------------------------------------------------------ # def _get_kb_choices(kb_manager) -> list[str]: """Return a list of KB names from the manager.""" try: kbs = kb_manager.list_kbs() return [kb.kb_name for kb in kbs] except Exception: return [] def _append_chat( chatbot: list[dict], user_msg: str, assistant_msg: str, ) -> list[dict]: """Append a user + assistant message pair to the chatbot history.""" return list(chatbot) + [ {"role": "user", "content": user_msg}, {"role": "assistant", "content": assistant_msg}, ]