VoiceVault / app.py
NinjainPJs's picture
Initial release: VoiceVault v1.0.0 — Voice-First RAG Knowledge Agent
85f900d
"""
VoiceVault — Gradio Application Entry Point
============================================
HuggingFace Spaces entry point and local development server.
Run locally: python app.py
Deploy: Push to HuggingFace Spaces (app.py detected automatically)
Architecture:
4-tab Gradio Blocks layout:
Tab 1 — Ask VoiceVault (voice query + streaming answer + TTS)
Tab 2 — Knowledge Bases (upload + index + manage)
Tab 3 — Analytics (query stats + KB inventory)
Tab 4 — Settings (runtime parameters)
Startup:
1. Ensure data directories exist (cfg.ensure_directories)
2. Initialize KBManager (central SQLite DB)
3. Lazy-init WhisperTranscriber and AnswerChain (no model downloads at startup)
4. Build 4-tab Gradio Blocks UI
5. Launch with Soft theme
"""
from __future__ import annotations
import logging
import sys
import gradio as gr
from config import cfg
from voicevault import __version__
from ui.tabs.ask_tab import build_ask_tab
from ui.tabs.kb_tab import build_kb_tab
from ui.tabs.analytics_tab import build_analytics_tab
from ui.tabs.settings_tab import build_settings_tab
from ui.components.audio_controls import get_tts_html
# ------------------------------------------------------------------ #
# Logging #
# ------------------------------------------------------------------ #
# Send every log record to stdout through a single stream handler.
# Verbosity is DEBUG when the config's debug flag is set, INFO otherwise.
logging.basicConfig(
    handlers=[logging.StreamHandler(stream=sys.stdout)],
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
    level=logging.INFO if not cfg.debug else logging.DEBUG,
)

# Module-level logger used by the startup code in this file.
logger = logging.getLogger(__name__)

# ------------------------------------------------------------------ #
# Startup                                                            #
# ------------------------------------------------------------------ #

# Location of the central database file, kept under the configured data
# directory. Handed to KBManager and to the tab builders.
_CENTRAL_DB_PATH = cfg.data_dir / "voicevault.db"
def _startup() -> tuple:
    """
    Initialize all pipeline components at startup.

    Steps, in order:
      1. Ensure the data directories exist (``cfg.ensure_directories``).
      2. Log a startup banner: version, data directory, central DB path,
         API-key presence and debug flag.
      3. Warn when no LLM API key is configured.
      4. Construct KBManager, WhisperTranscriber and AnswerChain.

    The ``voicevault.*`` component modules are imported inside this function
    rather than at module top level, so they are only loaded when startup
    actually runs.

    Returns:
        (kb_manager, transcriber, answer_chain) tuple — passed to UI builders.
    """
    cfg.ensure_directories()

    banner_rule = "=" * 60
    logger.info(banner_rule)
    logger.info("VoiceVault v%s — Starting up", __version__)
    logger.info("Data directory : %s", cfg.data_dir.resolve())
    logger.info("Central DB : %s", _CENTRAL_DB_PATH)
    # Only report whether a key is present; the key values are never logged.
    logger.info("Groq key : %s", "✓ configured" if cfg.has_groq_key() else "✗ not set")
    logger.info("Gemini key : %s", "✓ configured" if cfg.has_gemini_key() else "✗ not set")
    logger.info("Debug mode : %s", cfg.debug)
    logger.info(banner_rule)

    if not cfg.has_any_llm_key():
        # Not fatal: startup continues so the UI still comes up without a key.
        logger.warning(
            "No LLM API key found. Set GROQ_API_KEY or GEMINI_API_KEY in .env "
            "to enable answer generation."
        )

    # ── KBManager (initializes SQLite schema) ──────────────────────────
    from voicevault.kb.kb_manager import KBManager

    kb_manager = KBManager(db_path=_CENTRAL_DB_PATH)
    logger.info("KBManager initialized — %d KB(s) found", len(kb_manager.list_kbs()))

    # ── WhisperTranscriber (lazy — no model loaded until first query) ──
    from voicevault.asr.whisper_transcriber import WhisperTranscriber

    transcriber = WhisperTranscriber()
    logger.info("WhisperTranscriber ready (lazy — model loads on first use)")

    # ── AnswerChain (lazy — LLM clients created per call) ─────────────
    from voicevault.generation.answer_chain import AnswerChain

    answer_chain = AnswerChain()
    logger.info("AnswerChain ready (Groq primary / Gemini fallback)")

    return kb_manager, transcriber, answer_chain
# ------------------------------------------------------------------ #
# Gradio App Builder #
# ------------------------------------------------------------------ #
def build_app(kb_manager, transcriber, answer_chain) -> gr.Blocks:
    """
    Construct and return the full Gradio Blocks application.

    Layout, top to bottom: a Markdown header showing the package version, a
    hidden HTML component carrying the markup returned by ``get_tts_html()``,
    four tabs (Ask VoiceVault, Knowledge Bases, Analytics, Settings) and a
    Markdown footer.

    Args:
        kb_manager: KBManager singleton for KB operations.
        transcriber: WhisperTranscriber singleton (lazy-loaded model).
        answer_chain: AnswerChain singleton (Groq → Gemini).

    Returns:
        Configured gr.Blocks instance (not yet launched).
    """
    # The Markdown sources are assembled from explicit line pieces so that the
    # indentation of this function never leaks into the rendered text.
    header_md = (
        "\n"
        "# 🎙️ VoiceVault\n"
        "**Voice-First RAG Knowledge Agent**  · \n"
        "Speak to your documents. Get cited answers out.\n"
        f"  `v{__version__}`\n"
    )
    footer_md = (
        "\n"
        "---\n"
        '<div style="text-align:center; color:#888; font-size:0.8em;">\n'
        "VoiceVault · Navnit Amrutharaj ·\n"
        '<a href="https://github.com/ninjacode911" target="_blank">github.com/ninjacode911</a> ·\n'
        "Stack: Whisper · ChromaDB · LangChain · Groq · Gradio · $0/month\n"
        "</div>\n"
    )

    with gr.Blocks(
        title="VoiceVault — Voice-First RAG Knowledge Agent",
        analytics_enabled=False,
    ) as demo:
        # ── Header ─────────────────────────────────────────────────
        gr.Markdown(header_md)

        # ── Web Speech API JS bridge ────────────────────────────────
        # Added as a hidden component so it contributes no visible UI.
        gr.HTML(get_tts_html(), visible=False)

        # ── Tabs ───────────────────────────────────────────────────
        with gr.Tabs():
            with gr.Tab("🎙️ Ask VoiceVault"):
                build_ask_tab(
                    transcriber=transcriber,
                    answer_chain=answer_chain,
                    kb_manager=kb_manager,
                    db_path=_CENTRAL_DB_PATH,
                )
            with gr.Tab("📂 Knowledge Bases"):
                build_kb_tab(
                    kb_manager=kb_manager,
                    db_path=_CENTRAL_DB_PATH,
                )
            with gr.Tab("📊 Analytics"):
                build_analytics_tab(
                    kb_manager=kb_manager,
                    db_path=_CENTRAL_DB_PATH,
                )
            with gr.Tab("⚙️ Settings"):
                build_settings_tab()

        # ── Footer ─────────────────────────────────────────────────
        gr.Markdown(footer_md)

    return demo
# ------------------------------------------------------------------ #
# Entry Point #
# ------------------------------------------------------------------ #
if __name__ == "__main__":
    # Initialize the backend components, then wire them into the UI.
    # _startup() returns (kb_manager, transcriber, answer_chain), which is
    # exactly build_app's positional parameter order.
    app = build_app(*_startup())

    # Soft theme with a violet/purple palette; the font list falls back to
    # generic system families after the Google-hosted "Inter".
    soft_theme = gr.themes.Soft(
        font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"],
        neutral_hue="slate",
        secondary_hue="purple",
        primary_hue="violet",
    )

    # NOTE(review): the theme is passed to launch() rather than to gr.Blocks().
    # Confirm the pinned Gradio version accepts `theme` on launch().
    launch_options = {
        "server_name": cfg.host,
        "server_port": cfg.port,
        "share": False,
        "show_error": True,
        "theme": soft_theme,
    }
    app.launch(**launch_options)