"""Module 6 - Voice Notes: browser dictation + Saathi response.

Streamlit 1.32 does not include a native microphone-to-text widget. To stay
dependency-light for HF Spaces, this module uses the browser Web Speech API
inside a local HTML component. The transcript stays in the browser until the
user pastes it into the Streamlit text area.
"""

from __future__ import annotations

import json
from typing import Dict, List

import streamlit as st
import streamlit.components.v1 as components

from backend.claude_client import chat
from backend.i18n import claude_language_name, t
from backend.safeguards import check_crisis, render_crisis_banner

# Module identifier passed to the chat backend.
MODULE_NAME = "voice_notes"

# Session-state slots owned by this module.
NOTE_KEY = "voice_note"
RESPONSE_KEY = "voice_response"
HISTORY_KEY = "voice_history"

# App language code -> locale tag placed in the voice component config.
# Codes missing from this table fall back to "en-IN".
VOICE_LANG_TAGS = {
    "en": "en-IN",
    "hi": "hi-IN",
    "bn": "bn-IN",
    "ta": "ta-IN",
    "te": "te-IN",
    "mr": "mr-IN",
    "ur": "ur-IN",
}


def _init_state() -> None:
    """Create this module's session-state slots if they do not exist yet.

    Values that are already present are left untouched.
    """
    # The tuple is rebuilt on every call, so the history default is always a
    # fresh list and never shared between sessions.
    defaults = (
        (NOTE_KEY, ""),
        (RESPONSE_KEY, ""),
        (HISTORY_KEY, []),
    )
    for key, default in defaults:
        if key not in st.session_state:
            st.session_state[key] = default


def _append_history(role: str, content: str) -> None:
    """Record one turn in the session history.

    An entry identical to the most recent one (same role and same content) is
    skipped. Without this, saving a note and then asking about the same text,
    or pressing Save twice, stored the note twice in a row and the history
    list displayed it twice.

    Args:
        role: Chat role of the entry, e.g. "user" or "assistant".
        content: Text of the entry.
    """
    # Work on a copy and assign it back so session state receives a new list
    # object rather than an in-place mutation.
    history = list(st.session_state.get(HISTORY_KEY, []))
    entry = {"role": role, "content": content}
    if history and history[-1] == entry:
        return
    history.append(entry)
    st.session_state[HISTORY_KEY] = history


def _render_voice_component(lang: str) -> None:
    """Render the HTML dictation component for the given language.

    Args:
        lang: App language code used to pick the locale tag and to translate
            the component's labels.
    """
    cfg = {
        "lang": VOICE_LANG_TAGS.get(lang, "en-IN"),
        "start": t("voice_start_button", lang),
        "stop": t("voice_stop_button", lang),
        "copy": t("voice_copy_button", lang),
        "clear": t("voice_clear_button", lang),
        "transcript": t("voice_transcript_label", lang),
        "unsupported": t("voice_not_supported", lang),
    }
    # ensure_ascii=False keeps non-Latin labels as readable characters.
    # NOTE(review): cfg_json is not interpolated into the markup below -
    # confirm whether the component markup/script is expected to consume it.
    cfg_json = json.dumps(cfg, ensure_ascii=False)
    components.html(
        f"""

Transcript

""",
        height=310,
    )


def render(lang: str) -> None:
    """Draw the Voice Notes page.

    Shows the dictation component, a text area for the pasted transcript, and
    Save / Ask buttons. Save stores the trimmed note and records it in the
    history. Ask first runs the crisis check; if it triggers, the crisis
    banner is shown and the function returns before anything is stored or
    sent. Otherwise the note is stored, sent to the chat backend, and the
    reply is stored and recorded in the history. The latest reply and the six
    most recent history entries are rendered at the bottom.

    Args:
        lang: App language code used for all translated labels.
    """
    _init_state()

    st.header(t("voice_header", lang))
    st.caption(t("voice_sub", lang))
    st.info(t("voice_copy_note", lang))

    _render_voice_component(lang)

    note = st.text_area(
        t("voice_note_label", lang),
        value=st.session_state[NOTE_KEY],
        placeholder=t("voice_paste_placeholder", lang),
        height=140,
        key="voice_note_input",
    )

    cols = st.columns([1, 1, 3])
    with cols[0]:
        save_clicked = st.button(
            t("voice_save_button", lang),
            key="voice_save_button",
            type="secondary",
        )
    with cols[1]:
        ask_clicked = st.button(
            t("voice_ask_button", lang),
            key="voice_ask_button",
            type="primary",
        )

    # Trim once; whitespace-only input counts as "no note" for both buttons.
    text = note.strip()

    if save_clicked and text:
        st.session_state[NOTE_KEY] = text
        _append_history("user", text)
        st.success(t("voice_saved", lang))

    if ask_clicked and text:
        # Safety check comes first: on a hit, nothing is stored or sent.
        if check_crisis(note):
            render_crisis_banner(lang)
            return

        st.session_state[NOTE_KEY] = text
        _append_history("user", text)

        with st.spinner("..."):
            try:
                response = chat(
                    module=MODULE_NAME,
                    user_text=text,
                    language_name=claude_language_name(lang),
                    max_tokens=1200,
                )
            except Exception as exc:
                # Deliberate best-effort at the UI boundary: show the failure
                # in place of a reply instead of crashing the page.
                response = f"(Could not reach the model right now: {exc})"

        st.session_state[RESPONSE_KEY] = response
        _append_history("assistant", response)

    if st.session_state[RESPONSE_KEY]:
        st.markdown(st.session_state[RESPONSE_KEY])

    st.markdown(f"##### {t('voice_history_heading', lang)}")
    history: List[Dict[str, str]] = st.session_state.get(HISTORY_KEY, [])
    if not history:
        st.caption(t("voice_no_history", lang))
    # Only the six most recent entries are displayed.
    for msg in history[-6:]:
        with st.chat_message(msg.get("role", "assistant")):
            st.markdown(msg.get("content", ""))