Spaces:
Sleeping
Sleeping
| """Module 6 - Voice Notes: browser dictation + Saathi response. | |
| Streamlit 1.32 does not include a native microphone-to-text widget. To stay | |
| dependency-light for HF Spaces, this module uses the browser Web Speech API | |
| inside a local HTML component. The transcript stays in the browser until the | |
| user pastes it into the Streamlit text area. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from typing import Dict, List | |
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| from backend.claude_client import chat | |
| from backend.i18n import claude_language_name, t | |
| from backend.safeguards import check_crisis, render_crisis_banner | |
# Module identifier handed to the chat backend.
MODULE_NAME = "voice_notes"

# st.session_state keys owned by this module.
NOTE_KEY = "voice_note"
RESPONSE_KEY = "voice_response"
HISTORY_KEY = "voice_history"

# UI language code -> BCP-47 tag given to the browser speech recogniser.
# Every supported language currently uses the India ("IN") region variant.
VOICE_LANG_TAGS = {
    code: f"{code}-IN"
    for code in ("en", "hi", "bn", "ta", "te", "mr", "ur")
}
def _init_state() -> None:
    """Seed the session-state slots this module uses.

    The note and response slots default to an empty string and the history
    slot to an empty list. Keys that are already present are left untouched.
    """
    state = st.session_state
    # Factories rather than shared instances, so the list is created fresh.
    defaults = ((NOTE_KEY, str), (RESPONSE_KEY, str), (HISTORY_KEY, list))
    for key, make_default in defaults:
        if key not in state:
            state[key] = make_default()
def _append_history(role: str, content: str) -> None:
    """Add one ``{"role", "content"}`` message to the stored voice history.

    A new list is built and written back to session state instead of
    mutating the stored list in place.
    """
    previous = st.session_state.get(HISTORY_KEY, [])
    entry = {"role": role, "content": content}
    st.session_state[HISTORY_KEY] = [*previous, entry]
def _render_voice_component(lang: str) -> None:
    """Render the browser-side dictation widget as an embedded HTML component.

    The widget uses the browser's ``SpeechRecognition`` /
    ``webkitSpeechRecognition`` API. Button labels, the transcript label and
    the "unsupported" notice are translated via ``t``; the status messages
    written by the script itself ("Listening...", "Copied...", etc.) are
    hard-coded English strings. Nothing is sent back to Python: the user
    copies the transcript out of the component manually.

    Args:
        lang: UI language code. It selects the translated labels and, through
            ``VOICE_LANG_TAGS``, the recognition locale (``"en-IN"`` when the
            code is not in the map).
    """
    cfg = {
        "lang": VOICE_LANG_TAGS.get(lang, "en-IN"),
        "start": t("voice_start_button", lang),
        "stop": t("voice_stop_button", lang),
        "copy": t("voice_copy_button", lang),
        "clear": t("voice_clear_button", lang),
        "transcript": t("voice_transcript_label", lang),
        "unsupported": t("voice_not_supported", lang),
    }
    # The JSON is interpolated into a <script> element, where a literal "</"
    # (e.g. "</script>" inside a translation) would end the script early.
    # "<\/" is a valid JSON/JS escape that decodes to the same text.
    cfg_json = json.dumps(cfg, ensure_ascii=False).replace("</", "<\\/")
    # NOTE: this is an f-string, so every literal JS brace below is doubled.
    components.html(
        f"""
<div style="font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;">
  <div id="voiceStatus" style="margin-bottom: 8px; color: #374151;"></div>
  <div style="display: flex; flex-wrap: wrap; gap: 8px; margin-bottom: 10px;">
    <button id="startBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #4f46e5; background: #4f46e5; color: white;">Start</button>
    <button id="stopBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #6b7280; background: white; color: #111827;">Stop</button>
    <button id="copyBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #6b7280; background: white; color: #111827;">Copy</button>
    <button id="clearBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #6b7280; background: white; color: #111827;">Clear</button>
  </div>
  <label for="transcriptBox" style="display:block; font-weight: 600; margin-bottom: 6px;">Transcript</label>
  <textarea id="transcriptBox" style="width: 100%; min-height: 150px; border: 1px solid #d1d5db; border-radius: 8px; padding: 10px; font-size: 15px; line-height: 1.45;"></textarea>
</div>
<script>
const cfg = {cfg_json};
const statusEl = document.getElementById("voiceStatus");
const transcriptEl = document.getElementById("transcriptBox");
const startBtn = document.getElementById("startBtn");
const stopBtn = document.getElementById("stopBtn");
const copyBtn = document.getElementById("copyBtn");
const clearBtn = document.getElementById("clearBtn");
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

startBtn.textContent = cfg.start;
stopBtn.textContent = cfg.stop;
copyBtn.textContent = cfg.copy;
clearBtn.textContent = cfg.clear;
document.querySelector("label[for='transcriptBox']").textContent = cfg.transcript;

let recognition = null;
let finalTranscript = "";
// True from the Start click until the "end" event; blocks a second start().
let listening = false;
// Last error code of the current session; keeps "end" from hiding it.
let lastError = "";

if (!SpeechRecognition) {{
  statusEl.textContent = cfg.unsupported;
  startBtn.disabled = true;
  stopBtn.disabled = true;
}} else {{
  recognition = new SpeechRecognition();
  recognition.lang = cfg.lang;
  recognition.continuous = true;
  recognition.interimResults = true;
  recognition.onstart = () => {{
    statusEl.textContent = "Listening...";
  }};
  recognition.onerror = (event) => {{
    lastError = event.error;
    statusEl.textContent = "Voice capture stopped: " + event.error;
  }};
  recognition.onend = () => {{
    listening = false;
    // "end" also fires after "error"; leave the error message visible.
    if (!lastError) {{
      statusEl.textContent = "Stopped. Copy the transcript, then paste it below.";
    }}
  }};
  recognition.onresult = (event) => {{
    let interim = "";
    for (let i = event.resultIndex; i < event.results.length; i++) {{
      const piece = event.results[i][0].transcript;
      if (event.results[i].isFinal) {{
        finalTranscript += piece + " ";
      }} else {{
        interim += piece;
      }}
    }}
    transcriptEl.value = (finalTranscript + interim).trim();
  }};
}}

startBtn.onclick = () => {{
  // start() throws InvalidStateError when a session is already active.
  if (!recognition || listening) return;
  listening = true;
  lastError = "";
  try {{
    recognition.start();
  }} catch (err) {{
    listening = false;
    statusEl.textContent = "Voice capture stopped: " + err.name;
  }}
}};
stopBtn.onclick = () => {{
  if (recognition) recognition.stop();
}};
copyBtn.onclick = async () => {{
  try {{
    await navigator.clipboard.writeText(transcriptEl.value);
    statusEl.textContent = "Copied. Paste it into the Streamlit box below.";
  }} catch (err) {{
    // Fallback when the async clipboard API is unavailable or denied.
    transcriptEl.select();
    document.execCommand("copy");
    statusEl.textContent = "Copied. Paste it into the Streamlit box below.";
  }}
}};
clearBtn.onclick = () => {{
  finalTranscript = "";
  transcriptEl.value = "";
  statusEl.textContent = "";
}};
</script>
""",
        height=310,
    )
def render(lang: str) -> None:
    """Draw the Voice Notes page: dictation widget, note box, reply, history.

    Flow on each rerun:
      * "Save" stores the stripped note and appends it to the history.
      * "Ask" first runs ``check_crisis`` on the raw note; on a hit it shows
        the crisis banner and returns without calling the model. Otherwise
        the stripped note is stored, sent to ``chat``, and the reply (or a
        fallback message if the call raises) is stored and appended to the
        history.
      * The latest response, if any, and the last six history entries are
        then rendered.

    Args:
        lang: UI language code used for translated labels and passed to
            ``claude_language_name`` for the model call.
    """
    _init_state()

    st.header(t("voice_header", lang))
    st.caption(t("voice_sub", lang))
    st.info(t("voice_copy_note", lang))
    _render_voice_component(lang)

    note = st.text_area(
        t("voice_note_label", lang),
        value=st.session_state[NOTE_KEY],
        placeholder=t("voice_paste_placeholder", lang),
        height=140,
        key="voice_note_input",
    )

    save_col, ask_col, _spacer = st.columns([1, 1, 3])
    with save_col:
        save_clicked = st.button(
            t("voice_save_button", lang),
            key="voice_save_button",
            type="secondary",
        )
    with ask_col:
        ask_clicked = st.button(
            t("voice_ask_button", lang),
            key="voice_ask_button",
            type="primary",
        )

    # Both actions ignore a blank / whitespace-only note.
    text = note.strip()

    if save_clicked and text:
        st.session_state[NOTE_KEY] = text
        _append_history("user", text)
        st.success(t("voice_saved", lang))

    if ask_clicked and text:
        # The crisis check sees the note exactly as typed, before stripping.
        if check_crisis(note):
            render_crisis_banner(lang)
            return

        st.session_state[NOTE_KEY] = text
        _append_history("user", text)

        with st.spinner("..."):
            try:
                reply = chat(
                    module=MODULE_NAME,
                    user_text=text,
                    language_name=claude_language_name(lang),
                    max_tokens=1200,
                )
            except Exception as exc:
                # Best-effort: show the failure in place of a reply.
                reply = f"(Could not reach the model right now: {exc})"

        st.session_state[RESPONSE_KEY] = reply
        _append_history("assistant", reply)

    latest = st.session_state[RESPONSE_KEY]
    if latest:
        st.markdown(latest)

    heading = t("voice_history_heading", lang)
    st.markdown(f"##### {heading}")

    messages: List[Dict[str, str]] = st.session_state.get(HISTORY_KEY, [])
    if not messages:
        st.caption(t("voice_no_history", lang))
    # Only the six most recent entries are shown.
    for entry in messages[-6:]:
        speaker = entry.get("role", "assistant")
        with st.chat_message(speaker):
            st.markdown(entry.get("content", ""))