File size: 8,084 Bytes
0ae7ad3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bbcf98
0ae7ad3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""Module 6 - Voice Notes: browser dictation + Saathi response.

Streamlit 1.32 does not include a native microphone-to-text widget. To stay
dependency-light for HF Spaces, this module uses the browser Web Speech API
inside a local HTML component. The transcript stays in the browser until the
user pastes it into the Streamlit text area.
"""
from __future__ import annotations

import json
from typing import Dict, List

import streamlit as st
import streamlit.components.v1 as components

from backend.claude_client import chat
from backend.i18n import claude_language_name, t
from backend.safeguards import check_crisis, render_crisis_banner

# Module identifier handed to the chat backend as ``module=``.
MODULE_NAME: str = "voice_notes"

# Session-state slots used by this module.
NOTE_KEY: str = "voice_note"
RESPONSE_KEY: str = "voice_response"
HISTORY_KEY: str = "voice_history"

# UI language code -> language tag assigned to the browser speech recogniser.
# Every supported language maps to its "-IN" regional variant.
VOICE_LANG_TAGS: Dict[str, str] = {
    code: f"{code}-IN" for code in ("en", "hi", "bn", "ta", "te", "mr", "ur")
}


def _init_state() -> None:
    """Create this module's session-state slots if they do not exist yet.

    The note and response slots start as empty strings and the history slot
    as an empty list. Slots that are already present are left untouched.
    """
    state = st.session_state
    # Factories (not shared instances) so the history list is always fresh.
    slot_factories = (
        (NOTE_KEY, str),
        (RESPONSE_KEY, str),
        (HISTORY_KEY, list),
    )
    for slot, make_default in slot_factories:
        if slot not in state:
            state[slot] = make_default()


def _append_history(role: str, content: str) -> None:
    """Store a new history list with one extra ``{role, content}`` entry.

    A new list is built and assigned rather than mutating the stored one in
    place.

    Args:
        role: Speaker label stored under the ``"role"`` key.
        content: Message text stored under the ``"content"`` key.
    """
    earlier = st.session_state.get(HISTORY_KEY, [])
    st.session_state[HISTORY_KEY] = [*earlier, {"role": role, "content": content}]


def _render_voice_component(lang: str) -> None:
    """Embed the browser dictation widget for the given UI language.

    Renders a self-contained HTML/JS component with Start/Stop/Copy/Clear
    buttons and a transcript textarea driven by the browser's
    ``SpeechRecognition`` (or ``webkitSpeechRecognition``) object. The script
    does not send the transcript back to Streamlit; the user copies it and
    pastes it into the text area rendered by the caller.

    Args:
        lang: UI language code. Used to look up the button/label texts via
            ``t`` and the recognition language in ``VOICE_LANG_TAGS``
            (unknown codes fall back to ``"en-IN"``).
    """
    cfg = {
        "lang": VOICE_LANG_TAGS.get(lang, "en-IN"),
        "start": t("voice_start_button", lang),
        "stop": t("voice_stop_button", lang),
        "copy": t("voice_copy_button", lang),
        "clear": t("voice_clear_button", lang),
        "transcript": t("voice_transcript_label", lang),
        "unsupported": t("voice_not_supported", lang),
    }
    # The JSON is inlined into a <script> element, so a label containing
    # "</script>" would end the script early. "<\/" is a valid JSON/JS escape
    # for "</" and keeps the decoded value identical.
    cfg_json = json.dumps(cfg, ensure_ascii=False).replace("</", "<\\/")
    components.html(
        f"""
        <div style="font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;">
          <div id="voiceStatus" style="margin-bottom: 8px; color: #374151;"></div>
          <div style="display: flex; flex-wrap: wrap; gap: 8px; margin-bottom: 10px;">
            <button id="startBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #4f46e5; background: #4f46e5; color: white;">Start</button>
            <button id="stopBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #6b7280; background: white; color: #111827;">Stop</button>
            <button id="copyBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #6b7280; background: white; color: #111827;">Copy</button>
            <button id="clearBtn" style="border-radius: 8px; padding: 8px 12px; border: 1px solid #6b7280; background: white; color: #111827;">Clear</button>
          </div>
          <label for="transcriptBox" style="display:block; font-weight: 600; margin-bottom: 6px;">Transcript</label>
          <textarea id="transcriptBox" style="width: 100%; min-height: 150px; border: 1px solid #d1d5db; border-radius: 8px; padding: 10px; font-size: 15px; line-height: 1.45;"></textarea>
        </div>
        <script>
        const cfg = {cfg_json};
        const statusEl = document.getElementById("voiceStatus");
        const transcriptEl = document.getElementById("transcriptBox");
        const startBtn = document.getElementById("startBtn");
        const stopBtn = document.getElementById("stopBtn");
        const copyBtn = document.getElementById("copyBtn");
        const clearBtn = document.getElementById("clearBtn");
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

        startBtn.textContent = cfg.start;
        stopBtn.textContent = cfg.stop;
        copyBtn.textContent = cfg.copy;
        clearBtn.textContent = cfg.clear;
        document.querySelector("label[for='transcriptBox']").textContent = cfg.transcript;

        let recognition = null;
        let finalTranscript = "";
        // True from a successful start() call until the "end" event.
        let listening = false;
        // True when the current session reported an error.
        let errored = false;

        if (!SpeechRecognition) {{
          statusEl.textContent = cfg.unsupported;
          startBtn.disabled = true;
          stopBtn.disabled = true;
        }} else {{
          recognition = new SpeechRecognition();
          recognition.lang = cfg.lang;
          recognition.continuous = true;
          recognition.interimResults = true;

          recognition.onstart = () => {{
            statusEl.textContent = "Listening...";
          }};
          recognition.onerror = (event) => {{
            errored = true;
            statusEl.textContent = "Voice capture stopped: " + event.error;
          }};
          recognition.onend = () => {{
            listening = false;
            // "end" also fires after "error"; keep the error text visible.
            if (!errored) {{
              statusEl.textContent = "Stopped. Copy the transcript, then paste it below.";
            }}
          }};
          recognition.onresult = (event) => {{
            let interim = "";
            for (let i = event.resultIndex; i < event.results.length; i++) {{
              const piece = event.results[i][0].transcript;
              if (event.results[i].isFinal) {{
                finalTranscript += piece + " ";
              }} else {{
                interim += piece;
              }}
            }}
            transcriptEl.value = (finalTranscript + interim).trim();
          }};
        }}

        startBtn.onclick = () => {{
          // start() throws InvalidStateError while a session is active,
          // so ignore repeated clicks until the session has ended.
          if (!recognition || listening) return;
          errored = false;
          try {{
            recognition.start();
            listening = true;
          }} catch (err) {{
            statusEl.textContent = "Voice capture stopped: " + err.name;
          }}
        }};
        stopBtn.onclick = () => {{
          if (recognition) recognition.stop();
        }};
        copyBtn.onclick = async () => {{
          try {{
            await navigator.clipboard.writeText(transcriptEl.value);
            statusEl.textContent = "Copied. Paste it into the Streamlit box below.";
          }} catch (err) {{
            // Clipboard API unavailable or denied: fall back to selection copy.
            transcriptEl.select();
            document.execCommand("copy");
            statusEl.textContent = "Copied. Paste it into the Streamlit box below.";
          }}
        }};
        clearBtn.onclick = () => {{
          finalTranscript = "";
          transcriptEl.value = "";
          statusEl.textContent = "";
        }};
        </script>
        """,
        height=310,
    )


def _record_user_note(text: str) -> None:
    """Log *text* as a user history entry unless it is already the last one.

    Saving a note and then asking about the same note would otherwise put the
    identical user message into the history twice in a row.
    """
    history = st.session_state.get(HISTORY_KEY, [])
    last = history[-1] if history else {}
    if last.get("role") == "user" and last.get("content") == text:
        return
    _append_history("user", text)


def render(lang: str) -> None:
    """Render the Voice Notes page.

    Shows the browser dictation component, a text area for the pasted
    transcript, and Save / Ask buttons. Save stores the note and logs it in
    the history. Ask runs the crisis check first; if it triggers, only the
    crisis banner is rendered and the function returns. Otherwise the note is
    sent to the model and the reply is stored, displayed and logged. The last
    six history entries are rendered at the bottom.

    Args:
        lang: UI language code used for all translated labels and for the
            language name passed to the model.
    """
    _init_state()

    st.header(t("voice_header", lang))
    st.caption(t("voice_sub", lang))
    st.info(t("voice_copy_note", lang))
    _render_voice_component(lang)

    note = st.text_area(
        t("voice_note_label", lang),
        value=st.session_state[NOTE_KEY],
        placeholder=t("voice_paste_placeholder", lang),
        height=140,
        key="voice_note_input",
    )

    cols = st.columns([1, 1, 3])
    with cols[0]:
        save_clicked = st.button(t("voice_save_button", lang), key="voice_save_button", type="secondary")
    with cols[1]:
        ask_clicked = st.button(t("voice_ask_button", lang), key="voice_ask_button", type="primary")

    # Whitespace-only input counts as "no note" for both buttons.
    text = note.strip()

    if save_clicked and text:
        st.session_state[NOTE_KEY] = text
        _record_user_note(text)
        st.success(t("voice_saved", lang))

    if ask_clicked and text:
        # The crisis check runs before anything is stored or sent to the model.
        if check_crisis(note):
            render_crisis_banner(lang)
            return
        st.session_state[NOTE_KEY] = text
        _record_user_note(text)
        with st.spinner("..."):
            try:
                response = chat(
                    module=MODULE_NAME,
                    user_text=text,
                    language_name=claude_language_name(lang),
                    max_tokens=1200,
                )
            except Exception as e:
                # UI boundary: show the failure in place of a reply rather
                # than crashing the page.
                response = f"(Could not reach the model right now: {e})"
        st.session_state[RESPONSE_KEY] = response
        _append_history("assistant", response)

    if st.session_state[RESPONSE_KEY]:
        st.markdown(st.session_state[RESPONSE_KEY])

    st.markdown(f"##### {t('voice_history_heading', lang)}")
    history: List[Dict[str, str]] = st.session_state.get(HISTORY_KEY, [])
    if not history:
        st.caption(t("voice_no_history", lang))
    # Only the six most recent entries are shown.
    for msg in history[-6:]:
        with st.chat_message(msg.get("role", "assistant")):
            st.markdown(msg.get("content", ""))