umaradnaan committed on
Commit
bfc42c8
·
verified ·
1 Parent(s): 1623ce9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -290
app.py CHANGED
@@ -1,314 +1,135 @@
1
- # app.py
2
  import streamlit as st
3
-
4
- # -----------------------------
5
- # Fix/session-state initialization for streamlit-webrtc internal callbacks
6
- # (prevents: st.session_state has no attribute "_components_callbacks")
7
- # -----------------------------
8
- if "_components_callbacks" not in st.session_state:
9
- st.session_state["_components_callbacks"] = {}
10
- # streamlit-webrtc also expects an ordered list mapping; initialize conservatively
11
- if "_component_value" not in st.session_state:
12
- st.session_state["_component_value"] = {}
13
-
14
- # now import the rest
15
- import math
16
- import re
17
- import time
18
- import queue
19
  import numpy as np
20
  import speech_recognition as sr
21
- from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
22
-
23
- # -----------------------------
24
- # Helper: Spoken → expression parser
25
- # -----------------------------
26
- SIMPLE = {
27
- "zero":0,"one":1,"two":2,"three":3,"four":4,"five":5,
28
- "six":6,"seven":7,"eight":8,"nine":9,"ten":10,
29
- "eleven":11,"twelve":12,"thirteen":13,"fourteen":14,
30
- "fifteen":15,"sixteen":16,"seventeen":17,"eighteen":18,
31
- "nineteen":19,"twenty":20,"thirty":30,"forty":40,"fifty":50,
32
- "sixty":60,"seventy":70,"eighty":80,"ninety":90
33
- }
34
- SCALE = {"hundred":100, "thousand":1000, "million":1000000}
35
- OPERATORS = {
36
- "plus":"+","add":"+","added":"+",
37
- "minus":"-","subtract":"-","less":"-",
38
- "into":"*","times":"*","multiply":"*","x":"*",
39
- "divide":"/","divided":"/","over":"/","by":"/"
40
- }
41
- FUNCTION_WORDS = {
42
- "sin":"math.sin(", "sine":"math.sin(",
43
- "cos":"math.cos(", "cosine":"math.cos(",
44
- "tan":"math.tan(", "tangent":"math.tan(",
45
- "log":"math.log10(", "ln":"math.log(",
46
- "sqrt":"math.sqrt(", "square root":"math.sqrt("
 
47
  }
48
- SUFFIXES = {"square":"**2","squared":"**2","cube":"**3","cubed":"**3","factorial":"!"}
49
- EQUAL_WORDS = {"equal","equals","equal to","="}
50
- IGNORES = {"of","the","and","to","a","please"}
51
 
52
- def number_words_to_str(tokens):
53
- if not tokens:
54
- return ""
55
- # numeric tokens case
56
- if all(re.fullmatch(r"\d+(\.\d+)?", t) for t in tokens):
57
- return "".join(tokens)
58
- total=0; current=0; i=0; decimal_mode=False; decimal_digits=[]
59
- while i < len(tokens):
60
- t = tokens[i]
61
- if t in ("point","dot"):
62
- decimal_mode=True; i+=1
63
- while i < len(tokens):
64
- d = tokens[i]
65
- if d in SIMPLE:
66
- decimal_digits.append(str(SIMPLE[d]))
67
- elif re.fullmatch(r"\d", d):
68
- decimal_digits.append(d)
69
- else:
70
- break
71
- i += 1
72
- break
73
- if t in SIMPLE:
74
- current += SIMPLE[t]
75
- elif t == "hundred":
76
- if current == 0: current = 1
77
- current *= 100
78
- elif t in ("thousand","million"):
79
- scale_val = SCALE[t]
80
- if current == 0: current = 1
81
- total += current * scale_val
82
- current = 0
83
- elif re.fullmatch(r"\d+(\.\d+)?", t):
84
- current = current * 10 + int(float(t))
85
- else:
86
- break
87
- i += 1
88
- total += current
89
- if decimal_mode:
90
- return f"{total}.{''.join(decimal_digits) if decimal_digits else '0'}"
91
- return str(total)
92
 
93
- def parse_spoken_to_expr(text: str) -> str:
94
- text = text.lower()
95
- # remove filler words early, but keep needed phrases
96
- text = text.replace("to the power of", " power ")
97
- text = text.replace("to the power", " power ")
98
- text = text.replace("power of", " power ")
99
- text = text.replace("raised to the power of", " power ")
100
- text = text.replace("square root of", " sqrt ")
101
- text = re.sub(r"\b(of|the|and|please|a)\b", " ", text)
102
- for eq in EQUAL_WORDS:
103
- text = text.replace(eq, " ")
104
- raw = [t for t in re.split(r"\s+", text) if t]
105
- parts=[]; num_buf=[]; i=0
106
- while i < len(raw):
107
- w = raw[i]
108
- if w in OPERATORS:
109
- if num_buf:
110
- parts.append(number_words_to_str(num_buf)); num_buf=[]
111
- parts.append(OPERATORS[w]); i+=1; continue
112
- if w in SIMPLE or w in SCALE or re.fullmatch(r"\d+(\.\d+)?", w) or w in ("point","dot"):
113
- num_buf.append(w); i+=1; continue
114
- if w in FUNCTION_WORDS:
115
- if num_buf:
116
- parts.append(number_words_to_str(num_buf)); num_buf=[]
117
- func = FUNCTION_WORDS[w]
118
- # try immediate number after function
119
- j=i+1; arg_buf=[]
120
- while j < len(raw) and (raw[j] in SIMPLE or raw[j] in SCALE or re.fullmatch(r"\d+(\.\d+)?", raw[j]) or raw[j] in ("point","dot")):
121
- arg_buf.append(raw[j]); j+=1
122
- if arg_buf:
123
- parts.append(f"{func}{number_words_to_str(arg_buf)})"); i=j; continue
124
- else:
125
- parts.append(func); i+=1; continue
126
- if w == "reciprocal":
127
- if num_buf:
128
- parts.append(number_words_to_str(num_buf)); num_buf=[]
129
- j=i+1; arg_buf=[]
130
- while j < len(raw) and (raw[j] in SIMPLE or raw[j] in SCALE or re.fullmatch(r"\d+(\.\d+)?", raw[j]) or raw[j] in ("point","dot")):
131
- arg_buf.append(raw[j]); j+=1
132
- if arg_buf:
133
- parts.append(f"(1/({number_words_to_str(arg_buf)}))"); i=j; continue
134
- else:
135
- i+=1; continue
136
- if w in ("power","^","**","to"):
137
- if num_buf:
138
- parts.append(number_words_to_str(num_buf)); num_buf=[]
139
- parts.append("**"); i+=1; continue
140
- if w in SUFFIXES:
141
- if num_buf:
142
- parts.append(number_words_to_str(num_buf)); num_buf=[]
143
- parts.append(SUFFIXES[w]); i+=1; continue
144
- if w in ("percent","percentage","%"):
145
- if num_buf:
146
- parts.append(number_words_to_str(num_buf)); num_buf=[]
147
- parts.append("/100"); i+=1; continue
148
- # unknown token flush number buffer then skip
149
- if num_buf:
150
- parts.append(number_words_to_str(num_buf)); num_buf=[]
151
- i += 1
152
- if num_buf:
153
- parts.append(number_words_to_str(num_buf)); num_buf=[]
154
- expr = "".join(parts)
155
- expr = re.sub(r"\s+", "", expr).strip()
156
- return expr
157
 
158
- # -----------------------------
159
- # Safe evaluator mapping
160
- # -----------------------------
161
- ALLOWED = {
162
- "sin": math.sin, "cos": math.cos, "tan": math.tan,
163
- "log": lambda x: math.log10(x), "ln": lambda x: math.log(x),
164
- "sqrt": math.sqrt, "factorial": math.factorial
165
- }
166
 
167
- def safe_eval(expr: str):
168
- if not expr or expr.strip() == "":
169
- raise ValueError("Empty expression")
170
- # convert n! -> factorial(n)
171
- expr2 = re.sub(r"(\d+(\.\d+)?|\([^\)]+\))\!", r"factorial(\1)", expr)
172
- expr2 = expr2.replace("%", "/100")
173
- if not re.fullmatch(r"[0-9a-zA-Z_\+\-\*\/\.\(\),%!]+", expr2):
174
- raise ValueError("Invalid characters")
175
- return eval(expr2, {"__builtins__": None}, ALLOWED)
176
 
177
- # -----------------------------
178
- # Audio processor for streamlit-webrtc
179
- # -----------------------------
180
- class AudioProcessor(AudioProcessorBase):
181
- def __init__(self):
182
- self.q = queue.Queue()
183
 
184
- def recv_audio(self, frame):
185
- # frame.to_ndarray() returns shape (n_channels, n_samples)
186
- arr = frame.to_ndarray()
187
- # convert to mono if needed
188
- if arr.ndim > 1:
189
- arr = np.mean(arr, axis=0)
190
- self.q.put(arr.astype(np.float32))
191
- return frame
192
 
193
- # -----------------------------
194
- # Streamlit UI + webrtc
195
- # -----------------------------
196
- st.set_page_config(page_title="Voice Scientific Calculator", layout="wide")
197
- st.markdown("<h2>🎙️ Voice Scientific Calculator — Continuous (auto-transcribe)</h2>", unsafe_allow_html=True)
198
- st.markdown("Say math naturally and say **equal / equals / equal to** to evaluate. This runs continuously (short chunks are transcribed).")
199
 
200
- if "history" not in st.session_state:
201
- st.session_state.history = []
 
 
 
 
 
 
 
 
202
 
203
- # Start the webrtc streamer (SENDONLY: we only send audio from browser to python)
204
- webrtc_ctx = webrtc_streamer(
205
- key="voice-calculator",
206
- mode=WebRtcMode.SENDONLY,
207
- audio_processor_factory=AudioProcessor,
208
- media_stream_constraints={"audio": True, "video": False},
209
- async_processing=True,
210
- desired_playing_state=True
211
- )
212
 
213
- transcript_box = st.empty()
214
- expr_box = st.empty()
215
- result_box = st.empty()
216
- status_box = st.empty()
217
 
 
218
  recognizer = sr.Recognizer()
219
 
220
- # Buffer to accumulate audio chunks (per session)
221
- if "audio_buffer" not in st.session_state:
222
- st.session_state.audio_buffer = []
223
 
224
- # Main loop: poll audio queue and try to transcribe short chunks
225
- if webrtc_ctx and webrtc_ctx.state.playing:
226
- status_box.info("🎧 Listening... (auto-transcribe). Say 'equal' to compute.")
227
- processor = webrtc_ctx.audio_processor
228
- if processor:
229
- try:
230
- # drain up to some frames from processor.q
231
- collected = []
232
- while True:
233
- # non-blocking get
234
- arr = processor.q.get_nowait()
235
- collected.append(arr)
236
- # stop after collecting enough
237
- if sum(a.size for a in collected) > 16000 * 2: # ~2 seconds at 16k
238
- break
239
- except queue.Empty:
240
- collected = collected # may be empty or partial
241
 
242
- if collected:
243
- # concatenate and convert to 16k mono int16 PCM bytes for SpeechRecognition
244
- audio_float = np.concatenate(collected)
245
- # normalize to int16
246
- audio_int16 = np.int16(np.clip(audio_float * 32767, -32768, 32767))
247
- import io, wave
248
- bio = io.BytesIO()
249
- wf = wave.open(bio, 'wb')
250
- wf.setnchannels(1)
251
- wf.setsampwidth(2)
252
- wf.setframerate(16000)
253
- wf.writeframes(audio_int16.tobytes())
254
- wf.close()
255
- wav_bytes = bio.getvalue()
256
 
257
- # send to SpeechRecognition
258
- audio_data = sr.AudioData(wav_bytes, 16000, 2)
259
- try:
260
- text = recognizer.recognize_google(audio_data)
261
- except sr.UnknownValueError:
262
- text = ""
263
- except sr.RequestError as e:
264
- status_box.error(f"Speech API error: {e}")
265
- text = ""
266
 
267
- if text:
268
- # append to rolling transcript
269
- prev = st.session_state.get("rolling_transcript", "")
270
- prev = (prev + " " + text).strip()
271
- st.session_state.rolling_transcript = prev
272
- transcript_box.markdown(f"**Transcript:** {prev}")
273
 
274
- # if user said 'equal' in the newest chunk, evaluate
275
- if any(eq in text.lower() for eq in EQUAL_WORDS):
276
- # parse the whole rolling transcript
277
- expr = parse_spoken_to_expr(prev)
278
- expr_box.markdown(f"**Expression:** `{expr}`")
279
- try:
280
- value = safe_eval(expr)
281
- result_box.success(f"Result: {value}")
282
- # store history
283
- st.session_state.history.append({
284
- "time": time.strftime("%Y-%m-%d %H:%M:%S"),
285
- "transcript": prev,
286
- "expression": expr,
287
- "result": str(value)
288
- })
289
- # reset rolling transcript
290
- st.session_state.rolling_transcript = ""
291
- except Exception as e:
292
- result_box.error(f"Eval error: {e}")
293
- st.session_state.rolling_transcript = ""
294
 
295
- # Controls / History UI
296
- st.markdown("---")
297
- c1, c2 = st.columns([1,1])
298
- with c1:
299
- if st.button("Clear History"):
300
- st.session_state.history = []
301
- st.success("History cleared")
302
- with c2:
303
- if st.session_state.history:
304
- import pandas as pd, base64, io
305
- df = pd.DataFrame(st.session_state.history)
306
- csv = df.to_csv(index=False).encode()
307
- st.download_button("Download history CSV", csv, file_name="history.csv")
308
 
309
- st.markdown("### History")
310
- if st.session_state.history:
311
- for it in reversed(st.session_state.history[-40:]):
312
- st.markdown(f"**{it['time']}** — `{it['transcript']}` → `{it['expression']}` = **{it['result']}**")
313
- else:
314
- st.info("No history yet.")
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import numpy as np
3
  import speech_recognition as sr
4
+ import base64
5
+ import re
6
+ import math
7
+ import io
8
+ import time
9
+ import pandas as pd
10
+
11
+ st.set_page_config(page_title="Voice Calculator", layout="wide")
12
+
13
+ # ---------------- UI ----------------
14
+ st.markdown("<h2>🎙️ Voice Scientific Calculator (HF Spaces Compatible)</h2>", unsafe_allow_html=True)
15
+ st.markdown("Record voice → auto convert → evaluate when you say **equal**")
16
+
17
+ # ---------------- Recorder UI (JS) ----------------
18
+ record_js = """
19
+ <script>
20
+ let chunks = [];
21
+ let mediaRecorder;
22
+
23
+ function startRecording() {
24
+ navigator.mediaDevices.getUserMedia({ audio: true })
25
+ .then(stream => {
26
+ mediaRecorder = new MediaRecorder(stream);
27
+ mediaRecorder.start();
28
+ chunks = [];
29
+ mediaRecorder.ondataavailable = e => { chunks.push(e.data); };
30
+ });
31
  }
 
 
 
32
 
33
+ function stopRecording() {
34
+ mediaRecorder.stop();
35
+ mediaRecorder.onstop = e => {
36
+ let blob = new Blob(chunks, { type: 'audio/webm' });
37
+ let reader = new FileReader();
38
+ reader.readAsDataURL(blob);
39
+ reader.onloadend = () => {
40
+ let base64data = reader.result.split(',')[1];
41
+ const streamlitEvent = new CustomEvent("streamlit:audio_recorded", {
42
+ detail: { data: base64data }
43
+ });
44
+ window.parent.document.dispatchEvent(streamlitEvent);
45
+ };
46
+ };
47
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ </script>
50
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ st.markdown(record_js, unsafe_allow_html=True)
 
 
 
 
 
 
 
53
 
54
+ st.button("🎙 Start Recording", on_click=lambda: st.session_state.update({"record": "start"}))
55
+ st.button("⏹ Stop & Process", on_click=lambda: st.session_state.update({"record": "stop"}))
 
 
 
 
 
 
 
56
 
57
+ # ---------------------------------------
58
+ # LISTEN FOR JS → PYTHON AUDIO TRANSFER
59
+ # ---------------------------------------
60
+ audio_slot = st.empty()
 
 
61
 
62
+ if "audio" not in st.session_state:
63
+ st.session_state.audio = None
 
 
 
 
 
 
64
 
65
+ def on_audio_received():
66
+ pass
 
 
 
 
67
 
68
+ # Inject listener
69
+ audio_script = """
70
+ <script>
71
+ window.parent.document.addEventListener("streamlit:audio_recorded", (e) => {
72
+ const data = e.detail.data;
73
+ window.parent.postMessage({isStreamlitMessage: true, type: "streamlit:setComponentValue", value: data}, "*");
74
+ });
75
+ </script>
76
+ """
77
+ audio_slot.markdown(audio_script, unsafe_allow_html=True)
78
 
79
+ audio_data = st.experimental_get_query_params().get("componentValue", [None])[0]
 
 
 
 
 
 
 
 
80
 
81
+ if audio_data:
82
+ st.session_state.audio = audio_data
 
 
83
 
84
+ # ---------------- PROCESS AUDIO ----------------
85
  recognizer = sr.Recognizer()
86
 
87
+ def decode_audio(b64):
88
+ raw = base64.b64decode(b64)
89
+ return raw
90
 
91
+ if st.session_state.audio:
92
+ st.info("Processing audio…")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
+ audio_bytes = decode_audio(st.session_state.audio)
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ audio = sr.AudioData(audio_bytes, 48000, 2)
97
+ try:
98
+ text = recognizer.recognize_google(audio)
99
+ except:
100
+ text = ""
 
 
 
 
101
 
102
+ st.success(f"Transcript: **{text}**")
 
 
 
 
 
103
 
104
+ # EXPRESSION PARSER (simple version)
105
+ text_lower = text.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+ # Replace words
108
+ ops = {
109
+ "plus": "+",
110
+ "minus": "-",
111
+ "times": "*",
112
+ "into": "*",
113
+ "x": "*",
114
+ "divide": "/",
115
+ "divided by": "/",
116
+ "power": "**",
117
+ "to the power of": "**",
118
+ "square root of": "math.sqrt",
119
+ }
120
 
121
+ expr = text_lower
122
+ for k, v in ops.items():
123
+ expr = expr.replace(k, v)
124
+
125
+ expr = re.sub(r"[^0-9\+\-\*\/\.\(\)a-z ]", "", expr)
126
+
127
+ if "equal" in expr:
128
+ expr = expr.replace("equal", "")
129
+ st.write(f"Expression: `{expr}`")
130
+
131
+ try:
132
+ result = eval(expr, {"math": math, "__builtins__": {}})
133
+ st.success(f"Result = **{result}**")
134
+ except Exception as e:
135
+ st.error(f"Error: {e}")