umaradnaan committed on
Commit
1623ce9
·
verified ·
1 Parent(s): 9903cca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +283 -114
app.py CHANGED
@@ -1,145 +1,314 @@
 
1
  import streamlit as st
2
- import speech_recognition as sr
3
- import numpy as np
4
- from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
5
- import queue
 
 
 
 
 
 
 
 
6
  import math
7
  import re
 
 
 
 
 
8
 
9
- # -----------------------------------------
10
- # Expression Fixing / Parsing
11
- # -----------------------------------------
12
- def spoken_to_expression(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  text = text.lower()
14
-
15
- replacements = {
16
- r"plus": "+",
17
- r"minus": "-",
18
- r"(into|times|multiply|multiplied by)": "*",
19
- r"(divided by|divide)": "/",
20
- r"(to the power of|power of|power)": "**",
21
- r"square root of": "math.sqrt",
22
- r"root of": "math.sqrt",
23
- r"sin": "math.sin",
24
- r"cos": "math.cos",
25
- r"tan": "math.tan",
26
- r"log": "math.log10",
27
- r"ln": "math.log",
28
- }
29
-
30
- for pattern, repl in replacements.items():
31
- text = re.sub(pattern, repl, text)
32
-
33
- text = re.sub(r"point", ".", text)
34
-
35
- text = re.sub(r"[a-zA-Z]+", "", text)
36
-
37
- return text.strip()
38
-
39
-
40
- # -----------------------------------------
41
- # WebRTC Audio Processor
42
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  class AudioProcessor(AudioProcessorBase):
44
  def __init__(self):
45
  self.q = queue.Queue()
46
 
47
  def recv_audio(self, frame):
48
- audio = frame.to_ndarray().flatten().astype(np.float32)
49
- self.q.put(audio)
 
 
 
 
50
  return frame
51
 
 
 
 
 
 
 
52
 
53
- # -----------------------------------------
54
- # UI DESIGN
55
- # -----------------------------------------
56
- st.set_page_config(page_title="Voice Calculator", layout="wide")
57
-
58
- st.markdown("""
59
- <h1 style='text-align:center;color:white;'>🎙️ Voice Scientific Calculator</h1>
60
- """, unsafe_allow_html=True)
61
-
62
- st.markdown("""
63
- <div style="background:#111;padding:20px;border-radius:15px;color:white;">
64
- Speak expressions like:
65
- - 10 plus 20
66
- - 10 to the power of 2
67
- - square root of 81
68
- - sin 45
69
- - ... then say **equal**
70
- </div>
71
- """, unsafe_allow_html=True)
72
-
73
-
74
- # -----------------------------------------
75
- # WebRTC
76
- # -----------------------------------------
77
-
78
- st.subheader("🎤 Voice Input")
79
 
 
80
  webrtc_ctx = webrtc_streamer(
81
- key="voice",
82
  mode=WebRtcMode.SENDONLY,
83
  audio_processor_factory=AudioProcessor,
84
  media_stream_constraints={"audio": True, "video": False},
 
 
85
  )
86
 
87
- recognizer = sr.Recognizer()
88
-
89
  transcript_box = st.empty()
90
  expr_box = st.empty()
91
  result_box = st.empty()
 
92
 
93
- buffer_audio = []
94
-
95
- # -----------------------------------------
96
- # Audio Capture + Recognition
97
- # -----------------------------------------
98
-
99
- def convert_to_wav(float_audio):
100
- import io
101
- import wave
102
-
103
- bio = io.BytesIO()
104
- wav = wave.open(bio, "wb")
105
- wav.setnchannels(1)
106
- wav.setsampwidth(2)
107
- wav.setframerate(16000)
108
- wav.writeframes((float_audio * 32767).astype(np.int16).tobytes())
109
- wav.close()
110
- return bio.getvalue()
111
 
 
 
 
112
 
 
113
  if webrtc_ctx and webrtc_ctx.state.playing:
 
114
  processor = webrtc_ctx.audio_processor
115
-
116
  if processor:
117
  try:
118
- audio_chunk = processor.q.get(timeout=1)
119
- buffer_audio.extend(audio_chunk)
120
-
121
- audio_data = np.array(buffer_audio, dtype=np.float32)
122
-
123
- if len(audio_data) > 16000 * 4:
124
- wav_bytes = convert_to_wav(audio_data)
125
- audio_source = sr.AudioData(wav_bytes, 16000, 2)
126
-
127
- try:
128
- text = recognizer.recognize_google(audio_source)
129
- transcript_box.markdown(f"### 🎧 Transcript\n{text}")
130
-
131
- if "equal" in text.lower():
132
- expr = spoken_to_expression(text)
133
- expr_box.markdown(f"### 🧮 Expression\n`{expr}`")
134
-
135
- try:
136
- result = eval(expr)
137
- result_box.markdown(f"### 📊 Result\n**{result}**")
138
- except Exception as e:
139
- result_box.markdown(f"### 📊 Result\n❌ Error: {e}")
140
-
141
- except Exception:
142
- pass
143
-
144
  except queue.Empty:
145
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
  import streamlit as st
3
+
4
+ # -----------------------------
5
+ # Fix/session-state initialization for streamlit-webrtc internal callbacks
6
+ # (prevents: st.session_state has no attribute "_components_callbacks")
7
+ # -----------------------------
8
# Workaround for the streamlit-webrtc error quoted above: make sure both
# session-state slots exist (as empty dicts) before the component is created.
for _slot in ("_components_callbacks", "_component_value"):
    if _slot not in st.session_state:
        st.session_state[_slot] = {}
13
+
14
+ # now import the rest
15
  import math
16
  import re
17
+ import time
18
+ import queue
19
+ import numpy as np
20
+ import speech_recognition as sr
21
+ from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
22
 
23
+ # -----------------------------
24
+ # Helper: Spoken expression parser
25
+ # -----------------------------
26
# Number words that contribute their face value (units, teens and tens).
SIMPLE = {
    "zero": 0,
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10,
    "eleven": 11,
    "twelve": 12,
    "thirteen": 13,
    "fourteen": 14,
    "fifteen": 15,
    "sixteen": 16,
    "seventeen": 17,
    "eighteen": 18,
    "nineteen": 19,
    "twenty": 20,
    "thirty": 30,
    "forty": 40,
    "fifty": 50,
    "sixty": 60,
    "seventy": 70,
    "eighty": 80,
    "ninety": 90,
}

# Multiplier words.
SCALE = {
    "hundred": 100,
    "thousand": 1000,
    "million": 1000000,
}

# Spoken word -> binary operator.
OPERATORS = {
    "plus": "+",
    "add": "+",
    "added": "+",
    "minus": "-",
    "subtract": "-",
    "less": "-",
    "into": "*",
    "times": "*",
    "multiply": "*",
    "x": "*",
    "divide": "/",
    "divided": "/",
    "over": "/",
    "by": "/",
}

# Spoken word -> opening text of a function call (the caller closes the paren).
FUNCTION_WORDS = {
    "sin": "math.sin(",
    "sine": "math.sin(",
    "cos": "math.cos(",
    "cosine": "math.cos(",
    "tan": "math.tan(",
    "tangent": "math.tan(",
    "log": "math.log10(",
    "ln": "math.log(",
    "sqrt": "math.sqrt(",
    "square root": "math.sqrt(",
}

# Words that apply to the operand spoken just before them.
SUFFIXES = {
    "square": "**2",
    "squared": "**2",
    "cube": "**3",
    "cubed": "**3",
    "factorial": "!",
}

# Phrases that ask for the expression to be evaluated.
EQUAL_WORDS = {"equal", "equals", "equal to", "="}

# Filler words.
IGNORES = {"of", "the", "and", "to", "a", "please"}
51
+
52
def number_words_to_str(tokens):
    """Turn a run of number tokens into the text of a numeric literal.

    Args:
        tokens: list of lower-case strings. Each is a digit string such as
            "12" or "3.5", a key of SIMPLE or SCALE, or a decimal marker
            ("point" / "dot").

    Returns:
        "" for an empty list. If every token is a digit string, the tokens
        concatenated unchanged. Otherwise the accumulated value as text, with
        a ".<digits>" tail when a decimal marker was seen (".0" if no digits
        followed the marker).
    """
    if not tokens:
        return ""

    numeric = re.compile(r"\d+(\.\d+)?")
    if all(numeric.fullmatch(tok) for tok in tokens):
        return "".join(tokens)

    total = 0
    current = 0
    fraction = None  # list of digit strings once a decimal marker is seen
    for index, tok in enumerate(tokens):
        if tok in ("point", "dot"):
            fraction = []
            for digit_tok in tokens[index + 1:]:
                # Accept whole digit strings, not just single digits, so that
                # "3 point 14" yields "3.14" instead of dropping the "14".
                if re.fullmatch(r"\d+", digit_tok):
                    fraction.append(digit_tok)
                elif digit_tok in SIMPLE:
                    fraction.append(str(SIMPLE[digit_tok]))
                else:
                    break
            # Everything after the decimal tail is ignored.
            break
        if tok in SIMPLE:
            current += SIMPLE[tok]
        elif tok == "hundred":
            # A bare "hundred" means one hundred.
            current = (current or 1) * 100
        elif tok in ("thousand", "million"):
            total += (current or 1) * SCALE[tok]
            current = 0
        elif numeric.fullmatch(tok):
            # Digit token mixed with number words: shift and add its integer
            # part (any fractional part of the token is discarded here).
            current = current * 10 + int(float(tok))
        else:
            break

    total += current
    if fraction is None:
        return str(total)
    return f"{total}.{''.join(fraction) or '0'}"
92
+
93
def _is_number_token(token):
    """Return True if *token* can be part of a number (word, digits or decimal marker)."""
    return (
        token in ("point", "dot")
        or re.fullmatch(r"\d+(\.\d+)?", token) is not None
        or token in SIMPLE
        or token in SCALE
    )


def parse_spoken_to_expr(text: str) -> str:
    """Translate a spoken-math transcript into an expression string.

    The text is lower-cased, multi-word phrases are collapsed to single
    keywords, filler words and the "equal" phrases are removed, and the
    remaining tokens are mapped through OPERATORS, FUNCTION_WORDS and
    SUFFIXES. Runs of number tokens are converted with number_words_to_str.
    Unrecognised tokens are skipped.

    Args:
        text: transcript such as "ten divided by two equals".

    Returns:
        The expression with no whitespace, e.g. "10/2". May be empty.
    """
    text = text.lower()

    # Collapse phrases first so their parts are not tokenised separately:
    # "divided by" would otherwise emit "/" twice (floor division) and
    # "multiplied by" would emit only the "/" for "by".
    text = re.sub(r"\b(?:raised\s+)?(?:to\s+(?:the\s+)?)?power(?:\s+of)?\b", " power ", text)
    text = re.sub(r"\bsquare\s+root(?:\s+of)?\b", " sqrt ", text)
    text = re.sub(r"\bdivided?(?:\s+by)?\b", " divide ", text)
    text = re.sub(r"\bmultipl(?:y|ied)(?:\s+by)?\b", " times ", text)

    text = re.sub(r"\b(of|the|and|please|a)\b", " ", text)

    # Longest phrase first: removing "equal" before "equal to" would leave a
    # stray "to", which the loop below reads as the power operator.
    for phrase in sorted(EQUAL_WORDS, key=len, reverse=True):
        text = text.replace(phrase, " ")

    tokens = text.split()
    parts = []
    pending_calls = 0  # function openers still waiting for their argument

    def emit_operand(operand):
        """Append an operand and close any function calls that were waiting for it."""
        nonlocal pending_calls
        parts.append(operand + ")" * pending_calls)
        pending_calls = 0

    def take_number(start):
        """Convert the run of number tokens at *start*; return (literal, next index)."""
        end = start
        while end < len(tokens) and _is_number_token(tokens[end]):
            end += 1
        return number_words_to_str(tokens[start:end]), end

    pos = 0
    while pos < len(tokens):
        word = tokens[pos]
        next_is_number = pos + 1 < len(tokens) and _is_number_token(tokens[pos + 1])

        if _is_number_token(word):
            literal, pos = take_number(pos)
            emit_operand(literal)
        elif word in OPERATORS:
            parts.append(OPERATORS[word])
            pos += 1
        elif word in FUNCTION_WORDS:
            opener = FUNCTION_WORDS[word]
            if next_is_number:
                literal, pos = take_number(pos + 1)
                emit_operand(f"{opener}{literal})")
            else:
                # Argument is not an immediate number (e.g. another function);
                # leave the call open until the next operand arrives.
                parts.append(opener)
                pending_calls += 1
                pos += 1
        elif word == "reciprocal":
            if next_is_number:
                literal, pos = take_number(pos + 1)
                emit_operand(f"(1/({literal}))")
            else:
                pos += 1
        elif word in ("power", "^", "**", "to"):
            parts.append("**")
            pos += 1
        elif word in SUFFIXES:
            parts.append(SUFFIXES[word])
            pos += 1
        elif word in ("percent", "percentage", "%"):
            parts.append("/100")
            pos += 1
        else:
            # Unknown word: ignore it.
            pos += 1

    # Keep parentheses balanced even if a function never received an argument.
    expr = "".join(parts) + ")" * pending_calls
    return re.sub(r"\s+", "", expr).strip()
157
+
158
+ # -----------------------------
159
+ # Safe evaluator mapping
160
+ # -----------------------------
161
# Functions callable by bare name inside an expression, e.g. "sqrt(16)".
ALLOWED = {
    "sin": math.sin, "cos": math.cos, "tan": math.tan,
    "log": lambda x: math.log10(x), "ln": lambda x: math.log(x),
    "sqrt": math.sqrt, "factorial": math.factorial
}

# Functions callable as "math.<name>(...)" inside an expression.
_MATH_FUNCTIONS = {
    "sin": math.sin, "cos": math.cos, "tan": math.tan,
    "log10": math.log10, "log": math.log,
    "sqrt": math.sqrt, "factorial": math.factorial,
}


def safe_eval(expr: str):
    """Evaluate an arithmetic expression string without using eval().

    Supported syntax: int/float literals, + - * / // **, unary + and -,
    parentheses, a postfix "!" (rewritten to factorial(...)), "%" (rewritten
    to "/100"), and calls to the names in ALLOWED or to "math.<name>" for the
    names in _MATH_FUNCTIONS.

    Args:
        expr: expression text such as "math.sqrt(81)+5!".

    Returns:
        The numeric result.

    Raises:
        ValueError: empty input, characters outside the permitted set, or any
            construct / function name that is not whitelisted.
        SyntaxError: the text is not a well-formed expression.
        Exceptions raised by the arithmetic itself (e.g. ZeroDivisionError)
        propagate unchanged.
    """
    import ast
    import operator

    if not expr or expr.strip() == "":
        raise ValueError("Empty expression")
    # convert n! -> factorial(n)
    expr2 = re.sub(r"(\d+(\.\d+)?|\([^\)]+\))\!", r"factorial(\1)", expr)
    expr2 = expr2.replace("%", "/100")
    if not re.fullmatch(r"[0-9a-zA-Z_\+\-\*\/\.\(\),%!]+", expr2):
        raise ValueError("Invalid characters")

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def resolve_function(func_node):
        """Map a call target to a whitelisted callable or raise ValueError."""
        if isinstance(func_node, ast.Name):
            table, name = ALLOWED, func_node.id
        elif (
            isinstance(func_node, ast.Attribute)
            and isinstance(func_node.value, ast.Name)
            and func_node.value.id == "math"
        ):
            table, name = _MATH_FUNCTIONS, func_node.attr
        else:
            raise ValueError("Unsupported function call")
        if name not in table:
            raise ValueError(f"Unknown function: {name}")
        return table[name]

    def evaluate(node):
        """Recursively evaluate one whitelisted AST node."""
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError("Unsupported literal")
            return value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.Call) and not node.keywords:
            func = resolve_function(node.func)
            return func(*[evaluate(arg) for arg in node.args])
        # Anything else (attribute access, names, tuples, ...) is rejected so
        # the expression cannot reach arbitrary Python objects.
        raise ValueError("Unsupported expression")

    return evaluate(ast.parse(expr2, mode="eval").body)
176
+
177
+ # -----------------------------
178
+ # Audio processor for streamlit-webrtc
179
+ # -----------------------------
180
class AudioProcessor(AudioProcessorBase):
    """Queue incoming audio frames as mono float32 sample arrays.

    Each frame is converted to a 1-D float32 array and put on ``self.q`` for
    the Streamlit script to drain. The frame itself is passed through
    unchanged.
    """

    def __init__(self):
        # Filled by the frame callback, drained by the script's polling code.
        self.q = queue.Queue()

    def recv_audio(self, frame):
        """Convert one frame to mono float32, enqueue it, and return the frame.

        Integer sample formats are scaled by the dtype's maximum so the
        queued values lie in roughly [-1, 1]. The consumer multiplies by
        32767 before casting to int16, so unscaled integer samples would clip
        to full scale.
        """
        raw = frame.to_ndarray()
        # Decide on the scale before averaging: np.mean always returns floats.
        if np.issubdtype(raw.dtype, np.integer):
            scale = float(np.iinfo(raw.dtype).max)
        else:
            scale = 1.0
        # Average over the first axis when the array is 2-D.
        # NOTE(review): assumes shape (n_channels, n_samples); packed formats
        # may instead interleave channels in a single row. Confirm.
        mono = np.mean(raw, axis=0) if raw.ndim > 1 else raw
        self.q.put((mono / scale).astype(np.float32))
        return frame

    def recv(self, frame):
        # NOTE(review): streamlit-webrtc's per-frame hook appears to be named
        # `recv`; delegate so frames reach the queue either way. Confirm
        # against the installed library version.
        return self.recv_audio(frame)
192
 
193
+ # -----------------------------
194
+ # Streamlit UI + webrtc
195
+ # -----------------------------
196
# Page chrome.
st.set_page_config(page_title="Voice Scientific Calculator", layout="wide")
st.markdown(
    "<h2>🎙️ Voice Scientific Calculator — Continuous (auto-transcribe)</h2>",
    unsafe_allow_html=True,
)
st.markdown(
    "Say math naturally and say **equal / equals / equal to** to evaluate. "
    "This runs continuously (short chunks are transcribed)."
)

# Per-session list of evaluated expressions.
if "history" not in st.session_state:
    st.session_state["history"] = []

# Audio-only, browser -> Python stream; frames are handed to AudioProcessor.
webrtc_ctx = webrtc_streamer(
    key="voice-calculator",
    mode=WebRtcMode.SENDONLY,
    audio_processor_factory=AudioProcessor,
    media_stream_constraints={"audio": True, "video": False},
    async_processing=True,
    desired_playing_state=True,
)

# Placeholders, created in display order, that later code fills in.
transcript_box, expr_box, result_box, status_box = (st.empty() for _ in range(4))

recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
+ # Buffer to accumulate audio chunks (per session)
221
# Session slot kept for compatibility; nothing in the visible script reads it.
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = []

# One polling pass per script run: drain queued audio, transcribe it, and
# evaluate the rolling transcript when an "equal" phrase is heard.
if webrtc_ctx and webrtc_ctx.state.playing:
    status_box.info("🎧 Listening... (auto-transcribe). Say 'equal' to compute.")
    processor = webrtc_ctx.audio_processor
    if processor:
        chunks = []
        sample_count = 0
        try:
            # Stop once more than 2 * 16000 samples have been gathered.
            while sample_count <= 16000 * 2:
                chunk = processor.q.get_nowait()
                chunks.append(chunk)
                sample_count += chunk.size
        except queue.Empty:
            pass  # queue drained; use whatever was gathered

        if chunks:
            # Scale float samples to 16-bit PCM.
            pcm = np.int16(np.clip(np.concatenate(chunks) * 32767, -32768, 32767))
            # AudioData takes raw PCM frames, so pass the sample bytes
            # directly instead of a WAV container (whose header would be
            # treated as audio).
            # NOTE(review): 16000 Hz is assumed here; confirm it matches the
            # sample rate of the frames delivered by the browser.
            audio_data = sr.AudioData(pcm.tobytes(), 16000, 2)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                text = ""
            except sr.RequestError as e:
                status_box.error(f"Speech API error: {e}")
                text = ""

            if text:
                # Append the new chunk to the rolling transcript.
                transcript = (st.session_state.get("rolling_transcript", "") + " " + text).strip()
                st.session_state.rolling_transcript = transcript
                transcript_box.markdown(f"**Transcript:** {transcript}")

                # Evaluate only when the newest chunk contains an equal phrase.
                if any(eq in text.lower() for eq in EQUAL_WORDS):
                    expr = parse_spoken_to_expr(transcript)
                    expr_box.markdown(f"**Expression:** `{expr}`")
                    try:
                        value = safe_eval(expr)
                    except Exception as e:
                        # UI boundary: show any evaluation failure to the user.
                        result_box.error(f"Eval error: {e}")
                    else:
                        result_box.success(f"Result: {value}")
                        st.session_state.history.append({
                            "time": time.strftime("%Y-%m-%d %H:%M:%S"),
                            "transcript": transcript,
                            "expression": expr,
                            "result": str(value)
                        })
                    finally:
                        # Start a fresh transcript after every attempt.
                        st.session_state.rolling_transcript = ""
294
+
295
+ # Controls / History UI
296
st.markdown("---")
clear_col, download_col = st.columns([1, 1])

# Left column: wipe the stored history.
with clear_col:
    if st.button("Clear History"):
        st.session_state.history = []
        st.success("History cleared")

# Right column: offer the history as a CSV download when there is any.
with download_col:
    if st.session_state.history:
        import base64
        import io
        import pandas as pd

        csv = pd.DataFrame(st.session_state.history).to_csv(index=False).encode()
        st.download_button("Download history CSV", csv, file_name="history.csv")

# Newest-first listing of at most the last 40 entries.
st.markdown("### History")
if not st.session_state.history:
    st.info("No history yet.")
else:
    for entry in reversed(st.session_state.history[-40:]):
        st.markdown(f"**{entry['time']}** — `{entry['transcript']}` → `{entry['expression']}` = **{entry['result']}**")