umaradnaan committed on
Commit
46b7ea5
·
verified ·
1 Parent(s): d52a822

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -374
app.py CHANGED
@@ -1,387 +1,145 @@
1
- # app.py
2
  import streamlit as st
3
  import speech_recognition as sr
 
 
 
4
  import math
5
  import re
6
- import time
7
- import pandas as pd
8
- import base64
9
-
10
- # -------------------------
11
- # Word maps
12
- # -------------------------
13
# Spoken number words, listed in ascending order so the value can be derived
# from the position instead of being typed out by hand.
_UNIT_WORDS = (
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
    "sixteen", "seventeen", "eighteen", "nineteen",
)
_TENS_WORDS = (
    "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty",
    "ninety",
)

# word -> integer value for 0..19 and the multiples of ten up to 90
SIMPLE = {word: value for value, word in enumerate(_UNIT_WORDS)}
SIMPLE.update((word, 10 * tens) for tens, word in enumerate(_TENS_WORDS, start=2))

# magnitude words -> multiplier
SCALE = dict(hundred=100, thousand=1000, million=1000000)

# spoken word (or literal symbol) -> Python binary operator
OPERATORS = {
    alias: symbol
    for symbol, aliases in (
        ("+", ("plus", "add", "added", "+")),
        ("-", ("minus", "subtract", "less", "-")),
        ("*", ("into", "times", "multiply", "x", "*")),
        ("/", ("divide", "divided", "over", "by", "/")),
    )
    for alias in aliases
}

# spoken function name -> opening of the call in the generated expression
FUNCTION_WORDS = {
    alias: name + "("
    for name, aliases in (
        ("sin", ("sin", "sine")),
        ("cos", ("cos", "cosine")),
        ("tan", ("tan", "tangent")),
        ("log", ("log",)),  # base-10
        ("ln", ("ln",)),
        ("sqrt", ("sqrt", "square root")),
    )
    for alias in aliases
}

# words that modify the number spoken just before them
SUFFIXES = {
    "square": "**2",
    "squared": "**2",
    "cube": "**3",
    "cubed": "**3",
    "factorial": "!",
}

RECIPROCAL_WORDS = {"reciprocal", "reciprocal of", "one over", "one by"}
EQUAL_WORDS = {"equal", "equals", "equal to", "=", "is"}
IGNORES = {"of", "the", "and"}
48
-
49
- # -------------------------
50
- # number words -> numeric string
51
- # supports decimals (point/dot)
52
- # -------------------------
53
def number_words_to_str(tokens):
    """Convert a run of spoken-number tokens into a numeric literal string.

    Args:
        tokens: list of lowercase tokens, each either a number word
            ("twenty", "hundred", ...), a numeric token ("12", "2.5") or a
            decimal marker ("point" / "dot").

    Returns:
        The number as a string ("125", "3.14"), or "" for an empty list.
        Parsing stops at the first token that is not part of a number.
    """
    if not tokens:
        return ""

    numeric = re.compile(r"\d+(\.\d+)?")

    # Purely numeric tokens are digits dictated one after another: join them.
    if all(numeric.fullmatch(t) for t in tokens):
        return "".join(tokens)

    total = 0
    current = 0
    fraction = None  # digits after the decimal point, once one was seen
    position = 0
    count = len(tokens)

    while position < count:
        word = tokens[position]

        if word in ("point", "dot"):
            digits = []
            for follower in tokens[position + 1:]:
                # Accept multi-digit tokens too: the recogniser often returns
                # "14" for "one four", which used to drop the whole fraction.
                if re.fullmatch(r"\d+", follower):
                    digits.append(follower)
                elif follower in SIMPLE:
                    digits.append(str(SIMPLE[follower]))
                else:
                    break
            fraction = "".join(digits) or "0"
            break

        if numeric.fullmatch(word):
            whole, _, frac = word.partition(".")
            # Shift by the token's own digit count so "one" "25" reads as 125,
            # consistent with how all-numeric input is joined above.
            current = current * 10 ** len(whole) + int(whole)
            if frac:
                # Keep the fractional part instead of truncating it.
                fraction = frac
                break
        elif word in SIMPLE:
            current += SIMPLE[word]
        elif word == "hundred":
            current = (current or 1) * 100
        elif word in ("thousand", "million"):
            total += (current or 1) * SCALE[word]
            current = 0
        else:
            break
        position += 1

    total += current
    if fraction is not None:
        return f"{total}.{fraction}"
    return str(total)
105
-
106
- # -------------------------
107
- # main parser: turns spoken text -> valid python expression string
108
- # -------------------------
109
def parse_transcript_to_expr(text):
    """Turn a spoken-math transcript into a Python expression string.

    Args:
        text: raw transcript, e.g. "ten divided by two equals".

    Returns:
        An expression string without spaces (e.g. "10/2", "sqrt(9)",
        "(1/(4))") meant to be passed to ``evaluate_expression``. Tokens
        that are not recognised are skipped.
    """
    text = text.lower()

    # Multi-word phrases -> single keyword. Longest phrases come first so a
    # shorter phrase cannot consume part of a longer one. "divided by" and
    # "multiplied by" are folded here because "by" on its own also maps to
    # "/", which used to turn "divided by" into "//" (floor division).
    phrase_rewrites = (
        ("raised to the power of", " power "),
        ("to the power of", " power "),
        ("to the power", " power "),
        ("power of", " power "),
        ("square root of", " sqrt "),
        ("square root", " sqrt "),
        ("reciprocal of", " reciprocal "),
        ("divided by", " divide "),
        ("multiplied by", " times "),
    )
    for phrase, keyword in phrase_rewrites:
        text = text.replace(phrase, keyword)

    # remove filler words (phrases that contain them were normalised above)
    text = re.sub(r"\b(of|the|and)\b", " ", text)

    # Remove equal words as whole words, longest first, so that "equal to"
    # never leaves a stray "to" (a power keyword) and "is" is not cut out of
    # the middle of another word.
    for eq in sorted(EQUAL_WORDS, key=len, reverse=True):
        if eq[:1].isalnum():
            text = re.sub(rf"\b{re.escape(eq)}\b", " ", text)
        else:
            text = text.replace(eq, " ")

    raw = text.split()
    numeric = re.compile(r"\d+(\.\d+)?")
    function_starts = set(FUNCTION_WORDS.values())

    parts = []
    num_buf = []

    def flush_number():
        # Emit the buffered number tokens as one numeric literal.
        if num_buf:
            parts.append(number_words_to_str(num_buf))
            del num_buf[:]

    def is_number_token(token):
        return (
            token in SIMPLE
            or token in SCALE
            or token in ("point", "dot")
            or numeric.fullmatch(token) is not None
        )

    i = 0
    count = len(raw)
    while i < count:
        w = raw[i]
        following = raw[i + 1] if i + 1 < count else None

        # ignore filler tokens
        if w in IGNORES:
            i += 1
            continue

        # parentheses words: "open bracket", "close parenthesis", bare "open"
        bracket = None
        if w in ("open", "open_bracket", "open-bracket"):
            bracket = "("
        elif w in ("close", "close_bracket", "close-bracket"):
            bracket = ")"
        if bracket is not None:
            has_noun = following in ("bracket", "parenthesis")
            if has_noun or w in ("open", "close"):
                flush_number()
                # A function start such as "sin(" already opened the
                # parenthesis; do not open a second one that never closes.
                if not (bracket == "(" and parts and parts[-1] in function_starts):
                    parts.append(bracket)
                i += 2 if has_noun else 1
                continue

        # functions (sin, cos, tan, log, ln, sqrt)
        if w in FUNCTION_WORDS:
            flush_number()
            func = FUNCTION_WORDS[w]  # e.g. 'sin(' or 'sqrt('
            j = i + 1
            sign = ""
            # A leading "-" is the sign of the argument, not a subtraction.
            if j + 1 < count and raw[j] == "-" and is_number_token(raw[j + 1]):
                sign = "-"
                j += 1
            arg_start = j
            while j < count and is_number_token(raw[j]):
                j += 1
            if j > arg_start:
                arg = number_words_to_str(raw[arg_start:j])
                parts.append(f"{func}{sign}{arg})")
                i = j
            else:
                # No direct numeric argument: leave the call open; it is
                # closed by a spoken bracket or by the balancing step below.
                parts.append(func)
                i += 1
            continue

        # reciprocal of the next numeric group
        if w == "reciprocal":
            flush_number()
            j = i + 1
            while j < count and is_number_token(raw[j]):
                j += 1
            if j > i + 1:
                arg = number_words_to_str(raw[i + 1:j])
                parts.append(f"(1/({arg}))")
                i = j
            else:
                i += 1
            continue

        # power keywords -> '**'
        if w in ("power", "^", "**", "to"):
            flush_number()
            parts.append("**")
            i += 1
            continue

        # suffixes that apply to the previous number
        if w in SUFFIXES:
            flush_number()
            parts.append(SUFFIXES[w])
            i += 1
            continue

        # percent (treated as a /100 suffix)
        if w in ("percent", "percentage", "%"):
            flush_number()
            parts.append("/100")
            i += 1
            continue

        # operators
        if w in OPERATORS:
            flush_number()
            parts.append(OPERATORS[w])
            i += 1
            continue

        # pieces of a number: decimal marker, digits, number words
        if is_number_token(w):
            num_buf.append(w)
            i += 1
            continue

        # unknown token => flush numbers and skip
        flush_number()
        i += 1

    flush_number()

    expr = "".join(parts)  # no spaces needed: e.g. '10+2' or '10**2'
    # small cleanup: collapse repeated operators
    expr = re.sub(r"\s+", "", expr)
    expr = re.sub(r"\+\++", "+", expr)
    expr = re.sub(r"\-\-+", "-", expr)

    # Close any call or bracket the speaker left open ("sin minus thirty").
    missing = expr.count("(") - expr.count(")")
    if missing > 0:
        expr += ")" * missing
    return expr
255
-
256
- # -------------------------
257
- # safe-ish evaluation
258
- # -------------------------
259
def evaluate_expression(expr):
    """Evaluate an arithmetic expression string produced by the parser.

    Supports numbers, ``+ - * / **``, parentheses, a postfix ``!``
    (factorial), ``%`` (divide by 100) and the functions sin, cos, tan,
    log (base 10), ln, sqrt and factorial.

    Args:
        expr: expression string without whitespace, e.g. "sqrt(9)+5!".

    Returns:
        The numeric result.

    Raises:
        ValueError: if the expression is empty, contains characters outside
            the whitelist, or references a name that is not an allowed
            function.
        Exception: whatever evaluation itself raises (SyntaxError,
            ZeroDivisionError, ...), unchanged.
    """

    def _factorial(value):
        # math.factorial rejects floats on current Python versions, but the
        # parser can produce integral floats such as (6/2)!.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return math.factorial(value)

    allowed = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "log": math.log10,
        "ln": math.log,
        "sqrt": math.sqrt,
        "factorial": _factorial,
    }

    if not expr or not expr.strip():
        raise ValueError("Empty expression")

    # convert 'n!' / '(...)!' to factorial(...)
    expr2 = re.sub(r"(\d+(\.\d+)?|\([^\)]+\))\!", r"factorial(\1)", expr)
    # replace stray % with /100
    expr2 = expr2.replace("%", "/100")

    # sanitize allowed chars
    if not re.fullmatch(r"[0-9a-zA-Z_\+\-\*\/\.\(\),%!]+", expr2):
        raise ValueError("Invalid characters in expression")

    # The character whitelist alone still admits attribute access such as
    # "sqrt.__class__"; reject every identifier that is not a known function.
    # Numeric literals (including exponent notation) are blanked out first so
    # "1e5" is not mistaken for a name.
    without_numbers = re.sub(r"\d+\.?\d*(?:[eE][+-]?\d+)?", " ", expr2)
    for name in re.findall(r"[A-Za-z_]\w*", without_numbers):
        if name not in allowed:
            raise ValueError(f"Unknown name in expression: {name}")

    # SECURITY NOTE: eval() is still used on speech-derived input. The checks
    # above limit it to arithmetic on whitelisted functions, but huge powers
    # (e.g. 9**9**9) can still consume CPU and memory.
    return eval(expr2, {"__builtins__": {}}, allowed)
287
-
288
- # -------------------------
289
- # utilities
290
- # -------------------------
291
def df_to_download_link(df, name="history.csv"):
    """Return an HTML anchor that downloads *df* as a CSV file.

    The CSV (without the index column) is embedded in the link as a
    base64 ``data:`` URL, so no file is written on the server.

    Args:
        df: pandas DataFrame to export.
        name: file name suggested to the browser.

    Returns:
        An ``<a>`` element as a string.
    """
    import html

    payload = base64.b64encode(df.to_csv(index=False).encode("utf-8")).decode()
    # Escape the file name so quotes or angle brackets cannot break out of
    # the attribute; the markup is rendered with unsafe_allow_html=True.
    safe_name = html.escape(name, quote=True)
    # text/csv is the registered media type; declare the charset because the
    # payload is UTF-8 encoded.
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        f'download="{safe_name}">Download CSV</a>'
    )
295
-
296
- # -------------------------
297
- # Streamlit UI
298
- # -------------------------
299
st.set_page_config(page_title="Voice Scientific Calculator", layout="wide")
st.title("🎙️ Voice Scientific Calculator")
st.write("Speak natural math expressions and say 'equal' (or 'equals') to evaluate. Examples: 'ten plus two', '10 to the power of 3', 'square root of nine'.")

if "history" not in st.session_state:
    st.session_state.history = []

# Matches an equal word as a whole word, so "is" inside "this" does not end
# the listening loop early. Symbols such as "=" are matched literally.
equal_pattern = re.compile(
    "|".join(
        rf"\b{re.escape(word)}\b" if word[:1].isalnum() else re.escape(word)
        for word in sorted(EQUAL_WORDS, key=len, reverse=True)
    )
)

left, right = st.columns([2, 1])

with right:
    st.subheader("Settings")
    use_mic = st.checkbox("Use Microphone (local)", value=True)
    if st.button("Clear History"):
        st.session_state.history = []
        st.success("History cleared")
    if st.button("Download History CSV"):
        if st.session_state.history:
            df = pd.DataFrame(st.session_state.history)
            st.markdown(df_to_download_link(df), unsafe_allow_html=True)
        else:
            st.info("No history yet")

    st.subheader("History")
    # Placeholder only: it is filled at the end of the script so a result
    # computed during this run shows up immediately.
    history_box = st.empty()

with left:
    st.subheader("Transcript")
    transcript_box = st.empty()
    st.subheader("Expression")
    expr_box = st.empty()
    st.subheader("Result")
    result_box = st.empty()

    start = st.button("🎤 Start Listening")
    if start:
        if not use_mic:
            st.warning("Enable local microphone in Settings.")
        else:
            r = sr.Recognizer()
            full_text = ""
            heard_equal = False
            status = st.empty()
            status.info("Listening... say 'equal' to finish")

            try:
                with sr.Microphone() as mic:
                    r.adjust_for_ambient_noise(mic, duration=0.6)
                    # NOTE: this loop only ends on an equal word or an API
                    # error; silence keeps it listening.
                    while True:
                        try:
                            audio = r.listen(mic, timeout=4, phrase_time_limit=6)
                        except sr.WaitTimeoutError:
                            transcript_box.write("...")
                            continue

                        try:
                            chunk = r.recognize_google(audio).lower()
                        except sr.UnknownValueError:
                            # nothing intelligible in this phrase
                            continue
                        except sr.RequestError as e:
                            status.error(f"Speech API error: {e}")
                            break

                        full_text += " " + chunk
                        transcript_box.write(full_text.strip())

                        if equal_pattern.search(chunk):
                            status.success("Equal detected — parsing...")
                            heard_equal = True
                            break
            except Exception as e:
                # Boundary handler for device/driver failures only; parsing
                # and evaluation are handled separately below.
                st.error(f"Microphone error: {e}")

            # Evaluate only when the user actually finished the expression;
            # after an API or microphone failure the error stays visible.
            if heard_equal:
                try:
                    expr = parse_transcript_to_expr(full_text)
                    expr_box.code(expr)
                    val = evaluate_expression(expr)
                    result_box.success(val)
                    st.session_state.history.append({
                        "time": time.strftime("%Y-%m-%d %H:%M:%S"),
                        "transcript": full_text.strip(),
                        "expression": expr,
                        "result": str(val),
                    })
                except Exception as e:
                    result_box.error(f"Evaluation error: {e}")

# Render the 30 most recent entries, newest first.
if st.session_state.history:
    history_box.markdown(
        "\n\n".join(
            f"**{it['time']}** \n• `{it['transcript']}` → `{it['expression']}` = **{it['result']}**"
            for it in reversed(st.session_state.history[-30:])
        )
    )
else:
    history_box.info("History empty")
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import speech_recognition as sr
3
+ import numpy as np
4
+ from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
5
+ import queue
6
  import math
7
  import re
8
+
9
+ # -----------------------------------------
10
+ # Expression Fixing / Parsing
11
+ # -----------------------------------------
12
# spoken phrase -> Python operator
_SPOKEN_OPERATORS = {
    "plus": "+",
    "minus": "-",
    "into": "*",
    "times": "*",
    "multiply": "*",
    "multiplied by": "*",
    "divided by": "/",
    "divide": "/",
    "to the power of": "**",
    "power of": "**",
    "power": "**",
}

# spoken phrase -> function name as resolvable by the caller's eval()
_SPOKEN_FUNCTIONS = {
    "square root of": "math.sqrt",
    "root of": "math.sqrt",
    "sin": "math.sin",
    "cos": "math.cos",
    "tan": "math.tan",
    "log": "math.log10",
    "ln": "math.log",
}

# One pattern for everything that may appear in the expression: known phrases
# (whole words only, longest first), numbers and literal operator symbols.
_SPOKEN_TOKEN_RE = re.compile(
    r"\b(?:"
    + "|".join(
        r"\s+".join(re.escape(part) for part in phrase.split())
        for phrase in sorted(
            list(_SPOKEN_OPERATORS) + list(_SPOKEN_FUNCTIONS), key=len, reverse=True
        )
    )
    + r")\b"
    + r"|\d+(?:\.\d+)?|\*\*|[-+*/()]"
)


def spoken_to_expression(text):
    """Translate a spoken-math transcript into a Python expression string.

    Recognised phrases are mapped to operators or ``math`` functions, a
    function followed by a number becomes a call (``"sin 45"`` ->
    ``"math.sin(45)"``), and every other word (including "equal") is
    dropped.

    Args:
        text: transcript, e.g. "square root of 81 equal".

    Returns:
        The expression with tokens separated by single spaces, or "" when
        nothing usable was found. Function names are emitted as ``math.*``,
        so the caller must evaluate with ``math`` in scope.
        NOTE(review): trig functions receive the number unchanged, i.e. in
        radians; confirm whether degrees were intended for "sin 45".
    """
    text = text.lower()

    # "3 point 5" -> "3.5"; a leading "point 5" -> "0.5"
    text = re.sub(r"(?<=\d)\s*point\s*(?=\d)", ".", text)
    text = re.sub(r"\bpoint\s*(?=\d)", "0.", text)

    parts = []
    pending_function = None  # function name waiting for its argument
    for match in _SPOKEN_TOKEN_RE.finditer(text):
        token = " ".join(match.group().split())

        if token in _SPOKEN_FUNCTIONS:
            if pending_function is not None:
                parts.append(pending_function)
            pending_function = _SPOKEN_FUNCTIONS[token]
            continue

        piece = _SPOKEN_OPERATORS.get(token, token)
        if pending_function is not None:
            if piece[0].isdigit():
                # wrap the number that directly follows the function name
                piece = f"{pending_function}({piece})"
            else:
                parts.append(pending_function)
            pending_function = None
        parts.append(piece)

    if pending_function is not None:
        parts.append(pending_function)

    return " ".join(parts)
38
+
39
+
40
+ # -----------------------------------------
41
+ # WebRTC Audio Processor
42
+ # -----------------------------------------
43
class AudioProcessor(AudioProcessorBase):
    """Collects incoming WebRTC audio frames into a thread-safe queue.

    Each frame is flattened to a 1-D float32 array and pushed onto ``q``,
    from which the Streamlit script pulls it for speech recognition.
    """

    def __init__(self):
        super().__init__()
        # Filled by the WebRTC worker, drained by the Streamlit script.
        self.q = queue.Queue()

    def recv_audio(self, frame):
        """Queue the samples of *frame* and return the frame unchanged."""
        samples = frame.to_ndarray()
        audio = samples.flatten().astype(np.float32)
        # Integer PCM is scaled to [-1, 1], the range the WAV conversion
        # expects before it multiplies by 32767.
        # NOTE(review): multi-channel frames are flattened as-is and the
        # frame's sample rate is not recorded; confirm the stream is mono.
        if np.issubdtype(samples.dtype, np.integer):
            audio /= float(np.iinfo(samples.dtype).max)
        self.q.put(audio)
        return frame

    def recv(self, frame):
        """Frame callback invoked by streamlit-webrtc.

        The library calls ``recv``; without this method ``recv_audio`` was
        never invoked and the queue stayed empty.
        """
        return self.recv_audio(frame)
51
+
52
+
53
+ # -----------------------------------------
54
+ # UI DESIGN
55
+ # -----------------------------------------
56
st.set_page_config(page_title="Voice Calculator", layout="wide")

st.markdown("""
<h1 style='text-align:center;color:white;'>🎙️ Voice Scientific Calculator</h1>
""", unsafe_allow_html=True)

# Markdown syntax is not processed inside a raw HTML block, so the example
# list and the emphasis are written as HTML instead of "- item" / "**bold**".
st.markdown("""
<div style="background:#111;padding:20px;border-radius:15px;color:white;">
Speak expressions like:
<ul>
<li>10 plus 20</li>
<li>10 to the power of 2</li>
<li>square root of 81</li>
<li>sin 45</li>
<li>... then say <b>equal</b></li>
</ul>
</div>
""", unsafe_allow_html=True)


# -----------------------------------------
# WebRTC
# -----------------------------------------

st.subheader("🎤 Voice Input")

# Browser microphone -> server; audio only, nothing is sent back.
webrtc_ctx = webrtc_streamer(
    key="voice",
    mode=WebRtcMode.SENDONLY,
    audio_processor_factory=AudioProcessor,
    media_stream_constraints={"audio": True, "video": False},
)

recognizer = sr.Recognizer()

# Placeholders updated in place by the recognition step.
transcript_box = st.empty()
expr_box = st.empty()
result_box = st.empty()

# Audio collected during the current script run (recreated on every rerun).
buffer_audio = []
94
+
95
+ # -----------------------------------------
96
+ # Audio Capture + Recognition
97
+ # -----------------------------------------
98
+
99
def convert_to_wav(float_audio, sample_rate=16000):
    """Encode float samples as a mono 16-bit PCM WAV file in memory.

    Args:
        float_audio: 1-D sequence or array of samples, nominally in [-1, 1].
        sample_rate: frame rate written to the WAV header (default 16000).

    Returns:
        The complete WAV file (header + frames) as bytes.
    """
    import io
    import wave

    # Clip before scaling: values outside [-1, 1] would otherwise wrap around
    # when cast to int16 and turn loud input into noise.
    samples = np.clip(np.asarray(float_audio, dtype=np.float32), -1.0, 1.0)
    pcm = (samples * 32767).astype("<i2")  # WAV frames are little-endian

    bio = io.BytesIO()
    # The context manager finalises the header even if writing fails; it does
    # not close the underlying BytesIO.
    with wave.open(bio, "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(sample_rate)
        wav.writeframes(pcm.tobytes())
    return bio.getvalue()
111
+
112
+
113
if webrtc_ctx and webrtc_ctx.state.playing:
    import io

    processor = webrtc_ctx.audio_processor

    if processor:
        # NOTE(review): the audio is treated as 16 kHz mono; WebRTC streams
        # are commonly 48 kHz, so confirm the real rate of incoming frames.
        window = 16000 * 4  # samples required before each recognition pass
        buffered = 0  # samples currently held in buffer_audio
        next_check = window

        # A Streamlit script runs top to bottom once per rerun, so frames
        # must be drained in a loop; a single get() never reaches the window.
        while webrtc_ctx.state.playing:
            try:
                audio_chunk = processor.q.get(timeout=1)
            except queue.Empty:
                continue

            chunk = np.asarray(audio_chunk, dtype=np.float32)
            buffer_audio.append(chunk)
            buffered += len(chunk)
            if buffered <= next_check:
                continue
            next_check = buffered + window

            audio_data = np.concatenate(buffer_audio)
            wav_bytes = convert_to_wav(audio_data)
            # convert_to_wav returns a full WAV file; let SpeechRecognition
            # parse it rather than passing the header off as PCM frames.
            with sr.AudioFile(io.BytesIO(wav_bytes)) as source:
                audio_source = recognizer.record(source)

            try:
                text = recognizer.recognize_google(audio_source)
            except sr.UnknownValueError:
                # nothing intelligible yet; keep listening
                continue
            except sr.RequestError as e:
                transcript_box.markdown(f"### 🎧 Transcript\n❌ Speech API error: {e}")
                continue

            transcript_box.markdown(f"### 🎧 Transcript\n{text}")

            if "equal" not in text.lower():
                continue

            expr = spoken_to_expression(text)
            expr_box.markdown(f"### 🧮 Expression\n`{expr}`")

            try:
                # SECURITY NOTE: eval() runs on speech-derived text. Builtins
                # are removed and only the math module is exposed; replace
                # this with a real expression parser if the input is ever
                # untrusted.
                result = eval(expr, {"__builtins__": {}}, {"math": math})
                result_box.markdown(f"### 📊 Result\n**{result}**")
            except Exception as e:
                result_box.markdown(f"### 📊 Result\n❌ Error: {e}")

            # Start a fresh expression after each evaluation.
            buffer_audio.clear()
            buffered = 0
            next_check = window