Spaces:

umaradnaan
/

Image_Caption_Generator

Sleeping

App Files Files Community

umaradnaan committed on Nov 26, 2025

Commit

1623ce9

verified ·

1 Parent(s): 9903cca

Update app.py

Browse files

Files changed (1) hide show

app.py +283 -114

app.py CHANGED Viewed

@@ -1,145 +1,314 @@
 import streamlit as st
-import speech_recognition as sr
-import numpy as np
-from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
-import queue
 import math
 import re
-# -----------------------------------------
-# Expression Fixing / Parsing
-# -----------------------------------------
-def spoken_to_expression(text):
     text = text.lower()
-    replacements = {
-        r"plus": "+",
-        r"minus": "-",
-        r"(into|times|multiply|multiplied by)": "*",
-        r"(divided by|divide)": "/",
-        r"(to the power of|power of|power)": "**",
-        r"square root of": "math.sqrt",
-        r"root of": "math.sqrt",
-        r"sin": "math.sin",
-        r"cos": "math.cos",
-        r"tan": "math.tan",
-        r"log": "math.log10",
-        r"ln": "math.log",
-    }
-    for pattern, repl in replacements.items():
-        text = re.sub(pattern, repl, text)
-    text = re.sub(r"point", ".", text)
-    text = re.sub(r"[a-zA-Z]+", "", text)
-    return text.strip()
-# -----------------------------------------
-# WebRTC Audio Processor
-# -----------------------------------------
 class AudioProcessor(AudioProcessorBase):
     def __init__(self):
         self.q = queue.Queue()
     def recv_audio(self, frame):
-        audio = frame.to_ndarray().flatten().astype(np.float32)
-        self.q.put(audio)
         return frame
-# -----------------------------------------
-# UI DESIGN
-# -----------------------------------------
-st.set_page_config(page_title="Voice Calculator", layout="wide")
-st.markdown("""
-    <h1 style='text-align:center;color:white;'>🎙️ Voice Scientific Calculator</h1>
-""", unsafe_allow_html=True)
-st.markdown("""
-    <div style="background:#111;padding:20px;border-radius:15px;color:white;">
-        Speak expressions like:
-        - 10 plus 20
-        - 10 to the power of 2
-        - square root of 81
-        - sin 45
-        - ... then say **equal**
-    </div>
-""", unsafe_allow_html=True)
-# -----------------------------------------
-# WebRTC
-# -----------------------------------------
-st.subheader("🎤 Voice Input")
 webrtc_ctx = webrtc_streamer(
-    key="voice",
     mode=WebRtcMode.SENDONLY,
     audio_processor_factory=AudioProcessor,
     media_stream_constraints={"audio": True, "video": False},
 )
-recognizer = sr.Recognizer()
 transcript_box = st.empty()
 expr_box = st.empty()
 result_box = st.empty()
-buffer_audio = []
-# -----------------------------------------
-# Audio Capture + Recognition
-# -----------------------------------------
-def convert_to_wav(float_audio):
-    import io
-    import wave
-    bio = io.BytesIO()
-    wav = wave.open(bio, "wb")
-    wav.setnchannels(1)
-    wav.setsampwidth(2)
-    wav.setframerate(16000)
-    wav.writeframes((float_audio * 32767).astype(np.int16).tobytes())
-    wav.close()
-    return bio.getvalue()
 if webrtc_ctx and webrtc_ctx.state.playing:
     processor = webrtc_ctx.audio_processor
     if processor:
         try:
-            audio_chunk = processor.q.get(timeout=1)
-            buffer_audio.extend(audio_chunk)
-            audio_data = np.array(buffer_audio, dtype=np.float32)
-            if len(audio_data) > 16000 * 4:
-                wav_bytes = convert_to_wav(audio_data)
-                audio_source = sr.AudioData(wav_bytes, 16000, 2)
-                try:
-                    text = recognizer.recognize_google(audio_source)
-                    transcript_box.markdown(f"### 🎧 Transcript\n{text}")
-                    if "equal" in text.lower():
-                        expr = spoken_to_expression(text)
-                        expr_box.markdown(f"### 🧮 Expression\n`{expr}`")
-                        try:
-                            result = eval(expr)
-                            result_box.markdown(f"### 📊 Result\n**{result}**")
-                        except Exception as e:
-                            result_box.markdown(f"### 📊 Result\n❌ Error: {e}")
-                except Exception:
-                    pass
         except queue.Empty:
-            pass

+# app.py
 import streamlit as st
+# -----------------------------
+# Fix/session-state initialization for streamlit-webrtc internal callbacks
+# (prevents: st.session_state has no attribute "_components_callbacks")
+# -----------------------------
+if "_components_callbacks" not in st.session_state:
+    st.session_state["_components_callbacks"] = {}
+# streamlit-webrtc also expects an ordered list mapping; initialize conservatively
+if "_component_value" not in st.session_state:
+    st.session_state["_component_value"] = {}
+# now import the rest
 import math
 import re
+import time
+import queue
+import numpy as np
+import speech_recognition as sr
+from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
+# -----------------------------
+# Helper: Spoken → expression parser
+# -----------------------------
+# Spoken number words (units, teens and tens) mapped to their integer values.
+SIMPLE = {
+    "zero":0,"one":1,"two":2,"three":3,"four":4,"five":5,
+    "six":6,"seven":7,"eight":8,"nine":9,"ten":10,
+    "eleven":11,"twelve":12,"thirteen":13,"fourteen":14,
+    "fifteen":15,"sixteen":16,"seventeen":17,"eighteen":18,
+    "nineteen":19,"twenty":20,"thirty":30,"forty":40,"fifty":50,
+    "sixty":60,"seventy":70,"eighty":80,"ninety":90
+}
+# Magnitude words and their multipliers, consumed by number_words_to_str.
+SCALE = {"hundred":100, "thousand":1000, "million":1000000}
+# Single spoken words mapped to arithmetic operator symbols.
+# NOTE: "divided" and "by" are separate keys that both map to "/"; multi-word
+# phrases must be collapsed before lookup to avoid emitting two operators.
+OPERATORS = {
+    "plus":"+","add":"+","added":"+",
+    "minus":"-","subtract":"-","less":"-",
+    "into":"*","times":"*","multiply":"*","x":"*",
+    "divide":"/","divided":"/","over":"/","by":"/"
+}
+# Function words mapped to a call prefix that ends with an opening parenthesis;
+# the parser is responsible for the argument and the closing parenthesis.
+# NOTE: the two-word key "square root" can only match a phrase-level lookup,
+# not a lookup on whitespace-split tokens.
+FUNCTION_WORDS = {
+    "sin":"math.sin(", "sine":"math.sin(",
+    "cos":"math.cos(", "cosine":"math.cos(",
+    "tan":"math.tan(", "tangent":"math.tan(",
+    "log":"math.log10(", "ln":"math.log(",
+    "sqrt":"math.sqrt(", "square root":"math.sqrt("
+}
+# Postfix words; "!" is rewritten into a factorial(...) call by safe_eval.
+SUFFIXES = {"square":"**2","squared":"**2","cube":"**3","cubed":"**3","factorial":"!"}
+# Words/symbols that signal "evaluate now".
+EQUAL_WORDS = {"equal","equals","equal to","="}
+# Filler words.
+# NOTE(review): not referenced by the visible code; parse_spoken_to_expr strips
+# fillers with its own regex and treats "to" as a power operator - confirm
+# whether this set is still needed.
+IGNORES = {"of","the","and","to","a","please"}
+def number_words_to_str(tokens):
+    """Convert a run of number tokens into a numeric literal string.
+
+    Tokens may be number words from SIMPLE/SCALE, digit strings, or the
+    decimal markers "point"/"dot".  An empty list yields "".  When every
+    token is already a numeric literal the tokens are simply concatenated.
+    Otherwise values are accumulated left to right; scanning stops at the
+    first unrecognised token.  After a decimal marker the following digit
+    tokens form the fractional part and scanning ends.
+    """
+    if not tokens:
+        return ""
+    numeric = re.compile(r"\d+(\.\d+)?")
+    # Pure digit input ("1", "2") is glued together as typed.
+    if all(numeric.fullmatch(tok) for tok in tokens):
+        return "".join(tokens)
+    total = 0
+    group = 0  # value of the group currently being built (below a thousand)
+    fraction = []
+    saw_point = False
+    for pos, tok in enumerate(tokens):
+        if tok in ("point", "dot"):
+            saw_point = True
+            # Everything after the marker is read digit by digit.
+            for digit in tokens[pos + 1:]:
+                if digit in SIMPLE:
+                    fraction.append(str(SIMPLE[digit]))
+                elif re.fullmatch(r"\d", digit):
+                    fraction.append(digit)
+                else:
+                    break
+            break
+        if tok in SIMPLE:
+            group += SIMPLE[tok]
+        elif tok == "hundred":
+            # A bare "hundred" means one hundred.
+            group = (group or 1) * 100
+        elif tok in ("thousand", "million"):
+            total += (group or 1) * SCALE[tok]
+            group = 0
+        elif numeric.fullmatch(tok):
+            group = group * 10 + int(float(tok))
+        else:
+            break
+    total += group
+    if saw_point:
+        digits = "".join(fraction) if fraction else "0"
+        return f"{total}.{digits}"
+    return str(total)
+def _is_number_token(word):
+    """Return True when *word* can be part of a spoken or numeric literal."""
+    return (
+        word in SIMPLE
+        or word in SCALE
+        or word in ("point", "dot")
+        or re.fullmatch(r"\d+(\.\d+)?", word) is not None
+    )
+def _take_number_run(words, start):
+    """Return (tokens, next_index) for the number tokens starting at *start*."""
+    end = start
+    while end < len(words) and _is_number_token(words[end]):
+        end += 1
+    return words[start:end], end
+def parse_spoken_to_expr(text: str) -> str:
+    """Translate a spoken-math transcript into a Python expression string.
+
+    Multi-word phrases are collapsed to single keywords first (longest phrase
+    first), filler words and "equals" triggers are removed on word boundaries,
+    and the remaining tokens are mapped through OPERATORS, FUNCTION_WORDS and
+    SUFFIXES.  Runs of number tokens are converted with number_words_to_str.
+    Unknown tokens are dropped.  Any function call left open is closed so the
+    result has balanced parentheses.
+
+    Examples: "seven divided by two equals" -> "7/2",
+    "square root of sixteen" -> "math.sqrt(16)".
+    """
     text = text.lower()
+    # Longest phrases first so a shorter phrase cannot eat part of a longer
+    # one.  "divided by" / "multiplied by" must become ONE operator: both
+    # "divided" and "by" map to "/", which would otherwise produce "//".
+    phrases = (
+        ("raised to the power of", " power "),
+        ("to the power of", " power "),
+        ("to the power", " power "),
+        ("power of", " power "),
+        ("square root of", " sqrt "),
+        ("square root", " sqrt "),
+        ("multiplied by", " times "),
+        ("multiply by", " times "),
+        ("divided by", " divide "),
+        ("divide by", " divide "),
+        ("added to", " plus "),
+    )
+    for phrase, keyword in phrases:
+        text = text.replace(phrase, keyword)
+    text = re.sub(r"\b(of|the|and|please|a)\b", " ", text)
+    # Remove trigger words as whole words, longest first, so "equals" does not
+    # leave a stray "s" and "equal to" does not leave a "to" (a power word).
+    for eq in sorted(EQUAL_WORDS, key=len, reverse=True):
+        pattern = re.escape(eq)
+        if eq.replace(" ", "").isalnum():
+            pattern = rf"\b{pattern}\b"
+        text = re.sub(pattern, " ", text)
+    raw = text.split()
+    # Recognisers often return symbols instead of words ("10 + 20").
+    symbols = {"+": "+", "-": "-", "*": "*", "/": "/", "×": "*", "÷": "/"}
+    parts = []
+    num_buf = []
+    def flush():
+        """Emit the pending number run, if any."""
+        if num_buf:
+            parts.append(number_words_to_str(num_buf))
+            num_buf.clear()
+    i = 0
+    while i < len(raw):
+        w = raw[i]
+        i += 1
+        if w in OPERATORS:
+            flush()
+            parts.append(OPERATORS[w])
+        elif w in symbols:
+            flush()
+            parts.append(symbols[w])
+        elif _is_number_token(w):
+            num_buf.append(w)
+        elif w in FUNCTION_WORDS:
+            flush()
+            call = FUNCTION_WORDS[w]
+            # A number directly after the function word is its argument.
+            args, i = _take_number_run(raw, i)
+            parts.append(f"{call}{number_words_to_str(args)})" if args else call)
+        elif w == "reciprocal":
+            flush()
+            args, i = _take_number_run(raw, i)
+            if args:
+                parts.append(f"(1/({number_words_to_str(args)}))")
+        elif w in ("power", "^", "**", "to"):
+            flush()
+            parts.append("**")
+        elif w in SUFFIXES:
+            flush()
+            parts.append(SUFFIXES[w])
+        elif w in ("percent", "percentage", "%"):
+            flush()
+            parts.append("/100")
+        else:
+            # Unknown token: it still terminates the current number run.
+            flush()
+    flush()
+    expr = re.sub(r"\s+", "", "".join(parts))
+    # A function word without an immediate argument leaves "(" open.
+    unclosed = expr.count("(") - expr.count(")")
+    if unclosed > 0:
+        expr += ")" * unclosed
+    return expr
+# -----------------------------
+# Safe evaluator mapping
+# -----------------------------
+ALLOWED = {
+    "sin": math.sin, "cos": math.cos, "tan": math.tan,
+    "log": lambda x: math.log10(x), "ln": lambda x: math.log(x),
+    "sqrt": math.sqrt, "factorial": math.factorial
+}
+def safe_eval(expr: str):
+    """Evaluate an arithmetic expression produced by the spoken-math parser.
+
+    Accepts numbers, the operators + - * / **, unary signs, parentheses, the
+    bare function names in ALLOWED and ``math.<name>`` calls for a fixed set
+    of functions (the parser emits e.g. ``math.sin(45)``).  ``n!`` is rewritten
+    to ``factorial(n)`` and ``%`` to ``/100`` before evaluation.
+
+    Raises:
+        ValueError: empty input, characters outside the whitelist, or a
+            syntax element that is not plain arithmetic (attribute access
+            other than the whitelisted ``math`` functions, unknown names...).
+        SyntaxError: the expression is not valid Python syntax.
+        Arithmetic errors (ZeroDivisionError, etc.) propagate unchanged.
+    """
+    import ast
+    if not expr or expr.strip() == "":
+        raise ValueError("Empty expression")
+    # convert n! -> factorial(n)
+    expr2 = re.sub(r"(\d+(\.\d+)?|\([^\)]+\))\!", r"factorial(\1)", expr)
+    expr2 = expr2.replace("%", "/100")
+    if not re.fullmatch(r"[0-9a-zA-Z_\+\-\*\/\.\(\),%!]+", expr2):
+        raise ValueError("Invalid characters")
+    # The character whitelist alone still admits attribute chains such as
+    # ().__class__, so the parsed tree is checked node by node before eval.
+    math_functions = {"sin", "cos", "tan", "log", "log10", "sqrt", "factorial"}
+    structural = (
+        ast.Expression, ast.BinOp, ast.UnaryOp, ast.Call,
+        ast.operator, ast.unaryop, ast.Load,
+    )
+    tree = ast.parse(expr2, mode="eval")
+    for node in ast.walk(tree):
+        if isinstance(node, structural):
+            continue
+        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
+            continue
+        if isinstance(node, ast.Name) and (node.id in ALLOWED or node.id == "math"):
+            continue
+        if (
+            isinstance(node, ast.Attribute)
+            and isinstance(node.value, ast.Name)
+            and node.value.id == "math"
+            and node.attr in math_functions
+        ):
+            continue
+        raise ValueError("Invalid expression")
+    # `math` must be visible because the parser emits math.<fn>(...) calls.
+    namespace = {**ALLOWED, "math": math}
+    return eval(compile(tree, "<expr>", "eval"), {"__builtins__": None}, namespace)
+# -----------------------------
+# Audio processor for streamlit-webrtc
+# -----------------------------
 class AudioProcessor(AudioProcessorBase):
+    """Collect incoming audio frames as mono float32 chunks on ``self.q``.
+
+    Integer PCM is scaled into [-1.0, 1.0] so that consumers can multiply by
+    32767 to get int16 samples; float input is passed through unscaled.
+    """
     def __init__(self):
         self.q = queue.Queue()
+    def _enqueue(self, frame):
+        """Convert one frame to a mono float32 array and queue it."""
+        arr = frame.to_ndarray()
+        # Decide the scale before averaging: np.mean promotes ints to float.
+        if np.issubdtype(arr.dtype, np.integer):
+            scale = float(np.iinfo(arr.dtype).max)
+        else:
+            scale = 1.0
+        # NOTE(review): assumes a (n_channels, n_samples) layout; packed
+        # (interleaved) formats may arrive as (1, n_samples * n_channels),
+        # in which case this is not a true mono mix - confirm against PyAV.
+        if arr.ndim > 1:
+            arr = np.mean(arr, axis=0)
+        self.q.put((arr / scale).astype(np.float32))
     def recv_audio(self, frame):
+        """Queue *frame* for transcription and return it unchanged."""
+        self._enqueue(frame)
         return frame
+    # NOTE(review): streamlit-webrtc's documented hooks appear to be recv()
+    # and recv_queued(); they delegate here so frames reach the queue either
+    # way - confirm against the installed library version.
+    def recv(self, frame):
+        """Per-frame hook; delegates to recv_audio."""
+        return self.recv_audio(frame)
+    async def recv_queued(self, frames):
+        """Batch hook used with async processing; queues every frame."""
+        for frame in frames:
+            self._enqueue(frame)
+        return frames
+# -----------------------------
+# Streamlit UI + webrtc
+# -----------------------------
+st.set_page_config(page_title="Voice Scientific Calculator", layout="wide")
+st.markdown("<h2>🎙️ Voice Scientific Calculator — Continuous (auto-transcribe)</h2>", unsafe_allow_html=True)
+st.markdown("Say math naturally and say **equal / equals / equal to** to evaluate. This runs continuously (short chunks are transcribed).")
+if "history" not in st.session_state:
+    st.session_state.history = []
+# Start the webrtc streamer (SENDONLY: we only send audio from browser to python)
 webrtc_ctx = webrtc_streamer(
+    key="voice-calculator",
     mode=WebRtcMode.SENDONLY,
     audio_processor_factory=AudioProcessor,
     media_stream_constraints={"audio": True, "video": False},
+    async_processing=True,
+    desired_playing_state=True
 )
 transcript_box = st.empty()
 expr_box = st.empty()
 result_box = st.empty()
+status_box = st.empty()
+recognizer = sr.Recognizer()
+# Buffer to accumulate audio chunks (per session)
+if "audio_buffer" not in st.session_state:
+    st.session_state.audio_buffer = []
+# Main loop: poll audio queue and try to transcribe short chunks
 if webrtc_ctx and webrtc_ctx.state.playing:
+    status_box.info("🎧 Listening... (auto-transcribe). Say 'equal' to compute.")
     processor = webrtc_ctx.audio_processor
     if processor:
+        # Drain queued chunks without blocking, stopping once a little over
+        # two seconds of samples (at the assumed 16 kHz rate) has been taken.
+        collected = []
+        n_samples = 0
         try:
+            while n_samples <= 16000 * 2:
+                arr = processor.q.get_nowait()
+                collected.append(arr)
+                n_samples += arr.size
         except queue.Empty:
+            pass  # queue exhausted; keep whatever was collected so far
+        if collected:
+            audio_float = np.concatenate(collected)
+            # NOTE(review): assumes samples are floats in [-1, 1] captured at
+            # 16 kHz; browsers commonly deliver 48 kHz - confirm and resample.
+            audio_int16 = np.int16(np.clip(audio_float * 32767, -32768, 32767))
+            # AudioData expects raw PCM frames, not a WAV container: passing
+            # WAV bytes would make the header be treated as audio samples.
+            audio_data = sr.AudioData(audio_int16.tobytes(), 16000, 2)
+            try:
+                text = recognizer.recognize_google(audio_data)
+            except sr.UnknownValueError:
+                # Nothing intelligible in this chunk.
+                text = ""
+            except sr.RequestError as e:
+                status_box.error(f"Speech API error: {e}")
+                text = ""
+            if text:
+                # append to rolling transcript
+                prev = st.session_state.get("rolling_transcript", "")
+                prev = (prev + " " + text).strip()
+                st.session_state.rolling_transcript = prev
+                transcript_box.markdown(f"**Transcript:** {prev}")
+                # if user said 'equal' in the newest chunk, evaluate
+                if any(eq in text.lower() for eq in EQUAL_WORDS):
+                    # parse the whole rolling transcript
+                    expr = parse_spoken_to_expr(prev)
+                    expr_box.markdown(f"**Expression:** `{expr}`")
+                    try:
+                        value = safe_eval(expr)
+                    except Exception as e:
+                        # Top-level UI boundary: show the failure to the user.
+                        result_box.error(f"Eval error: {e}")
+                    else:
+                        result_box.success(f"Result: {value}")
+                        # store history
+                        st.session_state.history.append({
+                            "time": time.strftime("%Y-%m-%d %H:%M:%S"),
+                            "transcript": prev,
+                            "expression": expr,
+                            "result": str(value)
+                        })
+                    # Start a fresh transcript whether or not evaluation worked.
+                    st.session_state.rolling_transcript = ""
+# Controls / History UI: clear button, CSV export and the most recent entries.
+st.markdown("---")
+c1, c2 = st.columns([1,1])
+with c1:
+    if st.button("Clear History"):
+        st.session_state.history = []
+        st.success("History cleared")
+with c2:
+    if st.session_state.history:
+        # pandas is imported lazily: it is only needed for the CSV export.
+        import pandas as pd
+        df = pd.DataFrame(st.session_state.history)
+        csv = df.to_csv(index=False).encode()
+        st.download_button("Download history CSV", csv, file_name="history.csv")
+st.markdown("### History")
+if st.session_state.history:
+    # Newest first, limited to the last 40 calculations.
+    for it in reversed(st.session_state.history[-40:]):
+        st.markdown(f"**{it['time']}** — `{it['transcript']}` → `{it['expression']}` = **{it['result']}**")
+else:
+    st.info("No history yet.")