Spaces:

umaradnaan
/

Image_Caption_Generator

Sleeping

App Files Files Community

umaradnaan committed on Nov 26, 2025

Commit

bfc42c8

verified ·

1 Parent(s): 1623ce9

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -290

app.py CHANGED Viewed

@@ -1,314 +1,135 @@
-# app.py
 import streamlit as st
-# -----------------------------
-# Fix/session-state initialization for streamlit-webrtc internal callbacks
-# (prevents: st.session_state has no attribute "_components_callbacks")
-# -----------------------------
-if "_components_callbacks" not in st.session_state:
-    st.session_state["_components_callbacks"] = {}
-# streamlit-webrtc also expects an ordered list mapping; initialize conservatively
-if "_component_value" not in st.session_state:
-    st.session_state["_component_value"] = {}
-# now import the rest
-import math
-import re
-import time
-import queue
 import numpy as np
 import speech_recognition as sr
-from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
-# -----------------------------
-# Helper: Spoken → expression parser
-# -----------------------------
-SIMPLE = {
-    "zero":0,"one":1,"two":2,"three":3,"four":4,"five":5,
-    "six":6,"seven":7,"eight":8,"nine":9,"ten":10,
-    "eleven":11,"twelve":12,"thirteen":13,"fourteen":14,
-    "fifteen":15,"sixteen":16,"seventeen":17,"eighteen":18,
-    "nineteen":19,"twenty":20,"thirty":30,"forty":40,"fifty":50,
-    "sixty":60,"seventy":70,"eighty":80,"ninety":90
-}
-SCALE = {"hundred":100, "thousand":1000, "million":1000000}
-OPERATORS = {
-    "plus":"+","add":"+","added":"+",
-    "minus":"-","subtract":"-","less":"-",
-    "into":"*","times":"*","multiply":"*","x":"*",
-    "divide":"/","divided":"/","over":"/","by":"/"
-}
-FUNCTION_WORDS = {
-    "sin":"math.sin(", "sine":"math.sin(",
-    "cos":"math.cos(", "cosine":"math.cos(",
-    "tan":"math.tan(", "tangent":"math.tan(",
-    "log":"math.log10(", "ln":"math.log(",
-    "sqrt":"math.sqrt(", "square root":"math.sqrt("
 }
-SUFFIXES = {"square":"**2","squared":"**2","cube":"**3","cubed":"**3","factorial":"!"}
-EQUAL_WORDS = {"equal","equals","equal to","="}
-IGNORES = {"of","the","and","to","a","please"}
-def number_words_to_str(tokens):
-    if not tokens:
-        return ""
-    # numeric tokens case
-    if all(re.fullmatch(r"\d+(\.\d+)?", t) for t in tokens):
-        return "".join(tokens)
-    total=0; current=0; i=0; decimal_mode=False; decimal_digits=[]
-    while i < len(tokens):
-        t = tokens[i]
-        if t in ("point","dot"):
-            decimal_mode=True; i+=1
-            while i < len(tokens):
-                d = tokens[i]
-                if d in SIMPLE:
-                    decimal_digits.append(str(SIMPLE[d]))
-                elif re.fullmatch(r"\d", d):
-                    decimal_digits.append(d)
-                else:
-                    break
-                i += 1
-            break
-        if t in SIMPLE:
-            current += SIMPLE[t]
-        elif t == "hundred":
-            if current == 0: current = 1
-            current *= 100
-        elif t in ("thousand","million"):
-            scale_val = SCALE[t]
-            if current == 0: current = 1
-            total += current * scale_val
-            current = 0
-        elif re.fullmatch(r"\d+(\.\d+)?", t):
-            current = current * 10 + int(float(t))
-        else:
-            break
-        i += 1
-    total += current
-    if decimal_mode:
-        return f"{total}.{''.join(decimal_digits) if decimal_digits else '0'}"
-    return str(total)
-def parse_spoken_to_expr(text: str) -> str:
-    text = text.lower()
-    # remove filler words early, but keep needed phrases
-    text = text.replace("to the power of", " power ")
-    text = text.replace("to the power", " power ")
-    text = text.replace("power of", " power ")
-    text = text.replace("raised to the power of", " power ")
-    text = text.replace("square root of", " sqrt ")
-    text = re.sub(r"\b(of|the|and|please|a)\b", " ", text)
-    for eq in EQUAL_WORDS:
-        text = text.replace(eq, " ")
-    raw = [t for t in re.split(r"\s+", text) if t]
-    parts=[]; num_buf=[]; i=0
-    while i < len(raw):
-        w = raw[i]
-        if w in OPERATORS:
-            if num_buf:
-                parts.append(number_words_to_str(num_buf)); num_buf=[]
-            parts.append(OPERATORS[w]); i+=1; continue
-        if w in SIMPLE or w in SCALE or re.fullmatch(r"\d+(\.\d+)?", w) or w in ("point","dot"):
-            num_buf.append(w); i+=1; continue
-        if w in FUNCTION_WORDS:
-            if num_buf:
-                parts.append(number_words_to_str(num_buf)); num_buf=[]
-            func = FUNCTION_WORDS[w]
-            # try immediate number after function
-            j=i+1; arg_buf=[]
-            while j < len(raw) and (raw[j] in SIMPLE or raw[j] in SCALE or re.fullmatch(r"\d+(\.\d+)?", raw[j]) or raw[j] in ("point","dot")):
-                arg_buf.append(raw[j]); j+=1
-            if arg_buf:
-                parts.append(f"{func}{number_words_to_str(arg_buf)})"); i=j; continue
-            else:
-                parts.append(func); i+=1; continue
-        if w == "reciprocal":
-            if num_buf:
-                parts.append(number_words_to_str(num_buf)); num_buf=[]
-            j=i+1; arg_buf=[]
-            while j < len(raw) and (raw[j] in SIMPLE or raw[j] in SCALE or re.fullmatch(r"\d+(\.\d+)?", raw[j]) or raw[j] in ("point","dot")):
-                arg_buf.append(raw[j]); j+=1
-            if arg_buf:
-                parts.append(f"(1/({number_words_to_str(arg_buf)}))"); i=j; continue
-            else:
-                i+=1; continue
-        if w in ("power","^","**","to"):
-            if num_buf:
-                parts.append(number_words_to_str(num_buf)); num_buf=[]
-            parts.append("**"); i+=1; continue
-        if w in SUFFIXES:
-            if num_buf:
-                parts.append(number_words_to_str(num_buf)); num_buf=[]
-            parts.append(SUFFIXES[w]); i+=1; continue
-        if w in ("percent","percentage","%"):
-            if num_buf:
-                parts.append(number_words_to_str(num_buf)); num_buf=[]
-            parts.append("/100"); i+=1; continue
-        # unknown token flush number buffer then skip
-        if num_buf:
-            parts.append(number_words_to_str(num_buf)); num_buf=[]
-        i += 1
-    if num_buf:
-        parts.append(number_words_to_str(num_buf)); num_buf=[]
-    expr = "".join(parts)
-    expr = re.sub(r"\s+", "", expr).strip()
-    return expr
-# -----------------------------
-# Safe evaluator mapping
-# -----------------------------
-ALLOWED = {
-    "sin": math.sin, "cos": math.cos, "tan": math.tan,
-    "log": lambda x: math.log10(x), "ln": lambda x: math.log(x),
-    "sqrt": math.sqrt, "factorial": math.factorial
-}
-def safe_eval(expr: str):
-    if not expr or expr.strip() == "":
-        raise ValueError("Empty expression")
-    # convert n! -> factorial(n)
-    expr2 = re.sub(r"(\d+(\.\d+)?|\([^\)]+\))\!", r"factorial(\1)", expr)
-    expr2 = expr2.replace("%", "/100")
-    if not re.fullmatch(r"[0-9a-zA-Z_\+\-\*\/\.\(\),%!]+", expr2):
-        raise ValueError("Invalid characters")
-    return eval(expr2, {"__builtins__": None}, ALLOWED)
-# -----------------------------
-# Audio processor for streamlit-webrtc
-# -----------------------------
-class AudioProcessor(AudioProcessorBase):
-    def __init__(self):
-        self.q = queue.Queue()
-    def recv_audio(self, frame):
-        # frame.to_ndarray() returns shape (n_channels, n_samples)
-        arr = frame.to_ndarray()
-        # convert to mono if needed
-        if arr.ndim > 1:
-            arr = np.mean(arr, axis=0)
-        self.q.put(arr.astype(np.float32))
-        return frame
-# -----------------------------
-# Streamlit UI + webrtc
-# -----------------------------
-st.set_page_config(page_title="Voice Scientific Calculator", layout="wide")
-st.markdown("<h2>🎙️ Voice Scientific Calculator — Continuous (auto-transcribe)</h2>", unsafe_allow_html=True)
-st.markdown("Say math naturally and say **equal / equals / equal to** to evaluate. This runs continuously (short chunks are transcribed).")
-if "history" not in st.session_state:
-    st.session_state.history = []
-# Start the webrtc streamer (SENDONLY: we only send audio from browser to python)
-webrtc_ctx = webrtc_streamer(
-    key="voice-calculator",
-    mode=WebRtcMode.SENDONLY,
-    audio_processor_factory=AudioProcessor,
-    media_stream_constraints={"audio": True, "video": False},
-    async_processing=True,
-    desired_playing_state=True
-)
-transcript_box = st.empty()
-expr_box = st.empty()
-result_box = st.empty()
-status_box = st.empty()
 recognizer = sr.Recognizer()
-# Buffer to accumulate audio chunks (per session)
-if "audio_buffer" not in st.session_state:
-    st.session_state.audio_buffer = []
-# Main loop: poll audio queue and try to transcribe short chunks
-if webrtc_ctx and webrtc_ctx.state.playing:
-    status_box.info("🎧 Listening... (auto-transcribe). Say 'equal' to compute.")
-    processor = webrtc_ctx.audio_processor
-    if processor:
-        try:
-            # drain up to some frames from processor.q
-            collected = []
-            while True:
-                # non-blocking get
-                arr = processor.q.get_nowait()
-                collected.append(arr)
-                # stop after collecting enough
-                if sum(a.size for a in collected) > 16000 * 2:  # ~2 seconds at 16k
-                    break
-        except queue.Empty:
-            collected = collected  # may be empty or partial
-        if collected:
-            # concatenate and convert to 16k mono int16 PCM bytes for SpeechRecognition
-            audio_float = np.concatenate(collected)
-            # normalize to int16
-            audio_int16 = np.int16(np.clip(audio_float * 32767, -32768, 32767))
-            import io, wave
-            bio = io.BytesIO()
-            wf = wave.open(bio, 'wb')
-            wf.setnchannels(1)
-            wf.setsampwidth(2)
-            wf.setframerate(16000)
-            wf.writeframes(audio_int16.tobytes())
-            wf.close()
-            wav_bytes = bio.getvalue()
-            # send to SpeechRecognition
-            audio_data = sr.AudioData(wav_bytes, 16000, 2)
-            try:
-                text = recognizer.recognize_google(audio_data)
-            except sr.UnknownValueError:
-                text = ""
-            except sr.RequestError as e:
-                status_box.error(f"Speech API error: {e}")
-                text = ""
-            if text:
-                # append to rolling transcript
-                prev = st.session_state.get("rolling_transcript", "")
-                prev = (prev + " " + text).strip()
-                st.session_state.rolling_transcript = prev
-                transcript_box.markdown(f"**Transcript:** {prev}")
-                # if user said 'equal' in the newest chunk, evaluate
-                if any(eq in text.lower() for eq in EQUAL_WORDS):
-                    # parse the whole rolling transcript
-                    expr = parse_spoken_to_expr(prev)
-                    expr_box.markdown(f"**Expression:** `{expr}`")
-                    try:
-                        value = safe_eval(expr)
-                        result_box.success(f"Result: {value}")
-                        # store history
-                        st.session_state.history.append({
-                            "time": time.strftime("%Y-%m-%d %H:%M:%S"),
-                            "transcript": prev,
-                            "expression": expr,
-                            "result": str(value)
-                        })
-                        # reset rolling transcript
-                        st.session_state.rolling_transcript = ""
-                    except Exception as e:
-                        result_box.error(f"Eval error: {e}")
-                        st.session_state.rolling_transcript = ""
-# Controls / History UI
-st.markdown("---")
-c1, c2 = st.columns([1,1])
-with c1:
-    if st.button("Clear History"):
-        st.session_state.history = []
-        st.success("History cleared")
-with c2:
-    if st.session_state.history:
-        import pandas as pd, base64, io
-        df = pd.DataFrame(st.session_state.history)
-        csv = df.to_csv(index=False).encode()
-        st.download_button("Download history CSV", csv, file_name="history.csv")
-st.markdown("### History")
-if st.session_state.history:
-    for it in reversed(st.session_state.history[-40:]):
-        st.markdown(f"**{it['time']}** — `{it['transcript']}` → `{it['expression']}` = **{it['result']}**")
-else:
-    st.info("No history yet.")

 import streamlit as st
 import numpy as np
 import speech_recognition as sr
+import base64
+import re
+import math
+import io
+import time
+import pandas as pd
+# Page-level configuration (browser tab title, wide layout); this is the
+# first st.* call made in this hunk.
+st.set_page_config(page_title="Voice Calculator", layout="wide")
+# ---------------- UI ----------------
+st.markdown("<h2>🎙️ Voice Scientific Calculator (HF Spaces Compatible)</h2>", unsafe_allow_html=True)
+st.markdown("Record voice → auto convert → evaluate when you say **equal**")
+# ---------------- Recorder UI (JS) ----------------
+# Browser-side recorder markup. startRecording() asks for microphone access
+# and collects MediaRecorder chunks; stopRecording() wraps the chunks in an
+# 'audio/webm' Blob, base64-encodes it with FileReader and dispatches the
+# payload on the parent document as a "streamlit:audio_recorded" CustomEvent.
+# NOTE(review): stopRecording() assigns mediaRecorder.onstop only after it
+# has already called mediaRecorder.stop() - confirm the handler still fires.
+record_js = """
+<script>
+let chunks = [];
+let mediaRecorder;
+function startRecording() {
+    navigator.mediaDevices.getUserMedia({ audio: true })
+    .then(stream => {
+        mediaRecorder = new MediaRecorder(stream);
+        mediaRecorder.start();
+        chunks = [];
+        mediaRecorder.ondataavailable = e => { chunks.push(e.data); };
+    });
 }
+function stopRecording() {
+    mediaRecorder.stop();
+    mediaRecorder.onstop = e => {
+        let blob = new Blob(chunks, { type: 'audio/webm' });
+        let reader = new FileReader();
+        reader.readAsDataURL(blob);
+        reader.onloadend = () => {
+            let base64data = reader.result.split(',')[1];
+            const streamlitEvent = new CustomEvent("streamlit:audio_recorded", {
+                detail: { data: base64data }
+            });
+            window.parent.document.dispatchEvent(streamlitEvent);
+        };
+    };
+}
+</script>
+"""
+# NOTE(review): <script> tags injected through st.markdown are presumably not
+# executed by the browser - confirm that this recorder code ever runs.
+st.markdown(record_js, unsafe_allow_html=True)
+# The two callbacks below only store "start" / "stop" under
+# st.session_state["record"]; nothing visible in this hunk reads that key or
+# calls startRecording() / stopRecording() - TODO confirm how recording is
+# actually meant to be triggered.
+st.button("🎙 Start Recording", on_click=lambda: st.session_state.update({"record": "start"}))
+st.button("⏹ Stop & Process", on_click=lambda: st.session_state.update({"record": "stop"}))
+# ---------------------------------------
+# LISTEN FOR JS → PYTHON AUDIO TRANSFER
+# ---------------------------------------
+# Placeholder element that receives the listener <script> markup below.
+audio_slot = st.empty()
+# st.session_state.audio holds the latest base64 audio payload (None until
+# one has been received).
+if "audio" not in st.session_state:
+    st.session_state.audio = None
+# Stub callback: does nothing and is not referenced anywhere in this hunk.
+def on_audio_received():
+    pass
+# Inject listener
+# The script forwards the "streamlit:audio_recorded" event payload to the
+# parent window as a "streamlit:setComponentValue" postMessage.
+audio_script = """
+<script>
+window.parent.document.addEventListener("streamlit:audio_recorded", (e) => {
+    const data = e.detail.data;
+    window.parent.postMessage({isStreamlitMessage: true, type: "streamlit:setComponentValue", value: data}, "*");
+});
+</script>
+"""
+audio_slot.markdown(audio_script, unsafe_allow_html=True)
+# Python reads the payload from a "componentValue" URL query parameter.
+# NOTE(review): nothing visible in this hunk writes that query parameter (the
+# script above posts a message instead) - confirm the payload can arrive here.
+# NOTE(review): st.experimental_get_query_params is deprecated in favour of
+# st.query_params in current Streamlit releases - verify the pinned version.
+audio_data = st.experimental_get_query_params().get("componentValue", [None])[0]
+if audio_data:
+    st.session_state.audio = audio_data
+# ---------------- PROCESS AUDIO ----------------
 recognizer = sr.Recognizer()
+def decode_audio(b64):
+    """Decode a base64-encoded audio payload into bytes.
+    Raises binascii.Error (a ValueError subclass) when the payload is not
+    correctly padded base64.
+    """
+    return base64.b64decode(b64)
+# Spoken phrase -> Python operator (same vocabulary the app already supported).
+SPOKEN_OPERATORS = {
+    "to the power of": "**",
+    "divided by": "/",
+    "divide": "/",
+    "power": "**",
+    "plus": "+",
+    "minus": "-",
+    "times": "*",
+    "into": "*",
+    "x": "*",
+}
+# Longest phrases come first in the alternation so "divided by" and
+# "to the power of" are consumed whole instead of being clobbered by the
+# shorter "divide" / "power". The letter lookarounds stop "x" from matching
+# inside words such as "six" while still accepting "2x3".
+_OPERATOR_RE = re.compile(
+    r"(?<![a-z])(?:"
+    + "|".join(re.escape(p) for p in sorted(SPOKEN_OPERATORS, key=len, reverse=True))
+    + r")(?![a-z])"
+)
+# "square root of 9" -> "math.sqrt(9)"; a bare "math.sqrt 9" would not parse.
+_SQRT_RE = re.compile(r"(?<![a-z])square root of\s*(\d+(?:\.\d+)?)")
+# Trigger words: "equal", "equals", "equal to", or a transcribed "=".
+_EQUAL_RE = re.compile(r"=|(?<![a-z])equals?(?:\s+to)?(?![a-z])")
+# Characters dropped before evaluation (punctuation such as "," or "?").
+_UNSAFE_CHARS_RE = re.compile(r"[^0-9\+\-\*\/\.\(\)a-z ]")
+# Only arithmetic tokens and math.sqrt( may reach eval().
+_SAFE_EXPR_RE = re.compile(r"(?:math\.sqrt\(|[0-9+\-*/.() ])+")
+def spoken_to_expression(text):
+    """Translate a transcript into a Python arithmetic expression string.
+    Returns None when the transcript contains no "equal" trigger, otherwise
+    the translated expression with the trigger removed. The result may still
+    contain unsupported words; callers must validate it before evaluating.
+    """
+    lowered = text.lower()
+    if not _EQUAL_RE.search(lowered):
+        return None
+    expr = _EQUAL_RE.sub(" ", lowered)
+    expr = _SQRT_RE.sub(r"math.sqrt(\1)", expr)
+    expr = _OPERATOR_RE.sub(lambda m: SPOKEN_OPERATORS[m.group(0)], expr)
+    return _UNSAFE_CHARS_RE.sub("", expr).strip()
+if st.session_state.audio:
+    st.info("Processing audio…")
+    try:
+        audio_bytes = decode_audio(st.session_state.audio)
+    except ValueError as e:
+        # Malformed base64 must not crash the whole script run.
+        st.error(f"Could not decode audio: {e}")
+        audio_bytes = b""
+    text = ""
+    if audio_bytes:
+        # NOTE(review): the recorder script builds an 'audio/webm' blob, but
+        # sr.AudioData is given these bytes as if they were raw 16-bit PCM at
+        # 48 kHz. Decoding WebM needs an extra dependency, so this is only
+        # flagged here - confirm and convert before relying on recognition.
+        audio = sr.AudioData(audio_bytes, 48000, 2)
+        try:
+            text = recognizer.recognize_google(audio)
+        except sr.UnknownValueError:
+            # Speech was unintelligible; fall through with an empty transcript.
+            text = ""
+        except sr.RequestError as e:
+            st.error(f"Speech API error: {e}")
+            text = ""
+    st.success(f"Transcript: **{text}**")
+    # EXPRESSION PARSER (simple version)
+    expr = spoken_to_expression(text)
+    if expr is not None:
+        st.write(f"Expression: `{expr}`")
+        if not _SAFE_EXPR_RE.fullmatch(expr):
+            st.error("Error: expression contains unsupported words or symbols")
+        else:
+            try:
+                # SECURITY: eval() runs on speech-derived text. It is reached
+                # only after the whitelist check above and with builtins
+                # removed; very large exponents can still be slow to compute.
+                result = eval(expr, {"math": math, "__builtins__": {}})
+                st.success(f"Result = **{result}**")
+            except Exception as e:
+                st.error(f"Error: {e}")