Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,12 +6,12 @@ It utilizes MediaPipe for computer vision and Google TTS for audio synthesis.
|
|
| 6 |
|
| 7 |
Key Features:
|
| 8 |
- Rotation-invariant gesture recognition (Euclidean geometry).
|
| 9 |
-
- Zero-layout-shift audio playback (
|
| 10 |
- Dual input modes: Camera and File Upload.
|
| 11 |
|
| 12 |
Author: Arshbir Singh
|
| 13 |
Date: 2025-12-24
|
| 14 |
-
Version: 2.
|
| 15 |
"""
|
| 16 |
|
| 17 |
import streamlit as st
|
|
@@ -20,21 +20,20 @@ from PIL import Image
|
|
| 20 |
import os
|
| 21 |
import base64
|
| 22 |
import streamlit.components.v1 as components
|
| 23 |
-
from typing import Optional, Tuple
|
| 24 |
|
| 25 |
# --- Local Modules ---
|
| 26 |
-
# Ensure these files exist in the same directory
|
| 27 |
from gesture_classifier import classify_gesture
|
| 28 |
from tts import speak
|
| 29 |
|
| 30 |
-
|
| 31 |
# --- Constants & Configuration ---
|
| 32 |
PAGE_TITLE = "Hand2Voice"
|
| 33 |
PAGE_ICON = "π€"
|
| 34 |
LAYOUT_MODE = "wide"
|
| 35 |
LOGO_PATH = "NIELIT-LOGO.png"
|
| 36 |
|
| 37 |
-
#
|
|
|
|
|
|
|
| 38 |
STABILIZATION_CSS = """
|
| 39 |
<style>
|
| 40 |
/* 1. Prevent Image Collapse */
|
|
@@ -44,40 +43,49 @@ STABILIZATION_CSS = """
|
|
| 44 |
align-items: center;
|
| 45 |
justify-content: center;
|
| 46 |
}
|
|
|
|
| 47 |
/* 2. Disable Alert Animations */
|
| 48 |
.stAlert {
|
| 49 |
transition: none !important;
|
| 50 |
}
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
}
|
| 55 |
</style>
|
| 56 |
"""
|
| 57 |
|
| 58 |
-
|
| 59 |
def inject_custom_css() -> None:
|
| 60 |
-
"""Injects CSS to stabilize the UI
|
| 61 |
st.markdown(STABILIZATION_CSS, unsafe_allow_html=True)
|
| 62 |
|
| 63 |
-
|
| 64 |
def play_audio_js(file_path: str) -> None:
|
| 65 |
"""
|
| 66 |
-
Plays
|
|
|
|
| 67 |
"""
|
| 68 |
try:
|
| 69 |
with open(file_path, "rb") as f:
|
| 70 |
data = f.read()
|
| 71 |
b64 = base64.b64encode(data).decode()
|
| 72 |
|
| 73 |
-
# JavaScript to create an Audio object in memory and play it immediately
|
| 74 |
js_code = f"""
|
| 75 |
<script>
|
| 76 |
var audio = new Audio("data:audio/mp3;base64,{b64}");
|
| 77 |
audio.play();
|
| 78 |
</script>
|
| 79 |
"""
|
| 80 |
-
#
|
| 81 |
components.html(js_code, height=0, width=0)
|
| 82 |
|
| 83 |
except FileNotFoundError:
|
|
@@ -85,7 +93,6 @@ def play_audio_js(file_path: str) -> None:
|
|
| 85 |
except Exception as e:
|
| 86 |
st.error(f"Error playing audio: {e}")
|
| 87 |
|
| 88 |
-
|
| 89 |
def render_sidebar() -> None:
|
| 90 |
"""Renders the sidebar content."""
|
| 91 |
with st.sidebar:
|
|
@@ -113,104 +120,79 @@ def render_sidebar() -> None:
|
|
| 113 |
- π OK
|
| 114 |
- π Pointing / Yes
|
| 115 |
""")
|
| 116 |
-
|
| 117 |
st.divider()
|
| 118 |
st.markdown("Developed by **Arshbir Singh**")
|
| 119 |
|
| 120 |
-
|
| 121 |
def main() -> None:
|
| 122 |
"""Main application execution flow."""
|
| 123 |
-
|
| 124 |
-
# 1. Page Configuration
|
| 125 |
st.set_page_config(
|
| 126 |
page_title=PAGE_TITLE,
|
| 127 |
page_icon=PAGE_ICON,
|
| 128 |
layout=LAYOUT_MODE
|
| 129 |
)
|
| 130 |
|
| 131 |
-
# 2. Initialize UI
|
| 132 |
inject_custom_css()
|
| 133 |
render_sidebar()
|
| 134 |
|
| 135 |
-
# 3. Main Header
|
| 136 |
st.title(f"{PAGE_ICON} {PAGE_TITLE}: AI Sign Language Assistant")
|
| 137 |
st.markdown("### Translating Silence into Sound")
|
| 138 |
|
| 139 |
-
# 4. Layout Setup (Two Columns)
|
| 140 |
col1, col2 = st.columns([1, 1])
|
| 141 |
|
| 142 |
-
# ---
|
| 143 |
with col1:
|
| 144 |
st.subheader("1. Input Source")
|
| 145 |
tab_cam, tab_upload = st.tabs(["π· Camera", "π Upload"])
|
| 146 |
|
| 147 |
input_buffer = None
|
| 148 |
-
|
| 149 |
-
# Camera Tab
|
| 150 |
with tab_cam:
|
| 151 |
cam_val = st.camera_input("Capture Gesture")
|
| 152 |
-
if cam_val:
|
| 153 |
-
input_buffer = cam_val
|
| 154 |
-
|
| 155 |
-
# Upload Tab
|
| 156 |
with tab_upload:
|
| 157 |
upl_val = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
|
| 158 |
-
if upl_val:
|
| 159 |
-
input_buffer = upl_val
|
| 160 |
|
| 161 |
-
# ---
|
| 162 |
with col2:
|
| 163 |
st.subheader("2. AI Analysis")
|
| 164 |
-
# Pre-allocate a container to maintain vertical stability
|
| 165 |
result_container = st.container()
|
| 166 |
|
| 167 |
-
#
|
| 168 |
if input_buffer:
|
| 169 |
try:
|
| 170 |
-
# Image Preprocessing
|
| 171 |
image = Image.open(input_buffer).convert("RGB")
|
| 172 |
frame = np.array(image)
|
| 173 |
|
| 174 |
-
# AI Inference
|
| 175 |
gesture_name, annotated_image = classify_gesture(frame)
|
| 176 |
|
| 177 |
-
# Update UI Result Container
|
| 178 |
with result_container:
|
| 179 |
-
# Show Computer Vision View
|
| 180 |
st.image(
|
| 181 |
annotated_image,
|
| 182 |
caption="Computer Vision View",
|
| 183 |
use_container_width=True
|
| 184 |
)
|
| 185 |
|
| 186 |
-
# Show Classification Result
|
| 187 |
if gesture_name is None:
|
| 188 |
st.error("β οΈ No hand detected. Please ensure your hand is in frame.")
|
| 189 |
elif gesture_name == "UNKNOWN":
|
| 190 |
-
st.warning("β Gesture detected but not recognized.
|
| 191 |
else:
|
| 192 |
st.success(f"### Detected: {gesture_name}")
|
| 193 |
|
| 194 |
-
# --- Audio
|
| 195 |
if "last_spoken" not in st.session_state or st.session_state.last_spoken != gesture_name:
|
| 196 |
-
|
| 197 |
-
# Generate Audio
|
| 198 |
audio_file = speak(gesture_name)
|
| 199 |
st.session_state.last_spoken = gesture_name
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
# We do NOT put this in st.sidebar.
|
| 203 |
-
# This ensures it runs invisibly regardless of sidebar state.
|
| 204 |
play_audio_js(audio_file)
|
| 205 |
|
| 206 |
except Exception as e:
|
| 207 |
with result_container:
|
| 208 |
-
st.error(f"
|
| 209 |
else:
|
| 210 |
-
# Default empty state
|
| 211 |
with result_container:
|
| 212 |
-
st.info("Waiting for input...
|
| 213 |
-
|
| 214 |
|
| 215 |
if __name__ == "__main__":
|
| 216 |
main()
|
|
|
|
| 6 |
|
| 7 |
Key Features:
|
| 8 |
- Rotation-invariant gesture recognition (Euclidean geometry).
|
| 9 |
+
- Zero-layout-shift audio playback (Absolute Positioning Fix).
|
| 10 |
- Dual input modes: Camera and File Upload.
|
| 11 |
|
| 12 |
Author: Arshbir Singh
|
| 13 |
Date: 2025-12-24
|
| 14 |
+
Version: 2.4.0 (Absolute Position Fix)
|
| 15 |
"""
|
| 16 |
|
| 17 |
import streamlit as st
|
|
|
|
| 20 |
import os
|
| 21 |
import base64
|
| 22 |
import streamlit.components.v1 as components
|
|
|
|
| 23 |
|
| 24 |
# --- Local Modules ---
|
|
|
|
| 25 |
from gesture_classifier import classify_gesture
|
| 26 |
from tts import speak
|
| 27 |
|
|
|
|
| 28 |
# --- Constants & Configuration ---
PAGE_TITLE = "Hand2Voice"          # Browser tab / header title
PAGE_ICON = "π€"                   # Page icon (emoji; mojibake-damaged in this copy — TODO confirm original glyph)
LAYOUT_MODE = "wide"               # Streamlit layout mode passed to st.set_page_config
LOGO_PATH = "NIELIT-LOGO.png"      # Sidebar logo image file, expected next to app.py
|
| 33 |
|
| 34 |
+
# --- CSS STABILIZATION (The Fix) ---
|
| 35 |
+
# We use 'position: fixed' on the iframe to remove it from the layout flow entirely.
|
| 36 |
+
# This ensures that no matter where the audio player loads, it cannot push pixels around.
|
| 37 |
STABILIZATION_CSS = """
|
| 38 |
<style>
|
| 39 |
/* 1. Prevent Image Collapse */
|
|
|
|
| 43 |
align-items: center;
|
| 44 |
justify-content: center;
|
| 45 |
}
|
| 46 |
+
|
| 47 |
/* 2. Disable Alert Animations */
|
| 48 |
.stAlert {
|
| 49 |
transition: none !important;
|
| 50 |
}
|
| 51 |
+
|
| 52 |
+
/* 3. ABSOLUTE POSITIONING FOR AUDIO IFRAME (The Nuclear Fix) */
|
| 53 |
+
/* This targets the specific iframe Streamlit uses for components.html */
|
| 54 |
+
iframe[title="streamlit.components.v1.components.html"] {
|
| 55 |
+
position: fixed !important;
|
| 56 |
+
top: 0;
|
| 57 |
+
left: 0;
|
| 58 |
+
width: 0px;
|
| 59 |
+
height: 0px;
|
| 60 |
+
border: none;
|
| 61 |
+
opacity: 0;
|
| 62 |
+
pointer-events: none;
|
| 63 |
+
z-index: -1;
|
| 64 |
}
|
| 65 |
</style>
|
| 66 |
"""
|
| 67 |
|
|
|
|
| 68 |
def inject_custom_css() -> None:
    """Apply the layout-stabilization stylesheet to the current page.

    Renders STABILIZATION_CSS as raw HTML so the <style> tag takes effect.
    """
    st.markdown(STABILIZATION_CSS, unsafe_allow_html=True)
|
| 71 |
|
|
|
|
| 72 |
def play_audio_js(file_path: str) -> None:
|
| 73 |
"""
|
| 74 |
+
Plays audio using a hidden JS trigger.
|
| 75 |
+
The CSS above ensures this component has ABSOLUTELY NO layout footprint.
|
| 76 |
"""
|
| 77 |
try:
|
| 78 |
with open(file_path, "rb") as f:
|
| 79 |
data = f.read()
|
| 80 |
b64 = base64.b64encode(data).decode()
|
| 81 |
|
|
|
|
| 82 |
js_code = f"""
|
| 83 |
<script>
|
| 84 |
var audio = new Audio("data:audio/mp3;base64,{b64}");
|
| 85 |
audio.play();
|
| 86 |
</script>
|
| 87 |
"""
|
| 88 |
+
# We create the component. The CSS will force it to position:fixed
|
| 89 |
components.html(js_code, height=0, width=0)
|
| 90 |
|
| 91 |
except FileNotFoundError:
|
|
|
|
| 93 |
except Exception as e:
|
| 94 |
st.error(f"Error playing audio: {e}")
|
| 95 |
|
|
|
|
| 96 |
def render_sidebar() -> None:
|
| 97 |
"""Renders the sidebar content."""
|
| 98 |
with st.sidebar:
|
|
|
|
| 120 |
- π OK
|
| 121 |
- π Pointing / Yes
|
| 122 |
""")
|
|
|
|
| 123 |
st.divider()
|
| 124 |
st.markdown("Developed by **Arshbir Singh**")
|
| 125 |
|
|
|
|
| 126 |
def main() -> None:
    """Main application execution flow.

    Lays out a two-column UI: column 1 collects an image (camera or upload),
    column 2 runs gesture classification and, on a *new* gesture, triggers
    hidden audio playback. Must be the first Streamlit call site
    (st.set_page_config has to run before any other st.* call).
    """
    st.set_page_config(page_title=PAGE_TITLE, page_icon=PAGE_ICON, layout=LAYOUT_MODE)

    inject_custom_css()
    render_sidebar()

    st.title(f"{PAGE_ICON} {PAGE_TITLE}: AI Sign Language Assistant")
    st.markdown("### Translating Silence into Sound")

    col1, col2 = st.columns([1, 1])

    # --- INPUT ---
    with col1:
        st.subheader("1. Input Source")
        tab_cam, tab_upload = st.tabs(["π· Camera", "π Upload"])

        input_buffer = None
        with tab_cam:
            cam_val = st.camera_input("Capture Gesture")
            if cam_val:
                input_buffer = cam_val
        with tab_upload:
            upl_val = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
            if upl_val:
                input_buffer = upl_val

    # --- OUTPUT ---
    with col2:
        st.subheader("2. AI Analysis")
        # Pre-allocated container keeps the results area vertically stable
        # across reruns (no layout shift when content appears).
        result_container = st.container()

        # --- PROCESSING ---
        if input_buffer:
            try:
                # Normalize any input (camera frame or uploaded file) to an RGB array.
                image = Image.open(input_buffer).convert("RGB")
                frame = np.array(image)

                gesture_name, annotated_image = classify_gesture(frame)

                with result_container:
                    st.image(
                        annotated_image,
                        caption="Computer Vision View",
                        use_container_width=True,
                    )

                    if gesture_name is None:
                        st.error("β οΈ No hand detected. Please ensure your hand is in frame.")
                    elif gesture_name == "UNKNOWN":
                        st.warning("β Gesture detected but not recognized.")
                    else:
                        st.success(f"### Detected: {gesture_name}")

                        # --- Audio Logic ---
                        # Only speak when the gesture changed since the last rerun,
                        # so repeated reruns of the same frame stay silent.
                        if "last_spoken" not in st.session_state or st.session_state.last_spoken != gesture_name:
                            audio_file = speak(gesture_name)
                            st.session_state.last_spoken = gesture_name

                            # Trigger audio - CSS handles the hidden positioning
                            play_audio_js(audio_file)

            except Exception as e:
                # Boundary handler: surface any pipeline failure in the UI
                # rather than crashing the Streamlit script run.
                with result_container:
                    st.error(f"Error: {e}")
        else:
            # Default empty state before any input arrives.
            with result_container:
                st.info("Waiting for input...")


if __name__ == "__main__":
    main()
|