Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,12 +6,12 @@ It utilizes MediaPipe for computer vision and Google TTS for audio synthesis.
|
|
| 6 |
|
| 7 |
Key Features:
|
| 8 |
- Rotation-invariant gesture recognition (Euclidean geometry).
|
| 9 |
-
- Zero-layout-shift audio playback (
|
| 10 |
- Dual input modes: Camera and File Upload.
|
| 11 |
|
| 12 |
Author: Arshbir Singh
|
| 13 |
Date: 2025-12-24
|
| 14 |
-
Version: 2.
|
| 15 |
"""
|
| 16 |
|
| 17 |
import streamlit as st
|
|
@@ -20,21 +20,20 @@ from PIL import Image
|
|
| 20 |
import os
|
| 21 |
import base64
|
| 22 |
import streamlit.components.v1 as components
|
| 23 |
-
from typing import Optional, Tuple
|
| 24 |
|
| 25 |
# --- Local Modules ---
|
| 26 |
-
# Ensure these files exist in the same directory
|
| 27 |
from gesture_classifier import classify_gesture
|
| 28 |
from tts import speak
|
| 29 |
|
| 30 |
-
|
| 31 |
# --- Constants & Configuration ---
|
| 32 |
PAGE_TITLE = "Hand2Voice"
|
| 33 |
PAGE_ICON = "π€"
|
| 34 |
LAYOUT_MODE = "wide"
|
| 35 |
LOGO_PATH = "NIELIT-LOGO.png"
|
| 36 |
|
| 37 |
-
#
|
|
|
|
|
|
|
| 38 |
STABILIZATION_CSS = """
|
| 39 |
<style>
|
| 40 |
/* 1. Prevent Image Collapse */
|
|
@@ -44,40 +43,49 @@ STABILIZATION_CSS = """
|
|
| 44 |
align-items: center;
|
| 45 |
justify-content: center;
|
| 46 |
}
|
|
|
|
| 47 |
/* 2. Disable Alert Animations */
|
| 48 |
.stAlert {
|
| 49 |
transition: none !important;
|
| 50 |
}
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
}
|
| 55 |
</style>
|
| 56 |
"""
|
| 57 |
|
| 58 |
-
|
| 59 |
def inject_custom_css() -> None:
|
| 60 |
-
"""Injects CSS to stabilize the UI
|
| 61 |
st.markdown(STABILIZATION_CSS, unsafe_allow_html=True)
|
| 62 |
|
| 63 |
-
|
| 64 |
def play_audio_js(file_path: str) -> None:
|
| 65 |
"""
|
| 66 |
-
Plays
|
|
|
|
| 67 |
"""
|
| 68 |
try:
|
| 69 |
with open(file_path, "rb") as f:
|
| 70 |
data = f.read()
|
| 71 |
b64 = base64.b64encode(data).decode()
|
| 72 |
|
| 73 |
-
# JavaScript to create an Audio object in memory and play it immediately
|
| 74 |
js_code = f"""
|
| 75 |
<script>
|
| 76 |
var audio = new Audio("data:audio/mp3;base64,{b64}");
|
| 77 |
audio.play();
|
| 78 |
</script>
|
| 79 |
"""
|
| 80 |
-
#
|
| 81 |
components.html(js_code, height=0, width=0)
|
| 82 |
|
| 83 |
except FileNotFoundError:
|
|
@@ -85,7 +93,6 @@ def play_audio_js(file_path: str) -> None:
|
|
| 85 |
except Exception as e:
|
| 86 |
st.error(f"Error playing audio: {e}")
|
| 87 |
|
| 88 |
-
|
| 89 |
def render_sidebar() -> None:
|
| 90 |
"""Renders the sidebar content."""
|
| 91 |
with st.sidebar:
|
|
@@ -113,104 +120,79 @@ def render_sidebar() -> None:
|
|
| 113 |
- π OK
|
| 114 |
- π Pointing / Yes
|
| 115 |
""")
|
| 116 |
-
|
| 117 |
st.divider()
|
| 118 |
st.markdown("Developed by **Arshbir Singh**")
|
| 119 |
|
| 120 |
-
|
| 121 |
def main() -> None:
|
| 122 |
"""Main application execution flow."""
|
| 123 |
-
|
| 124 |
-
# 1. Page Configuration
|
| 125 |
st.set_page_config(
|
| 126 |
page_title=PAGE_TITLE,
|
| 127 |
page_icon=PAGE_ICON,
|
| 128 |
layout=LAYOUT_MODE
|
| 129 |
)
|
| 130 |
|
| 131 |
-
# 2. Initialize UI
|
| 132 |
inject_custom_css()
|
| 133 |
render_sidebar()
|
| 134 |
|
| 135 |
-
# 3. Main Header
|
| 136 |
st.title(f"{PAGE_ICON} {PAGE_TITLE}: AI Sign Language Assistant")
|
| 137 |
st.markdown("### Translating Silence into Sound")
|
| 138 |
|
| 139 |
-
# 4. Layout Setup (Two Columns)
|
| 140 |
col1, col2 = st.columns([1, 1])
|
| 141 |
|
| 142 |
-
# ---
|
| 143 |
with col1:
|
| 144 |
st.subheader("1. Input Source")
|
| 145 |
tab_cam, tab_upload = st.tabs(["π· Camera", "π Upload"])
|
| 146 |
|
| 147 |
input_buffer = None
|
| 148 |
-
|
| 149 |
-
# Camera Tab
|
| 150 |
with tab_cam:
|
| 151 |
cam_val = st.camera_input("Capture Gesture")
|
| 152 |
-
if cam_val:
|
| 153 |
-
input_buffer = cam_val
|
| 154 |
-
|
| 155 |
-
# Upload Tab
|
| 156 |
with tab_upload:
|
| 157 |
upl_val = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
|
| 158 |
-
if upl_val:
|
| 159 |
-
input_buffer = upl_val
|
| 160 |
|
| 161 |
-
# ---
|
| 162 |
with col2:
|
| 163 |
st.subheader("2. AI Analysis")
|
| 164 |
-
# Pre-allocate a container to maintain vertical stability
|
| 165 |
result_container = st.container()
|
| 166 |
|
| 167 |
-
#
|
| 168 |
if input_buffer:
|
| 169 |
try:
|
| 170 |
-
# Image Preprocessing
|
| 171 |
image = Image.open(input_buffer).convert("RGB")
|
| 172 |
frame = np.array(image)
|
| 173 |
|
| 174 |
-
# AI Inference
|
| 175 |
gesture_name, annotated_image = classify_gesture(frame)
|
| 176 |
|
| 177 |
-
# Update UI Result Container
|
| 178 |
with result_container:
|
| 179 |
-
# Show Computer Vision View
|
| 180 |
st.image(
|
| 181 |
annotated_image,
|
| 182 |
caption="Computer Vision View",
|
| 183 |
use_container_width=True
|
| 184 |
)
|
| 185 |
|
| 186 |
-
# Show Classification Result
|
| 187 |
if gesture_name is None:
|
| 188 |
st.error("β οΈ No hand detected. Please ensure your hand is in frame.")
|
| 189 |
elif gesture_name == "UNKNOWN":
|
| 190 |
-
st.warning("β Gesture detected but not recognized.
|
| 191 |
else:
|
| 192 |
st.success(f"### Detected: {gesture_name}")
|
| 193 |
|
| 194 |
-
# --- Audio
|
| 195 |
if "last_spoken" not in st.session_state or st.session_state.last_spoken != gesture_name:
|
| 196 |
-
|
| 197 |
-
# Generate Audio
|
| 198 |
audio_file = speak(gesture_name)
|
| 199 |
st.session_state.last_spoken = gesture_name
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
# We do NOT put this in st.sidebar.
|
| 203 |
-
# This ensures it runs invisibly regardless of sidebar state.
|
| 204 |
play_audio_js(audio_file)
|
| 205 |
|
| 206 |
except Exception as e:
|
| 207 |
with result_container:
|
| 208 |
-
st.error(f"
|
| 209 |
else:
|
| 210 |
-
# Default empty state
|
| 211 |
with result_container:
|
| 212 |
-
st.info("Waiting for input...
|
| 213 |
-
|
| 214 |
|
| 215 |
if __name__ == "__main__":
|
| 216 |
main()
|
|
|
|
| 6 |
|
| 7 |
Key Features:
|
| 8 |
- Rotation-invariant gesture recognition (Euclidean geometry).
|
| 9 |
+
- Zero-layout-shift audio playback (Absolute Positioning Fix).
|
| 10 |
- Dual input modes: Camera and File Upload.
|
| 11 |
|
| 12 |
Author: Arshbir Singh
|
| 13 |
Date: 2025-12-24
|
| 14 |
+
Version: 2.4.0 (Absolute Position Fix)
|
| 15 |
"""
|
| 16 |
|
| 17 |
import streamlit as st
|
|
|
|
| 20 |
import os
|
| 21 |
import base64
|
| 22 |
import streamlit.components.v1 as components
|
|
|
|
| 23 |
|
| 24 |
# --- Local Modules ---
|
|
|
|
| 25 |
from gesture_classifier import classify_gesture
|
| 26 |
from tts import speak
|
| 27 |
|
|
|
|
| 28 |
# --- Constants & Configuration ---
PAGE_TITLE = "Hand2Voice"          # Browser tab / header title
PAGE_ICON = "π€"                   # Page icon (emoji; mojibake-damaged in this copy — TODO confirm original glyph)
LAYOUT_MODE = "wide"               # Streamlit layout mode passed to st.set_page_config
LOGO_PATH = "NIELIT-LOGO.png"      # Sidebar logo image file, expected next to app.py
|
| 33 |
|
| 34 |
+
# --- CSS STABILIZATION (The Fix) ---
|
| 35 |
+
# We use 'position: fixed' on the iframe to remove it from the layout flow entirely.
|
| 36 |
+
# This ensures that no matter where the audio player loads, it cannot push pixels around.
|
| 37 |
STABILIZATION_CSS = """
|
| 38 |
<style>
|
| 39 |
/* 1. Prevent Image Collapse */
|
|
|
|
| 43 |
align-items: center;
|
| 44 |
justify-content: center;
|
| 45 |
}
|
| 46 |
+
|
| 47 |
/* 2. Disable Alert Animations */
|
| 48 |
.stAlert {
|
| 49 |
transition: none !important;
|
| 50 |
}
|
| 51 |
+
|
| 52 |
+
/* 3. ABSOLUTE POSITIONING FOR AUDIO IFRAME (The Nuclear Fix) */
|
| 53 |
+
/* This targets the specific iframe Streamlit uses for components.html */
|
| 54 |
+
iframe[title="streamlit.components.v1.components.html"] {
|
| 55 |
+
position: fixed !important;
|
| 56 |
+
top: 0;
|
| 57 |
+
left: 0;
|
| 58 |
+
width: 0px;
|
| 59 |
+
height: 0px;
|
| 60 |
+
border: none;
|
| 61 |
+
opacity: 0;
|
| 62 |
+
pointer-events: none;
|
| 63 |
+
z-index: -1;
|
| 64 |
}
|
| 65 |
</style>
|
| 66 |
"""
|
| 67 |
|
|
|
|
| 68 |
def inject_custom_css() -> None:
    """Apply the layout-stabilization stylesheet to the current page.

    Renders STABILIZATION_CSS as raw HTML so the <style> tag takes effect.
    """
    st.markdown(STABILIZATION_CSS, unsafe_allow_html=True)
|
| 71 |
|
|
|
|
| 72 |
def play_audio_js(file_path: str) -> None:
|
| 73 |
"""
|
| 74 |
+
Plays audio using a hidden JS trigger.
|
| 75 |
+
The CSS above ensures this component has ABSOLUTELY NO layout footprint.
|
| 76 |
"""
|
| 77 |
try:
|
| 78 |
with open(file_path, "rb") as f:
|
| 79 |
data = f.read()
|
| 80 |
b64 = base64.b64encode(data).decode()
|
| 81 |
|
|
|
|
| 82 |
js_code = f"""
|
| 83 |
<script>
|
| 84 |
var audio = new Audio("data:audio/mp3;base64,{b64}");
|
| 85 |
audio.play();
|
| 86 |
</script>
|
| 87 |
"""
|
| 88 |
+
# We create the component. The CSS will force it to position:fixed
|
| 89 |
components.html(js_code, height=0, width=0)
|
| 90 |
|
| 91 |
except FileNotFoundError:
|
|
|
|
| 93 |
except Exception as e:
|
| 94 |
st.error(f"Error playing audio: {e}")
|
| 95 |
|
|
|
|
| 96 |
def render_sidebar() -> None:
|
| 97 |
"""Renders the sidebar content."""
|
| 98 |
with st.sidebar:
|
|
|
|
| 120 |
- π OK
|
| 121 |
- π Pointing / Yes
|
| 122 |
""")
|
|
|
|
| 123 |
st.divider()
|
| 124 |
st.markdown("Developed by **Arshbir Singh**")
|
| 125 |
|
|
|
|
| 126 |
def main() -> None:
    """Main application execution flow.

    Lays out a two-column UI: column 1 collects an image (camera or upload),
    column 2 runs gesture classification and, on a *new* gesture, triggers
    hidden audio playback. Must be the first Streamlit call site
    (st.set_page_config has to run before any other st.* call).
    """
    st.set_page_config(page_title=PAGE_TITLE, page_icon=PAGE_ICON, layout=LAYOUT_MODE)

    inject_custom_css()
    render_sidebar()

    st.title(f"{PAGE_ICON} {PAGE_TITLE}: AI Sign Language Assistant")
    st.markdown("### Translating Silence into Sound")

    col1, col2 = st.columns([1, 1])

    # --- INPUT ---
    with col1:
        st.subheader("1. Input Source")
        tab_cam, tab_upload = st.tabs(["π· Camera", "π Upload"])

        input_buffer = None
        with tab_cam:
            cam_val = st.camera_input("Capture Gesture")
            if cam_val:
                input_buffer = cam_val
        with tab_upload:
            upl_val = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
            if upl_val:
                input_buffer = upl_val

    # --- OUTPUT ---
    with col2:
        st.subheader("2. AI Analysis")
        # Pre-allocated container keeps the results area vertically stable
        # across reruns (no layout shift when content appears).
        result_container = st.container()

        # --- PROCESSING ---
        if input_buffer:
            try:
                # Normalize any input (camera frame or uploaded file) to an RGB array.
                image = Image.open(input_buffer).convert("RGB")
                frame = np.array(image)

                gesture_name, annotated_image = classify_gesture(frame)

                with result_container:
                    st.image(
                        annotated_image,
                        caption="Computer Vision View",
                        use_container_width=True,
                    )

                    if gesture_name is None:
                        st.error("β οΈ No hand detected. Please ensure your hand is in frame.")
                    elif gesture_name == "UNKNOWN":
                        st.warning("β Gesture detected but not recognized.")
                    else:
                        st.success(f"### Detected: {gesture_name}")

                        # --- Audio Logic ---
                        # Only speak when the gesture changed since the last rerun,
                        # so repeated reruns of the same frame stay silent.
                        if "last_spoken" not in st.session_state or st.session_state.last_spoken != gesture_name:
                            audio_file = speak(gesture_name)
                            st.session_state.last_spoken = gesture_name

                            # Trigger audio - CSS handles the hidden positioning
                            play_audio_js(audio_file)

            except Exception as e:
                # Boundary handler: surface any pipeline failure in the UI
                # rather than crashing the Streamlit script run.
                with result_container:
                    st.error(f"Error: {e}")
        else:
            # Default empty state before any input arrives.
            with result_container:
                st.info("Waiting for input...")


if __name__ == "__main__":
    main()
|