# Hand2Voice / app.py
# (Hugging Face Space page residue, kept as comments: "LovnishVerma's picture",
#  "Update app.py", commit 04ddcc1 verified)
"""
Hand2Voice: AI-Powered Sign Language Assistant
----------------------------------------------
Version: 11.0.0 (Footer Fix)
Author: Lovnish Verma
Organization: Government Research / NIELIT
Date: 2025-12-24
Description:
A production-ready accessibility tool that translates hand gestures into spoken audio.
Fixes in v11.0:
- Added bottom padding to main container so Footer doesn't overlap text.
- Enforced Z-Index layering to keep alerts visible.
"""
import streamlit as st
import numpy as np
from PIL import Image, ImageOps
import os
import base64
import streamlit.components.v1 as components
# --- Local Modules ---
from gesture_classifier import classify_gesture
from tts import speak
# --- Configuration ---
PAGE_TITLE = "Hand2Voice"
PAGE_ICON = "🀟"
LAYOUT = "wide"
LOGO_FILE = "NIELIT-LOGO.png"
GIF_FILE = "hand_animation.gif"
GIF_URL = "https://mediapipe.dev/images/mobile/hand_tracking_3d_android_gpu.gif"
AUTHOR_NAME = "Lovnish Verma"
COPYRIGHT_YEAR = "2026"
# --- 1. THE CONCRETE CSS (Layout & Design System) ---
STABILIZATION_CSS = """
<style>
/* A. GLOBAL SCROLLBAR LOCK */
html { overflow-y: scroll; }
/* B. CONTAINER LOCK */
div[data-testid="stImage"] {
height: 450px !important;
width: 100% !important;
display: flex;
align-items: center;
justify-content: center;
background-color: #f8f9fa;
border-radius: 12px;
border: 1px solid #dee2e6;
overflow: hidden;
}
div[data-testid="stImage"] > img {
height: 100% !important;
width: auto !important;
object-fit: contain !important;
}
/* C. SIDEBAR POLISH */
[data-testid="stSidebar"] [data-testid="stImage"] {
min-height: 100px !important;
transition: none !important;
border: none !important;
background-color: transparent !important;
}
/* D. UI CLEANUP & ANIMATION STOPPER */
.stAlert { transition: none !important; }
audio { display: none; }
/* E. FOOTER SPACING FIX (The Solution) */
/* We add huge bottom padding to the main block so content stops BEFORE the footer */
.block-container {
padding-top: 2rem;
padding-bottom: 5rem; /* Space for the footer */
}
/* F. FOOTER DESIGN */
.footer {
position: fixed;
bottom: 0;
left: 0;
width: 100%;
background-color: #f1f1f1;
color: #555;
text-align: center;
padding: 10px;
font-size: 14px;
border-top: 1px solid #ddd;
z-index: 999; /* Ensure it stays on top */
}
</style>
"""
# --- 2. ASSET MANAGEMENT ---
@st.cache_data
def load_image(path):
    """Return the PIL image at *path*, or None when the file does not exist.

    Cached by Streamlit so repeated reruns do not re-read the file.
    """
    return Image.open(path) if os.path.exists(path) else None
def inject_css():
    """Inject the global stabilization stylesheet (STABILIZATION_CSS) into the page."""
    st.markdown(STABILIZATION_CSS, unsafe_allow_html=True)
# --- 3. INVISIBLE AUDIO ENGINE ---
def play_audio_js(file_path):
    """Play an MP3 file invisibly by injecting a JavaScript Audio element.

    Streamlit's native ``st.audio`` renders a visible player; embedding a
    base64 data-URI in a zero-sized HTML component keeps playback hidden.
    Playback is best-effort: failures are logged, never raised, so audio
    problems cannot break the UI.

    Args:
        file_path: Path to an MP3 file (as produced by ``tts.speak``).
            A falsy value (e.g. None when synthesis failed) is ignored.
    """
    if not file_path:
        return  # nothing to play — speak() may have failed upstream
    try:
        # Keep the try body minimal: only the file read is expected to fail.
        with open(file_path, "rb") as f:
            data = f.read()
    except OSError as e:
        # Narrowed from a bare `except Exception` so programming errors surface.
        print(f"Audio Error: {e}")
        return
    b64 = base64.b64encode(data).decode()
    js = f"""
    <script>
        var audio = new Audio("data:audio/mp3;base64,{b64}");
        audio.play();
    </script>
    """
    components.html(js, height=0, width=0)
# --- 4. UI COMPONENTS ---
def render_status_box(text, color, icon, text_color="#155724", border_color="#b1dfbb"):
    """Render a fixed-height (60px) status banner as raw HTML.

    Fix: the foreground and border colours were hard-coded to the green
    theme even though callers pass red/yellow/gray backgrounds; they are now
    parameters whose defaults reproduce the previous output exactly, so
    existing call sites are unchanged.

    Args:
        text: Message shown inside the banner.
        color: Background colour (hex string).
        icon: Leading emoji rendered before the text.
        text_color: Foreground colour; defaults to the original dark green.
        border_color: Border colour; defaults to the original light green.
    """
    st.markdown(f"""
    <div style="
        background-color: {color};
        color: {text_color};
        padding: 0px 20px;
        height: 60px;
        border-radius: 8px;
        display: flex;
        align-items: center;
        font-weight: bold;
        font-size: 18px;
        border: 1px solid {border_color};
        margin-top: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.05);
    ">
        <span style="font-size: 24px; margin-right: 15px;">{icon}</span>
        {text}
    </div>
    """, unsafe_allow_html=True)
def render_sidebar():
    """Renders the navigation sidebar: logo, demo GIF, usage steps, gesture legend."""
    with st.sidebar:
        # Branding logo; load_image returns None (and we skip) if the asset is missing.
        logo = load_image(LOGO_FILE)
        if logo:
            st.image(logo, use_container_width=True)
        # Prefer the bundled GIF; fall back to the hosted MediaPipe demo when absent.
        if os.path.exists(GIF_FILE):
            st.image(GIF_FILE, caption="MediaPipe Hand Tracking")
        else:
            st.image(GIF_URL, caption="MediaPipe Hand Tracking (Web)")
        st.divider()
        st.subheader("📖 How to Use")
        st.markdown("""
        1. **Select Input:** Choose 'Camera' or 'Upload'.
        2. **Capture:** Click button to capture.
        3. **Wait:** AI analyzes hand.
        4. **Listen:** Hear the result.
        """)
        st.divider()
        # Legend of the gesture labels produced by gesture_classifier.classify_gesture.
        st.subheader("✌️ Supported Signs")
        st.markdown("""
        | Gesture | Meaning |
        | :--- | :--- |
        | ✋ **Palm** | Hello |
        | ✌️ **V-Sign** | Peace / Victory |
        | ✊ **Fist** | No / Stop |
        | 🤘 **Rock** | Rock On |
        | 👌 **OK** | Okay / Perfect |
        | 👆 **Index** | Yes / Pointing |
        """)
        st.divider()
        st.info(f"Designed by **{AUTHOR_NAME}**")
def _render_header():
    """Draw the page title and tagline."""
    st.title(f"{PAGE_ICON} {PAGE_TITLE}")
    st.markdown("### **AI Sign Language Assistant**")
    st.markdown("Translating silence into sound using Computer Vision.")


def _get_input_buffer():
    """Render the input column (camera + upload tabs); return the image buffer or None.

    An uploaded file takes precedence over a camera capture, matching the
    original statement order.
    """
    st.subheader("1. Input Source")
    tab_cam, tab_up = st.tabs(["📷 Camera", "📂 Upload Image"])
    input_buffer = None
    with tab_cam:
        input_buffer = st.camera_input("Capture Gesture")
    with tab_up:
        upl = st.file_uploader("Choose a file", type=["jpg", "png", "jpeg"])
        if upl:
            input_buffer = upl
    return input_buffer


def _process_and_render(input_buffer, result_container):
    """Run the CV pipeline on *input_buffer*; render the annotated result and speak it."""
    try:
        # Normalize the input to a fixed-size RGB frame for the classifier.
        img = Image.open(input_buffer).convert("RGB")
        img = ImageOps.fit(img, (640, 480), method=Image.Resampling.LANCZOS)
        frame = np.array(img)
        gesture, annotated_img = classify_gesture(frame)
        with result_container:
            st.image(annotated_img, caption="Computer Vision View", use_container_width=True)
            if gesture is None:
                render_status_box("No hand detected. Please try again.", "#f8d7da", "⚠️")
            elif gesture == "UNKNOWN":
                render_status_box("Gesture not recognized.", "#fff3cd", "❓")
            else:
                render_status_box(f"Detected: {gesture}", "#d4edda", "✅")
        # Fix: only vocalize real gestures — previously speak() was also called
        # with None / "UNKNOWN". Debounce so reruns don't repeat the same audio.
        if gesture and gesture != "UNKNOWN":
            if st.session_state.get("last_spoken") != gesture:
                audio_file = speak(gesture)
                st.session_state.last_spoken = gesture
                play_audio_js(audio_file)
    except Exception as e:
        # Top-level UI boundary: surface the error instead of crashing the app.
        st.error(f"System Error: {e}")


def _render_empty_state(result_container):
    """Show a neutral placeholder frame while no input has been provided."""
    with result_container:
        placeholder = Image.new("RGB", (640, 480), (240, 240, 240))
        st.image(placeholder, caption="Waiting for input...", use_container_width=True)
        render_status_box("Waiting for gesture...", "#e2e3e5", "⏳")


def _render_footer():
    """Render the fixed copyright footer (styled by the .footer CSS rule)."""
    st.markdown(f"""
    <div class="footer">
        <p>&copy; {COPYRIGHT_YEAR} <b>{AUTHOR_NAME}</b> | Built with Streamlit, MediaPipe & OpenCV</p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """Application entry point: configure the page, build the UI, run the pipeline."""
    st.set_page_config(page_title=PAGE_TITLE, page_icon=PAGE_ICON, layout=LAYOUT)
    inject_css()
    render_sidebar()
    _render_header()
    col1, col2 = st.columns([1, 1], gap="large")
    with col1:
        input_buffer = _get_input_buffer()
    with col2:
        st.subheader("2. AI Analysis")
        result_container = st.container()
    if input_buffer:
        _process_and_render(input_buffer, result_container)
    else:
        _render_empty_state(result_container)
    _render_footer()


if __name__ == "__main__":
    main()