LovnishVerma committed on
Commit
adf170a
·
verified ·
1 Parent(s): ca1c87c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -51
app.py CHANGED
@@ -6,12 +6,12 @@ It utilizes MediaPipe for computer vision and Google TTS for audio synthesis.
6
 
7
  Key Features:
8
  - Rotation-invariant gesture recognition (Euclidean geometry).
9
- - Zero-layout-shift audio playback (Pure JavaScript implementation).
10
  - Dual input modes: Camera and File Upload.
11
 
12
  Author: Arshbir Singh
13
  Date: 2025-12-24
14
- Version: 2.3.0 (Sidebar Fix)
15
  """
16
 
17
  import streamlit as st
@@ -20,21 +20,20 @@ from PIL import Image
20
  import os
21
  import base64
22
  import streamlit.components.v1 as components
23
- from typing import Optional, Tuple
24
 
25
  # --- Local Modules ---
26
- # Ensure these files exist in the same directory
27
  from gesture_classifier import classify_gesture
28
  from tts import speak
29
 
30
-
31
  # --- Constants & Configuration ---
32
  PAGE_TITLE = "Hand2Voice"
33
  PAGE_ICON = "🀟"
34
  LAYOUT_MODE = "wide"
35
  LOGO_PATH = "NIELIT-LOGO.png"
36
 
37
- # Custom CSS to stabilize the layout.
 
 
38
  STABILIZATION_CSS = """
39
  <style>
40
  /* 1. Prevent Image Collapse */
@@ -44,40 +43,49 @@ STABILIZATION_CSS = """
44
  align-items: center;
45
  justify-content: center;
46
  }
 
47
  /* 2. Disable Alert Animations */
48
  .stAlert {
49
  transition: none !important;
50
  }
51
- /* 3. Global Reset for Iframe Containers to prevent ghost margins */
52
- iframe {
53
- display: block;
 
 
 
 
 
 
 
 
 
 
54
  }
55
  </style>
56
  """
57
 
58
-
59
  def inject_custom_css() -> None:
60
- """Injects CSS to stabilize the UI and prevent layout shifts."""
61
  st.markdown(STABILIZATION_CSS, unsafe_allow_html=True)
62
 
63
-
64
  def play_audio_js(file_path: str) -> None:
65
  """
66
- Plays an audio file using a hidden JavaScript trigger.
 
67
  """
68
  try:
69
  with open(file_path, "rb") as f:
70
  data = f.read()
71
  b64 = base64.b64encode(data).decode()
72
 
73
- # JavaScript to create an Audio object in memory and play it immediately
74
  js_code = f"""
75
  <script>
76
  var audio = new Audio("data:audio/mp3;base64,{b64}");
77
  audio.play();
78
  </script>
79
  """
80
- # Execute JS in a hidden 0x0 pixel iframe
81
  components.html(js_code, height=0, width=0)
82
 
83
  except FileNotFoundError:
@@ -85,7 +93,6 @@ def play_audio_js(file_path: str) -> None:
85
  except Exception as e:
86
  st.error(f"Error playing audio: {e}")
87
 
88
-
89
  def render_sidebar() -> None:
90
  """Renders the sidebar content."""
91
  with st.sidebar:
@@ -113,104 +120,79 @@ def render_sidebar() -> None:
113
  - πŸ‘Œ OK
114
  - πŸ‘† Pointing / Yes
115
  """)
116
-
117
  st.divider()
118
  st.markdown("Developed by **Arshbir Singh**")
119
 
120
-
121
  def main() -> None:
122
  """Main application execution flow."""
123
-
124
- # 1. Page Configuration
125
  st.set_page_config(
126
  page_title=PAGE_TITLE,
127
  page_icon=PAGE_ICON,
128
  layout=LAYOUT_MODE
129
  )
130
 
131
- # 2. Initialize UI
132
  inject_custom_css()
133
  render_sidebar()
134
 
135
- # 3. Main Header
136
  st.title(f"{PAGE_ICON} {PAGE_TITLE}: AI Sign Language Assistant")
137
  st.markdown("### Translating Silence into Sound")
138
 
139
- # 4. Layout Setup (Two Columns)
140
  col1, col2 = st.columns([1, 1])
141
 
142
- # --- COLUMN 1: INPUT HANDLING ---
143
  with col1:
144
  st.subheader("1. Input Source")
145
  tab_cam, tab_upload = st.tabs(["πŸ“· Camera", "πŸ“‚ Upload"])
146
 
147
  input_buffer = None
148
-
149
- # Camera Tab
150
  with tab_cam:
151
  cam_val = st.camera_input("Capture Gesture")
152
- if cam_val:
153
- input_buffer = cam_val
154
-
155
- # Upload Tab
156
  with tab_upload:
157
  upl_val = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
158
- if upl_val:
159
- input_buffer = upl_val
160
 
161
- # --- COLUMN 2: OUTPUT & PROCESSING ---
162
  with col2:
163
  st.subheader("2. AI Analysis")
164
- # Pre-allocate a container to maintain vertical stability
165
  result_container = st.container()
166
 
167
- # 5. Core Processing Logic
168
  if input_buffer:
169
  try:
170
- # Image Preprocessing
171
  image = Image.open(input_buffer).convert("RGB")
172
  frame = np.array(image)
173
 
174
- # AI Inference
175
  gesture_name, annotated_image = classify_gesture(frame)
176
 
177
- # Update UI Result Container
178
  with result_container:
179
- # Show Computer Vision View
180
  st.image(
181
  annotated_image,
182
  caption="Computer Vision View",
183
  use_container_width=True
184
  )
185
 
186
- # Show Classification Result
187
  if gesture_name is None:
188
  st.error("⚠️ No hand detected. Please ensure your hand is in frame.")
189
  elif gesture_name == "UNKNOWN":
190
- st.warning("❓ Gesture detected but not recognized.\nTry: Peace Sign, Thumbs Up, Open Palm, or Fist.")
191
  else:
192
  st.success(f"### Detected: {gesture_name}")
193
 
194
- # --- Audio Playback Logic ---
195
  if "last_spoken" not in st.session_state or st.session_state.last_spoken != gesture_name:
196
-
197
- # Generate Audio
198
  audio_file = speak(gesture_name)
199
  st.session_state.last_spoken = gesture_name
200
 
201
- # FIX: Trigger Audio in the MAIN FLOW (Bottom of page)
202
- # We do NOT put this in st.sidebar.
203
- # This ensures it runs invisibly regardless of sidebar state.
204
  play_audio_js(audio_file)
205
 
206
  except Exception as e:
207
  with result_container:
208
- st.error(f"An unexpected error occurred: {e}")
209
  else:
210
- # Default empty state
211
  with result_container:
212
- st.info("Waiting for input... Please capture or upload an image.")
213
-
214
 
215
  if __name__ == "__main__":
216
  main()
 
6
 
7
  Key Features:
8
  - Rotation-invariant gesture recognition (Euclidean geometry).
9
+ - Zero-layout-shift audio playback (Absolute Positioning Fix).
10
  - Dual input modes: Camera and File Upload.
11
 
12
  Author: Arshbir Singh
13
  Date: 2025-12-24
14
+ Version: 2.4.0 (Absolute Position Fix)
15
  """
16
 
17
  import streamlit as st
 
20
  import os
21
  import base64
22
  import streamlit.components.v1 as components
 
23
 
24
  # --- Local Modules ---
 
25
  from gesture_classifier import classify_gesture
26
  from tts import speak
27
 
 
28
  # --- Constants & Configuration ---
29
  PAGE_TITLE = "Hand2Voice"
30
  PAGE_ICON = "🀟"
31
  LAYOUT_MODE = "wide"
32
  LOGO_PATH = "NIELIT-LOGO.png"
33
 
34
+ # --- CSS STABILIZATION (The Fix) ---
35
+ # We use 'position: fixed' on the iframe to remove it from the layout flow entirely.
36
+ # This ensures that no matter where the audio player loads, it cannot push pixels around.
37
  STABILIZATION_CSS = """
38
  <style>
39
  /* 1. Prevent Image Collapse */
 
43
  align-items: center;
44
  justify-content: center;
45
  }
46
+
47
  /* 2. Disable Alert Animations */
48
  .stAlert {
49
  transition: none !important;
50
  }
51
+
52
+ /* 3. ABSOLUTE POSITIONING FOR AUDIO IFRAME (The Nuclear Fix) */
53
+ /* This targets the specific iframe Streamlit uses for components.html */
54
+ iframe[title="streamlit.components.v1.components.html"] {
55
+ position: fixed !important;
56
+ top: 0;
57
+ left: 0;
58
+ width: 0px;
59
+ height: 0px;
60
+ border: none;
61
+ opacity: 0;
62
+ pointer-events: none;
63
+ z-index: -1;
64
  }
65
  </style>
66
  """
67
 
 
68
def inject_custom_css() -> None:
    """Apply the global stabilization stylesheet to the page.

    Renders ``STABILIZATION_CSS`` as raw HTML so its rules (image
    centering, disabled alert animations, fixed-position audio iframe)
    take effect app-wide. Call once, early in the script run.
    """
    css_payload = STABILIZATION_CSS
    st.markdown(css_payload, unsafe_allow_html=True)
71
 
 
72
  def play_audio_js(file_path: str) -> None:
73
  """
74
+ Plays audio using a hidden JS trigger.
75
+ The CSS above ensures this component has ABSOLUTELY NO layout footprint.
76
  """
77
  try:
78
  with open(file_path, "rb") as f:
79
  data = f.read()
80
  b64 = base64.b64encode(data).decode()
81
 
 
82
  js_code = f"""
83
  <script>
84
  var audio = new Audio("data:audio/mp3;base64,{b64}");
85
  audio.play();
86
  </script>
87
  """
88
+ # We create the component. The CSS will force it to position:fixed
89
  components.html(js_code, height=0, width=0)
90
 
91
  except FileNotFoundError:
 
93
  except Exception as e:
94
  st.error(f"Error playing audio: {e}")
95
 
 
96
  def render_sidebar() -> None:
97
  """Renders the sidebar content."""
98
  with st.sidebar:
 
120
  - πŸ‘Œ OK
121
  - πŸ‘† Pointing / Yes
122
  """)
 
123
  st.divider()
124
  st.markdown("Developed by **Arshbir Singh**")
125
 
 
126
def main() -> None:
    """Entry point: configure the page, collect input, classify, speak.

    Flow: page setup -> CSS injection + sidebar -> two-column layout
    (input on the left, analysis on the right) -> gesture classification
    -> TTS playback whenever a *new* gesture is detected.
    """
    # Must be the first Streamlit call of the run.
    st.set_page_config(
        page_title=PAGE_TITLE,
        page_icon=PAGE_ICON,
        layout=LAYOUT_MODE,
    )

    inject_custom_css()
    render_sidebar()

    st.title(f"{PAGE_ICON} {PAGE_TITLE}: AI Sign Language Assistant")
    st.markdown("### Translating Silence into Sound")

    left_col, right_col = st.columns([1, 1])

    # --- INPUT ---
    with left_col:
        st.subheader("1. Input Source")
        camera_tab, upload_tab = st.tabs(["πŸ“· Camera", "πŸ“‚ Upload"])

        source_image = None
        with camera_tab:
            snapshot = st.camera_input("Capture Gesture")
            if snapshot:
                source_image = snapshot

        with upload_tab:
            uploaded = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
            if uploaded:
                source_image = uploaded

    # --- OUTPUT ---
    with right_col:
        st.subheader("2. AI Analysis")
        # Pre-allocated container so results land in a stable spot.
        result_container = st.container()

    # --- PROCESSING ---
    if not source_image:
        # Default empty state: nothing captured or uploaded yet.
        with result_container:
            st.info("Waiting for input...")
        return

    try:
        rgb_image = Image.open(source_image).convert("RGB")
        frame = np.array(rgb_image)

        gesture_name, annotated_image = classify_gesture(frame)

        with result_container:
            st.image(
                annotated_image,
                caption="Computer Vision View",
                use_container_width=True,
            )

            if gesture_name is None:
                st.error("⚠️ No hand detected. Please ensure your hand is in frame.")
            elif gesture_name == "UNKNOWN":
                st.warning("❓ Gesture detected but not recognized.")
            else:
                st.success(f"### Detected: {gesture_name}")

                # --- Audio Logic ---
                # Speak only when the gesture differs from the last one
                # spoken, so Streamlit reruns don't replay the same clip.
                if st.session_state.get("last_spoken") != gesture_name:
                    audio_file = speak(gesture_name)
                    st.session_state.last_spoken = gesture_name

                    # CSS keeps the audio iframe out of the layout flow.
                    play_audio_js(audio_file)

    except Exception as e:
        with result_container:
            st.error(f"Error: {e}")
 
196
 
197
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()