nailarais1 committed on
Commit
3512db3
·
verified ·
1 Parent(s): 2dcf150

Create app.py

Files changed (1)
  1. app.py +1073 -0
app.py ADDED
@@ -0,0 +1,1073 @@
+ #!/usr/bin/env python3
+ """
+ VoiceBridge.AI - Production Ready Universal Communication Platform
+ Supporting: Blind, Deaf, Non-Verbal, Deaf-Blind Users
+ """
+
+ import gradio as gr
+ import speech_recognition as sr
+ import pyttsx3
+ import threading
+ import time
+ import json
+ import tempfile
+ import os
+ import logging
+ from datetime import datetime
+ from pathlib import Path
+ import torch
+ from transformers import pipeline
+ import cv2
+ import numpy as np
+ import requests
+ from typing import Dict, List, Optional, Tuple
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ class ProductionVoiceBridge:
+     """
+     Production-grade universal communication system for all disabilities
+     """
+
+     def __init__(self):
+         self.setup_directories()
+         self.load_config()
+         self.initialize_engines()
+         self.current_mode = "universal"
+         self.user_preferences = {}
+         self.conversation_history = []
+         self.emergency_contacts = []
+
+     def setup_directories(self):
+         """Create necessary directories for production"""
+         Path("data/conversations").mkdir(parents=True, exist_ok=True)
+         Path("data/emergency").mkdir(parents=True, exist_ok=True)
+         Path("data/user_profiles").mkdir(parents=True, exist_ok=True)
+
+     def load_config(self):
+         """Load production configuration"""
+         self.config = {
+             "api_timeout": 30,
+             "max_audio_length": 60,
+             "emergency_check_interval": 5,
+             "backup_interval": 300,
+             "supported_languages": ["en", "es", "fr", "de"],
+             "haptic_patterns": {
+                 "emergency": [500, 200, 500],
+                 "notification": [200],
+                 "confirmation": [100, 100],
+                 "error": [100, 50, 100, 50, 100]
+             }
+         }
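+         # Note: the haptic pattern values are assumed to be millisecond
+         # durations alternating vibrate/pause (e.g. "emergency" = vibrate
+         # 500 ms, pause 200 ms, vibrate 500 ms), matching the durations
+         # produced by text_to_vibration_pattern below.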
+
+     def initialize_engines(self):
+         """Initialize all AI engines and hardware interfaces"""
+         try:
+             # Text-to-Speech Engine
+             self.tts_engine = pyttsx3.init()
+             voices = self.tts_engine.getProperty('voices')
+             if voices:
+                 self.tts_engine.setProperty('voice', voices[0].id)
+             self.tts_engine.setProperty('rate', 160)
+             self.tts_engine.setProperty('volume', 0.8)
+
+             # Speech Recognition (a microphone may be absent on headless servers)
+             self.recognizer = sr.Recognizer()
+             try:
+                 self.microphone = sr.Microphone()
+                 with self.microphone as source:
+                     self.recognizer.adjust_for_ambient_noise(source, duration=1)
+             except OSError as e:
+                 logger.warning(f"No microphone available: {e}")
+                 self.microphone = None
+
+             # AI Models with error handling
+             self.load_ai_models()
+
+             # Emergency system
+             self.emergency_mode = False
+             self.last_emergency_check = time.time()
+
+             logger.info("All engines initialized successfully")
+
+         except Exception as e:
+             logger.error(f"Engine initialization failed: {e}")
+             raise
+
+     def load_ai_models(self):
+         """Load AI models with fallbacks"""
+         try:
+             self.speech_to_text_model = pipeline(
+                 "automatic-speech-recognition",
+                 model="openai/whisper-base",
+                 device=-1  # CPU for reliability
+             )
+         except Exception as e:
+             logger.warning(f"Whisper model failed, using fallback: {e}")
+             self.speech_to_text_model = None
+
+         try:
+             self.image_caption_model = pipeline(
+                 "image-to-text",
+                 model="Salesforce/blip-image-captioning-base",
+                 device=-1
+             )
+         except Exception as e:
+             logger.warning(f"BLIP model failed, using fallback: {e}")
+             self.image_caption_model = None
+
+     # ==================== UNIVERSAL MODE ====================
+
+     def universal_communication(self, input_data: dict) -> dict:
+         """
+         Universal communication handler that adapts to any input type
+         """
+         try:
+             input_type = input_data.get('type', 'voice')
+
+             if input_type == 'voice' and input_data.get('audio'):
+                 return self.handle_voice_input(input_data['audio'])
+
+             elif input_type == 'text' and input_data.get('text'):
+                 return self.handle_text_input(input_data['text'])
+
+             elif input_type == 'image' and input_data.get('image'):
+                 return self.handle_image_input(input_data['image'])
+
+             elif input_type == 'command':
+                 return self.handle_system_command(input_data.get('command', ''))
+
+             else:
+                 return self.create_response(
+                     "Please provide voice, text, or image input",
+                     "error"
+                 )
+
+         except Exception as e:
+             logger.error(f"Universal communication error: {e}")
+             return self.create_response(
+                 "System error. Please try again or use emergency mode.",
+                 "error"
+             )
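+     # Illustrative payloads for the dispatcher above:
+     #   universal_communication({'type': 'text', 'text': 'hello'})      -> speaks "hello"
+     #   universal_communication({'type': 'image', 'image': 'shot.jpg'}) -> spoken scene description
+     # ('shot.jpg' is a hypothetical path; any filepath from the Gradio widgets works.)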
+
+     def handle_voice_input(self, audio_path: str) -> dict:
+         """Process voice input for deaf users and general transcription"""
+         try:
+             # Convert speech to text
+             if self.speech_to_text_model:
+                 transcript = self.speech_to_text_model(audio_path)["text"]
+             else:
+                 transcript = self.fallback_speech_to_text(audio_path)
+
+             # Check for emergency keywords
+             if self.detect_emergency_keywords(transcript):
+                 emergency_result = self.trigger_emergency_mode("voice_triggered")
+                 return self.create_response(
+                     f"EMERGENCY DETECTED: {transcript}\n{emergency_result['message']}",
+                     "emergency",
+                     audio=emergency_result.get('audio'),
+                     visual_alert="🔴 EMERGENCY ACTIVATED"
+                 )
+
+             # Check for system commands
+             if self.is_system_command(transcript):
+                 return self.handle_system_command(transcript)
+
+             # Regular communication
+             self.add_to_conversation("User", transcript)
+
+             return self.create_response(
+                 transcript,
+                 "transcription",
+                 visual_alert=f"💬 New message: {transcript[:50]}..."
+             )
+
+         except Exception as e:
+             logger.error(f"Voice input error: {e}")
+             return self.create_response(
+                 "Could not process audio. Please try again.",
+                 "error"
+             )
+
+     def handle_text_input(self, text: str) -> dict:
+         """Process text input for non-verbal users"""
+         try:
+             # Check for emergency
+             if self.detect_emergency_keywords(text):
+                 emergency_result = self.trigger_emergency_mode("text_triggered")
+                 return self.create_response(
+                     f"EMERGENCY: {text}\n{emergency_result['message']}",
+                     "emergency",
+                     audio=emergency_result.get('audio'),
+                     visual_alert="🔴 EMERGENCY"
+                 )
+
+             # Convert to speech
+             audio_path = self.text_to_speech(text)
+
+             self.add_to_conversation("User", text, "spoken")
+
+             return self.create_response(
+                 text,
+                 "communication",
+                 audio=audio_path,
+                 visual_alert=f"🗣️ Speaking: {text[:30]}..."
+             )
+
+         except Exception as e:
+             logger.error(f"Text input error: {e}")
+             return self.create_response(
+                 "Could not process text. Please try again.",
+                 "error"
+             )
+
+     def handle_image_input(self, image_path: str) -> dict:
+         """Process image input for blind users"""
+         try:
+             if not self.image_caption_model:
+                 description = "I see an image but cannot describe it in detail right now."
+             else:
+                 description = self.image_caption_model(image_path)[0]['generated_text']
+                 # Enhance description
+                 description = self.enhance_scene_description(description)
+
+             # Convert description to speech
+             audio_path = self.text_to_speech(description)
+
+             return self.create_response(
+                 description,
+                 "scene_description",
+                 audio=audio_path
+             )
+
+         except Exception as e:
+             logger.error(f"Image input error: {e}")
+             return self.create_response(
+                 "Could not process image. Please try again.",
+                 "error"
+             )
+
+     # ==================== DISABILITY-SPECIFIC MODES ====================
+
+     def blind_mode(self, command: str = None, image_path: str = None) -> dict:
+         """Voice-first interface for blind users"""
+         if not command and not image_path:
+             welcome_msg = (
+                 "Blind mode activated. Say 'describe scene' to use camera, "
+                 "'read text' for text recognition, or 'help' for options."
+             )
+             return self.create_response(welcome_msg, "system", audio=self.text_to_speech(welcome_msg))
+
+         if command:
+             command = command.lower()
+
+             if 'describe' in command or 'scene' in command or image_path:
+                 if image_path:
+                     return self.handle_image_input(image_path)
+                 else:
+                     return self.create_response(
+                         "Please capture an image using the camera",
+                         "instruction"
+                     )
+
+             elif 'read' in command or 'text' in command:
+                 return self.create_response(
+                     "Please capture an image containing text",
+                     "instruction"
+                 )
+
+             elif 'navigate' in command or 'direction' in command:
+                 guidance = "Navigation assistance: Move forward carefully. Obstacle detection active."
+                 return self.create_response(
+                     guidance,
+                     "navigation",
+                     audio=self.text_to_speech(guidance)
+                 )
+
+             elif 'help' in command:
+                 help_text = """
+                 Blind Mode Commands:
+                 • "Describe scene" - Describe surroundings using camera
+                 • "Read text" - Read text from images
+                 • "Navigate" - Get walking directions
+                 • "Emergency" - Immediate assistance
+                 • "Change mode" - Switch accessibility mode
+                 """
+                 return self.create_response(help_text, "help", audio=self.text_to_speech(help_text))
+
+             else:
+                 response = "Command not recognized. Say 'help' for options."
+                 return self.create_response(response, "error", audio=self.text_to_speech(response))
+
+         # Image provided without a spoken command: describe it directly
+         return self.handle_image_input(image_path)
+
+     def deaf_mode(self, audio_input: str = None, continuous: bool = False) -> dict:
+         """Visual interface for deaf users with real-time transcription"""
+         if audio_input:
+             result = self.handle_voice_input(audio_input)
+
+             # Add visual enhancements for deaf users
+             if result['type'] == 'transcription':
+                 result['visual_alert'] = f"👂 TRANSCRIPTION: {result['text'][:100]}..."
+
+             # Check for important sounds
+             if self.detect_important_sounds(audio_input):
+                 result['visual_alert'] = "🔔 IMPORTANT SOUND DETECTED! " + result.get('visual_alert', '')
+                 result['haptic_feedback'] = self.config['haptic_patterns']['notification']
+
+             return result
+         else:
+             status = "Deaf mode active. Real-time transcription ready. Visual alerts enabled."
+             return self.create_response(status, "system", visual_alert="👂 Deaf Mode Active")
+
+     def non_verbal_mode(self, text: str = None, preset: str = None) -> dict:
+         """Text-to-speech communication for non-verbal users"""
+         # Explicit text takes priority over a selected preset, since the UI
+         # always supplies a default preset value
+         if text:
+             text_to_speak = text
+         elif preset:
+             phrases = {
+                 'greeting': "Hello, I use this device to communicate",
+                 'help': "I need assistance please",
+                 'medical': "I have a medical condition and may need help",
+                 'emergency': "This is an emergency! I need immediate assistance!",
+                 'thanks': "Thank you for your help",
+                 'yes': "Yes",
+                 'no': "No",
+                 'pain': "I am in pain and need medical help",
+                 'lost': "I am lost and need directions",
+                 'bathroom': "I need to find a bathroom"
+             }
+             text_to_speak = phrases.get(preset, preset)
+         else:
+             text_to_speak = "I need help"
+
+         audio_path = self.text_to_speech(text_to_speak)
+
+         self.add_to_conversation("User", text_to_speak, "spoken")
+
+         return self.create_response(
+             text_to_speak,
+             "communication",
+             audio=audio_path,
+             visual_alert=f"🗣️ Speaking: {text_to_speak}",
+             haptic_feedback=self.config['haptic_patterns']['confirmation']
+         )
+
+     def deaf_blind_mode(self, input_text: str = None, output_format: str = "haptic") -> dict:
+         """Tactile communication for deaf-blind users"""
+         if input_text:
+             if output_format == "haptic":
+                 vibration_pattern = self.text_to_vibration_pattern(input_text)
+                 return self.create_response(
+                     f"Message converted to vibrations: {input_text}",
+                     "tactile",
+                     haptic_feedback=vibration_pattern,
+                     braille=self.text_to_braille(input_text)
+                 )
+             else:  # braille output
+                 braille_text = self.text_to_braille(input_text)
+                 return self.create_response(
+                     f"Braille output: {input_text}",
+                     "tactile",
+                     braille=braille_text
+                 )
+         else:
+             status = "Deaf-blind mode active. Use text input with haptic or braille output."
+             return self.create_response(status, "system")
+
+     # ==================== EMERGENCY SYSTEM ====================
+
+     def trigger_emergency_mode(self, trigger_source: str = "manual") -> dict:
+         """Activate emergency response system"""
+         self.emergency_mode = True
+         timestamp = datetime.now().isoformat()
+
+         emergency_data = {
+             "status": "EMERGENCY_ACTIVATED",
+             "timestamp": timestamp,
+             "trigger_source": trigger_source,
+             "message": "EMERGENCY! Assistance required immediately!",
+             "actions_taken": [],
+             "contacts_notified": []
+         }
+
+         # Notify emergency contacts
+         for contact in self.emergency_contacts:
+             try:
+                 self.notify_emergency_contact(contact, emergency_data)
+                 emergency_data["contacts_notified"].append(contact)
+             except Exception as e:
+                 logger.error(f"Failed to notify {contact}: {e}")
+
+         # Create emergency audio message
+         emergency_audio = self.text_to_speech(emergency_data["message"])
+         emergency_data["audio"] = emergency_audio
+
+         # Log emergency
+         self.log_emergency(emergency_data)
+
+         return emergency_data
+
+     def notify_emergency_contact(self, contact: str, emergency_data: dict):
+         """Notify emergency contact (simplified - in production would use SMS/email)"""
+         logger.info(f"EMERGENCY NOTIFICATION to {contact}: {emergency_data['message']}")
+         # In production: send SMS, email, or push notification
+
+     # ==================== CORE ENGINE METHODS ====================
+
+     def text_to_speech(self, text: str) -> str:
+         """Convert text to speech and return audio file path"""
+         try:
+             # Create (and close) the temp file first so pyttsx3 can write to the path
+             tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav', dir='data/')
+             tmp_file.close()
+             # save_to_file only queues the job; runAndWait performs the synthesis
+             self.tts_engine.save_to_file(text, tmp_file.name)
+             self.tts_engine.runAndWait()
+             return tmp_file.name
+         except Exception as e:
+             logger.error(f"TTS error: {e}")
+             return None
+
+     def fallback_speech_to_text(self, audio_path: str) -> str:
+         """Fallback speech recognition using speech_recognition library"""
+         try:
+             with sr.AudioFile(audio_path) as source:
+                 audio = self.recognizer.record(source)
+                 return self.recognizer.recognize_google(audio)
+         except Exception as e:
+             return f"Could not understand audio: {str(e)}"
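+     # Note: recognize_google sends the audio to Google's free Web Speech API,
+     # so this fallback needs network access; speech_recognition's offline
+     # recognize_sphinx (with the pocketsphinx package installed) could be
+     # swapped in if offline operation is required.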
+
+     def detect_emergency_keywords(self, text: str) -> bool:
+         """Detect emergency keywords in text"""
+         emergency_words = [
+             'emergency', 'help', 'urgent', 'danger', 'dangerous',
+             'accident', 'injured', 'hurt', 'pain', 'bleeding',
+             'fire', 'police', 'ambulance', 'hospital', '911',
+             'save me', 'help me', 'i need help'
+         ]
+         text_lower = text.lower()
+         return any(word in text_lower for word in emergency_words)
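+     # Caveat: this is a plain substring match, so "that was helpful" also
+     # triggers ('help' in 'helpful'); word-boundary matching (e.g. a regex
+     # with \b) would reduce such false positives.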
+
+     def detect_important_sounds(self, audio_path: str) -> bool:
+         """Detect important environmental sounds"""
+         # Simplified - in production would use audio analysis
+         # For now, use speech recognition to check for important words
+         try:
+             transcript = self.fallback_speech_to_text(audio_path)
+             important_words = ['help', 'emergency', 'fire', 'watch out', 'danger']
+             return any(word in transcript.lower() for word in important_words)
+         except Exception:
+             return False
+
+     def text_to_vibration_pattern(self, text: str) -> List[int]:
+         """Convert text to vibration pattern (simplified Morse code)"""
+         morse_code = {
+             'A': '.-', 'B': '-...', 'C': '-.-.', 'D': '-..', 'E': '.',
+             'F': '..-.', 'G': '--.', 'H': '....', 'I': '..', 'J': '.---',
+             'K': '-.-', 'L': '.-..', 'M': '--', 'N': '-.', 'O': '---',
+             'P': '.--.', 'Q': '--.-', 'R': '.-.', 'S': '...', 'T': '-',
+             'U': '..-', 'V': '...-', 'W': '.--', 'X': '-..-', 'Y': '-.--', 'Z': '--..',
+             '1': '.----', '2': '..---', '3': '...--', '4': '....-', '5': '.....',
+             '6': '-....', '7': '--...', '8': '---..', '9': '----.', '0': '-----',
+             ' ': ' '
+         }
+
+         pattern = []
+         for char in text.upper():
+             if char in morse_code:
+                 morse = morse_code[char]
+                 for symbol in morse:
+                     if symbol == '.':
+                         pattern.append(100)  # Short vibration
+                     elif symbol == '-':
+                         pattern.append(300)  # Long vibration
+                     pattern.append(50)   # Gap between symbols
+                 pattern.append(200)  # Gap between letters
+
+         return pattern
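+     # Example (derived from the table above): text_to_vibration_pattern("SOS")
+     # returns [100, 50, 100, 50, 100, 50, 200,   # S = ...
+     #          300, 50, 300, 50, 300, 50, 200,   # O = ---
+     #          100, 50, 100, 50, 100, 50, 200]   # S = ...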
+
+     def text_to_braille(self, text: str) -> str:
+         """Convert text to braille unicode characters"""
+         braille_map = {
+             'A': '⠁', 'B': '⠃', 'C': '⠉', 'D': '⠙', 'E': '⠑', 'F': '⠋', 'G': '⠛', 'H': '⠓', 'I': '⠊', 'J': '⠚',
+             'K': '⠅', 'L': '⠇', 'M': '⠍', 'N': '⠝', 'O': '⠕', 'P': '⠏', 'Q': '⠟', 'R': '⠗', 'S': '⠎', 'T': '⠞',
+             'U': '⠥', 'V': '⠧', 'W': '⠺', 'X': '⠭', 'Y': '⠽', 'Z': '⠵',
+             '1': '⠁', '2': '⠃', '3': '⠉', '4': '⠙', '5': '⠑', '6': '⠋', '7': '⠛', '8': '⠓', '9': '⠊', '0': '⠚',
+             ' ': ' ', '.': '⠲', ',': '⠂', '!': '⠖', '?': '⠦'
+         }
+
+         return ''.join(braille_map.get(char.upper(), '?') for char in text)
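+     # Example: text_to_braille("HELLO") returns "⠓⠑⠇⠇⠕". Note that digits
+     # reuse the A-J cells here; standard braille would prefix them with the
+     # number sign ⠼, which this simplified map omits.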
+
+     def enhance_scene_description(self, description: str) -> str:
+         """Enhance AI-generated scene descriptions"""
+         enhancements = {
+             "indoor": "This appears to be an indoor setting. ",
+             "outdoor": "This appears to be an outdoor area. ",
+             "people": "There are people visible. ",
+             "text": "There is text that could be read. ",
+             "obstacle": "Be careful of potential obstacles. ",
+         }
+
+         enhanced = description
+         desc_lower = description.lower()
+
+         if any(word in desc_lower for word in ['room', 'indoor', 'inside', 'wall']):
+             enhanced = enhancements["indoor"] + enhanced
+         elif any(word in desc_lower for word in ['outdoor', 'outside', 'sky', 'tree']):
+             enhanced = enhancements["outdoor"] + enhanced
+
+         if any(word in desc_lower for word in ['person', 'people', 'man', 'woman']):
+             enhanced = enhancements["people"] + enhanced
+
+         if any(word in desc_lower for word in ['sign', 'text', 'letter', 'word']):
+             enhanced = enhancements["text"] + enhanced
+
+         return enhanced
+
+     def is_system_command(self, text: str) -> bool:
+         """Check if text contains system commands"""
+         commands = ['mode', 'help', 'emergency', 'stop', 'cancel', 'reset']
+         return any(command in text.lower() for command in commands)
+
+     def handle_system_command(self, command: str) -> dict:
+         """Handle system control commands"""
+         command = command.lower()
+
+         # Check 'deaf blind' first so it is not shadowed by the broader
+         # 'deaf' and 'blind' matches
+         if 'deaf blind' in command:
+             self.current_mode = "deaf_blind"
+             response = "Deaf-blind mode activated. Haptic feedback enabled."
+         elif 'blind' in command:
+             self.current_mode = "blind"
+             response = "Blind mode activated. Voice navigation enabled."
+         elif 'deaf' in command:
+             self.current_mode = "deaf"
+             response = "Deaf mode activated. Visual alerts enabled."
+         elif 'non verbal' in command or 'mute' in command:
+             self.current_mode = "non_verbal"
+             response = "Non-verbal mode activated. Text-to-speech ready."
+         elif 'universal' in command:
+             self.current_mode = "universal"
+             response = "Universal mode activated."
+         elif 'emergency' in command:
+             return self.trigger_emergency_mode("voice_command")
+         else:
+             response = f"Current mode: {self.current_mode}. Say 'help' for options."
+
+         return self.create_response(response, "system", audio=self.text_to_speech(response))
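+     # Example: "switch to deaf blind mode" sets current_mode to "deaf_blind",
+     # while any command containing "emergency" immediately returns
+     # trigger_emergency_mode()'s payload instead of a mode confirmation.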
+
+     def add_to_conversation(self, speaker: str, text: str, message_type: str = "text"):
+         """Add message to conversation history"""
+         self.conversation_history.append({
+             "timestamp": datetime.now().isoformat(),
+             "speaker": speaker,
+             "text": text,
+             "type": message_type
+         })
+
+         # Keep only last 100 messages
+         if len(self.conversation_history) > 100:
+             self.conversation_history = self.conversation_history[-100:]
+
+     def log_emergency(self, emergency_data: dict):
+         """Log emergency event"""
+         try:
+             filename = f"data/emergency/emergency_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+             with open(filename, 'w') as f:
+                 json.dump(emergency_data, f, indent=2)
+         except Exception as e:
+             logger.error(f"Failed to log emergency: {e}")
+
+     def create_response(self, text: str, response_type: str, **kwargs) -> dict:
+         """Create standardized response object"""
+         return {
+             "text": text,
+             "type": response_type,
+             "timestamp": datetime.now().isoformat(),
+             "mode": self.current_mode,
+             "audio": kwargs.get('audio'),
+             "visual_alert": kwargs.get('visual_alert'),
+             "haptic_feedback": kwargs.get('haptic_feedback'),
+             "braille": kwargs.get('braille')
+         }
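+     # Every handler above returns this shape, e.g.:
+     #   {"text": "Yes", "type": "communication", "timestamp": "...",
+     #    "mode": "non_verbal", "audio": "data/tmpXXXX.wav",
+     #    "visual_alert": "🗣️ Speaking: Yes",
+     #    "haptic_feedback": [100, 100], "braille": None}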
+
+
+ # ==================== GRADIO INTERFACE ====================
+
+ def create_production_interface():
+     """Create production-ready Gradio interface"""
+
+     # Initialize the system
+     voice_bridge = ProductionVoiceBridge()
+
+     # Custom CSS for accessibility
+     custom_css = """
+     :root {
+         --primary-color: #2563eb;
+         --danger-color: #dc2626;
+         --success-color: #16a34a;
+         --warning-color: #d97706;
+     }
+
+     .accessible-btn {
+         min-height: 48px !important;
+         min-width: 48px !important;
+         padding: 12px 24px !important;
+         font-size: 16px !important;
+         margin: 4px !important;
+         border: 2px solid !important;
+     }
+
+     .emergency-btn {
+         background: linear-gradient(45deg, #dc2626, #ef4444) !important;
+         color: white !important;
+         font-weight: bold !important;
+         font-size: 20px !important;
+         animation: pulse 2s infinite !important;
+     }
+
+     @keyframes pulse {
+         0% { transform: scale(1); opacity: 1; }
+         50% { transform: scale(1.05); opacity: 0.9; }
+         100% { transform: scale(1); opacity: 1; }
+     }
+
+     .high-contrast {
+         filter: contrast(200%) !important;
+     }
+
+     .large-text { font-size: 18px !important; }
+     .x-large-text { font-size: 22px !important; }
+
+     .sr-only {
+         position: absolute !important;
+         width: 1px !important;
+         height: 1px !important;
+         padding: 0 !important;
+         margin: -1px !important;
+         overflow: hidden !important;
+         clip: rect(0, 0, 0, 0) !important;
+         white-space: nowrap !important;
+         border: 0 !important;
+     }
+
+     @media (max-width: 768px) {
+         .container {
+             padding: 8px !important;
+         }
+         .accessible-btn {
+             min-height: 54px !important;
+             min-width: 54px !important;
+             font-size: 18px !important;
+         }
+     }
+     """
+
+     with gr.Blocks(
+         css=custom_css,
+         theme=gr.themes.Soft(primary_hue="blue"),
+         title="VoiceBridge AI - Universal Communication",
+         head='<meta name="description" content="Accessibility communication platform for blind, deaf, non-verbal, and deaf-blind users">'
+     ) as demo:
+
+         # Screen reader announcement area
+         sr_announcement = gr.Textbox(
+             label="Screen Reader Announcements",
+             elem_id="sr-announcement",
+             visible=False
+         )
+
+         gr.Markdown("""
+         # 🎯 VoiceBridge AI - Universal Communication Platform
+         **Production-Ready Accessibility Solution for All Disabilities**
+
+         *Supporting: 👁️ Blind Users • 👂 Deaf Users • 🤐 Non-Verbal Users • 👁️👂 Deaf-Blind Users*
+         """)
+
+         # System Status Bar
+         with gr.Row():
+             system_status = gr.Textbox(
+                 label="System Status",
+                 value="✅ System Ready - VoiceBridge AI Initialized",
+                 interactive=False,
+                 max_lines=1
+             )
+             current_mode_display = gr.Textbox(
+                 label="Current Mode",
+                 value="universal",
+                 interactive=False,
+                 max_lines=1
+             )
+
+         # Emergency Section (Always Visible)
+         with gr.Row():
+             emergency_btn = gr.Button(
+                 "🚨 ACTIVATE EMERGENCY MODE",
+                 elem_classes=["accessible-btn", "emergency-btn", "x-large-text"],
+                 scale=2
+             )
+             emergency_contact_input = gr.Textbox(
+                 label="Emergency Contact (Email/Phone)",
+                 placeholder="Enter emergency contact information...",
+                 scale=1
+             )
+
+         # Mode Selection
+         with gr.Row():
+             mode_selector = gr.Radio(
+                 choices=[
+                     ("Universal", "universal"),
+                     ("Blind", "blind"),
+                     ("Deaf", "deaf"),
+                     ("Non-Verbal", "non_verbal"),
+                     ("Deaf-Blind", "deaf_blind")
+                 ],
+                 label="Accessibility Mode",
+                 value="universal",
+                 elem_id="mode-selector"
+             )
+
+         # Universal Communication Tab
+         with gr.Tab("🌐 Universal Communication", id="universal"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     universal_audio = gr.Audio(
+                         label="🎤 Speak (Voice Input)",
+                         type="filepath",
+                         sources=["microphone"]
+                     )
+                     universal_text = gr.Textbox(
+                         label="⌨️ Type to Speak",
+                         placeholder="Enter text to be spoken aloud...",
+                         lines=3
+                     )
+                     universal_image = gr.Image(
+                         label="📷 Capture Scene",
+                         type="filepath",
+                         sources=["webcam", "upload"]
+                     )
+
+                     process_universal = gr.Button(
+                         "Process Input",
+                         elem_classes="accessible-btn",
+                         size="lg"
+                     )
+
+                 with gr.Column(scale=1):
+                     universal_output = gr.Textbox(
+                         label="Output",
+                         lines=6,
+                         max_lines=10
+                     )
+                     universal_audio_output = gr.Audio(
+                         label="Audio Output",
+                         type="filepath",
+                         interactive=False
+                     )
+                     universal_alert = gr.Textbox(
+                         label="Visual Alerts",
+                         visible=False
+                     )
+
+         # Blind Assistance Tab
+         with gr.Tab("👁️ Blind Assistance", id="blind"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     blind_audio = gr.Audio(
+                         label="Voice Commands",
+                         type="filepath",
+                         sources=["microphone"]
+                     )
+                     blind_commands = gr.Radio(
+                         choices=[
+                             "describe scene",
+                             "read text",
+                             "navigate",
+                             "help"
+                         ],
+                         label="Quick Commands",
+                         value="describe scene"
+                     )
+                     blind_image = gr.Image(
+                         label="Camera Feed",
+                         type="filepath",
+                         sources=["webcam", "upload"]
+                     )
+
+                     process_blind = gr.Button(
+                         "Execute Command",
+                         elem_classes="accessible-btn"
+                     )
+
+                 with gr.Column(scale=1):
+                     blind_output = gr.Textbox(
+                         label="Scene Description",
+                         lines=5
+                     )
+                     blind_audio_output = gr.Audio(
+                         label="Audio Description",
+                         type="filepath"
+                     )
+
+         # Deaf Assistance Tab
+         with gr.Tab("👂 Deaf Assistance", id="deaf"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     deaf_audio = gr.Audio(
+                         label="Audio to Transcribe",
+                         type="filepath",
+                         sources=["microphone", "upload"]
+                     )
+                     continuous_listening = gr.Checkbox(
+                         label="Continuous Listening Mode",
+                         value=False
+                     )
+
+                     process_deaf = gr.Button(
+                         "Transcribe Audio",
+                         elem_classes="accessible-btn"
+                     )
+
+                 with gr.Column(scale=1):
+                     deaf_output = gr.Textbox(
+                         label="Transcription",
+                         lines=6
+                     )
+                     deaf_alerts = gr.Textbox(
+                         label="Sound Alerts",
+                         lines=2
+                     )
+
+         # Non-Verbal Communication Tab
+         with gr.Tab("🤐 Non-Verbal Communication", id="non_verbal"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     preset_phrases = gr.Radio(
+                         choices=[
+                             "greeting", "help", "medical", "emergency",
+                             "thanks", "yes", "no", "pain", "lost", "bathroom"
+                         ],
+                         label="Quick Phrases",
+                         value="greeting"
+                     )
+                     custom_phrase = gr.Textbox(
+                         label="Custom Message",
+                         placeholder="Or type your own message...",
+                         lines=2
+                     )
+
+                     speak_btn = gr.Button(
+                         "Speak Message",
+                         elem_classes="accessible-btn",
+                         size="lg"
+                     )
+
+                 with gr.Column(scale=1):
+                     spoken_text = gr.Textbox(
+                         label="Message",
+                         lines=3
+                     )
+                     message_audio = gr.Audio(
+                         label="Spoken Audio",
+                         type="filepath"
+                     )
+
+         # Deaf-Blind Communication Tab
+         with gr.Tab("👁️👂 Deaf-Blind Communication", id="deaf_blind"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     tactile_input = gr.Textbox(
+                         label="Message to Convert",
+                         placeholder="Enter text for tactile communication...",
+                         lines=3
+                     )
+                     output_format = gr.Radio(
+                         choices=["haptic", "braille"],
+                         label="Output Format",
+                         value="haptic"
+                     )
+
+                     convert_btn = gr.Button(
+                         "Convert to Tactile",
+                         elem_classes="accessible-btn"
+                     )
+
+                 with gr.Column(scale=1):
+                     braille_output = gr.Textbox(
+                         label="Braille Output",
+                         lines=3
+                     )
+                     vibration_pattern = gr.Textbox(
+                         label="Vibration Pattern",
+                         lines=2
+                     )
+
+         # Feedback and Settings
+         with gr.Tab("⚙️ Settings & Feedback", id="settings"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 🔧 Accessibility Settings")
+                     high_contrast = gr.Checkbox(label="High Contrast Mode", value=False)
+                     large_text = gr.Checkbox(label="Large Text Mode", value=False)
+                     voice_navigation = gr.Checkbox(label="Voice Navigation", value=True)
+
+                     gr.Markdown("### 📧 Feedback")
+                     feedback_email = gr.Textbox(label="Your Email (optional)")
+                     feedback_message = gr.Textbox(
+                         label="Feedback & Suggestions",
+                         placeholder="Help us improve VoiceBridge AI...",
+                         lines=4
+                     )
+                     submit_feedback = gr.Button("Submit Feedback", elem_classes="accessible-btn")
+                     feedback_status = gr.Textbox(label="Status", interactive=False)
+
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 📊 System Information")
+                     conversation_history = gr.Textbox(
+                         label="Recent Conversation",
+                         lines=8,
+                         max_lines=10
+                     )
+                     clear_history = gr.Button("Clear History", elem_classes="accessible-btn")
+                     export_data = gr.Button("Export Data", elem_classes="accessible-btn")
+
+         # ==================== EVENT HANDLERS ====================
+
+         def handle_mode_change(mode):
+             voice_bridge.current_mode = mode
+             status_msg = f"Mode changed to: {mode}"
+             voice_bridge.add_to_conversation("System", status_msg)
+             return status_msg, status_msg  # For both status displays
+
+         def handle_universal_input(audio, text, image, mode):
+             if audio:
+                 input_data = {'type': 'voice', 'audio': audio}
+             elif text:
+                 input_data = {'type': 'text', 'text': text}
+             elif image:
+                 input_data = {'type': 'image', 'image': image}
+             else:
+                 return "Please provide input", None, None
+
+             result = voice_bridge.universal_communication(input_data)
+             return result['text'], result.get('audio'), result.get('visual_alert', '')
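+         # Input priority is audio, then text, then image: if a user records
+         # audio and also types text, only the audio is processed per click.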
+
+         def handle_blind_assistance(audio, command, image):
+             if audio:
+                 transcript = voice_bridge.fallback_speech_to_text(audio)
+                 result = voice_bridge.blind_mode(transcript, image)
+             elif image:
+                 result = voice_bridge.blind_mode(command, image)
+             else:
+                 result = voice_bridge.blind_mode(command)
+
+             return result['text'], result.get('audio')
+
+         def handle_deaf_assistance(audio, continuous):
+             result = voice_bridge.deaf_mode(audio, continuous)
+             return result['text'], result.get('visual_alert', 'No important sounds detected')
+
+         def handle_non_verbal(preset, custom):
+             text_to_speak = custom if custom else None
+             result = voice_bridge.non_verbal_mode(text_to_speak, preset)
+             return result['text'], result.get('audio')
+
+         def handle_deaf_blind(input_text, output_format):
+             result = voice_bridge.deaf_blind_mode(input_text, output_format)
+             braille = result.get('braille', '')
+             pattern = str(result.get('haptic_feedback', []))
+             return result['text'], braille, pattern
+
+         def handle_emergency(contact):
+             if contact:
+                 voice_bridge.emergency_contacts.append(contact)
+             result = voice_bridge.trigger_emergency_mode("manual")
+             return result['message'], result.get('audio')
+
+         def handle_feedback(email, message):
+             if not message.strip():
+                 return "Please enter a feedback message"
+
+             # In production, this would save to database/send email
+             feedback_data = {
+                 'timestamp': datetime.now().isoformat(),
+                 'email': email,
+                 'message': message,
+                 'mode': voice_bridge.current_mode
+             }
+
+             try:
+                 # Save feedback locally
+                 with open('data/feedback.json', 'a') as f:
+                     f.write(json.dumps(feedback_data) + '\n')
+
+                 return "✅ Thank you for your feedback! We'll review it soon. Contact: Naila.Rais@msftcommunity.com"
+             except Exception as e:
+                 return f"❌ Could not save feedback: {str(e)}"
+
+         # Connect event handlers
+         mode_selector.change(
+             handle_mode_change,
+             inputs=mode_selector,
+             outputs=[system_status, current_mode_display]
+         )
+
+         process_universal.click(
+             handle_universal_input,
+             inputs=[universal_audio, universal_text, universal_image, mode_selector],
+             outputs=[universal_output, universal_audio_output, universal_alert]
+         )
+
+         process_blind.click(
+             handle_blind_assistance,
+             inputs=[blind_audio, blind_commands, blind_image],
+             outputs=[blind_output, blind_audio_output]
+         )
+
+         process_deaf.click(
+             handle_deaf_assistance,
+             inputs=[deaf_audio, continuous_listening],
+             outputs=[deaf_output, deaf_alerts]
+         )
+
+         speak_btn.click(
+             handle_non_verbal,
+             inputs=[preset_phrases, custom_phrase],
+             outputs=[spoken_text, message_audio]
+         )
+
+         convert_btn.click(
+             handle_deaf_blind,
+             inputs=[tactile_input, output_format],
+             outputs=[tactile_input, braille_output, vibration_pattern]
+         )
+
+         emergency_btn.click(
+             handle_emergency,
+             inputs=emergency_contact_input,
+             outputs=[system_status, universal_audio_output]
+         )
+
+         submit_feedback.click(
+             handle_feedback,
+             inputs=[feedback_email, feedback_message],
+             outputs=feedback_status
+         )
+
+         # Initialize system
+         demo.load(
+             fn=lambda: ("System Ready - VoiceBridge AI Initialized", "universal"),
+             outputs=[system_status, current_mode_display]
+         )
+
+     return demo
+
+ # Production deployment
+ if __name__ == "__main__":
+     try:
+         print("🚀 Starting VoiceBridge AI Production Server...")
+         print("📧 Support & Feedback: Naila.Rais@msftcommunity.com")
+         print("🌐 Access the app at the URL provided below")
+
+         demo = create_production_interface()
+         demo.launch(
+             server_name="0.0.0.0",
+             server_port=7860,
+             share=True,
+             debug=False,
+             show_error=True,
+             auth=("NailaR", "voicebridge2025") if os.getenv('PRODUCTION') else None
+         )
+     except Exception as e:
+         logger.error(f"Failed to start production server: {e}")
+         print(f"❌ Startup failed: {e}")