#!/usr/bin/env python3
"""
VoiceBridge.AI - Production Ready Universal Communication Platform
Supporting: Blind, Deaf, Non-Verbal, Deaf-Blind Users
"""
import os
import logging
import json
import tempfile
import time
from datetime import datetime
from pathlib import Path
from typing import List, Optional
import gradio as gr
import speech_recognition as sr
import pyttsx3
import torch
from transformers import pipeline

# Optional imports (may fail gracefully in some environments)
try:
    import cv2
    import numpy as np
except Exception:
    cv2 = None
    np = None

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class ProductionVoiceBridge:
    """
    Production-grade universal communication system for all disabilities
    """

    def __init__(self, allow_microphone: bool = False):
        """
        If allow_microphone is False (the default), we do not force
        initialization of the system microphone — this avoids failures in
        headless environments such as Hugging Face Spaces.
        """
        self.allow_microphone = allow_microphone
        self.setup_directories()
        self.load_config()
        self.current_mode = "universal"
        self.user_preferences = {}
        self.conversation_history = []
        self.emergency_contacts = []
        # Initialize engines inside try blocks so failures are caught
        self._init_defaults()
        self.initialize_engines()

    def _init_defaults(self):
        # Placeholders so other methods can rely on these attributes existing
        self.tts_engine = None
        self.recognizer = None
        self.microphone = None
        self.speech_to_text_model = None
        self.image_caption_model = None
        self.emergency_mode = False
        self.last_emergency_check = time.time()

    def setup_directories(self):
        """Create the directories needed in production"""
        Path("data").mkdir(parents=True, exist_ok=True)
        Path("data/conversations").mkdir(parents=True, exist_ok=True)
        Path("data/emergency").mkdir(parents=True, exist_ok=True)
        Path("data/user_profiles").mkdir(parents=True, exist_ok=True)
        Path("data/feedback").mkdir(parents=True, exist_ok=True)

    def load_config(self):
        """Load production configuration"""
        self.config = {
            "api_timeout": 30,
            "max_audio_length": 60,
            "emergency_check_interval": 5,
            "backup_interval": 300,
            "supported_languages": ["en", "es", "fr", "de"],
            "haptic_patterns": {
                "emergency": [500, 200, 500],
                "notification": [200],
                "confirmation": [100, 100],
                "error": [100, 50, 100, 50, 100]
            }
        }
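        # Note: haptic pattern values are millisecond durations, assumed here
        # to follow the Web Vibration API convention of alternating
        # vibrate/pause entries (e.g. [500, 200, 500] = vibrate 500 ms,
        # pause 200 ms, vibrate 500 ms). text_to_vibration_pattern below
        # emits patterns in the same shape.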
    def initialize_engines(self):
        """Initialize all AI engines and hardware interfaces"""
        try:
            # Text-to-speech engine (pyttsx3 is local and usually safe)
            try:
                self.tts_engine = pyttsx3.init()
                voices = self.tts_engine.getProperty('voices') or []
                if voices:
                    self.tts_engine.setProperty('voice', voices[0].id)
                self.tts_engine.setProperty('rate', 160)
                self.tts_engine.setProperty('volume', 0.8)
            except Exception as e:
                logger.warning(f"TTS engine init failed: {e}")
                self.tts_engine = None

            # Speech recognition (recognizer only; microphone is optional)
            try:
                self.recognizer = sr.Recognizer()
            except Exception as e:
                logger.warning(f"SpeechRecognition init failed: {e}")
                self.recognizer = None

            # Only attempt to initialize the microphone if explicitly allowed
            if self.allow_microphone:
                try:
                    self.microphone = sr.Microphone()
                    # adjust_for_ambient_noise can fail when headless; guard it
                    try:
                        with self.microphone as source:
                            self.recognizer.adjust_for_ambient_noise(source, duration=1)
                    except Exception as e:
                        logger.warning(f"Ambient noise adjust failed: {e}")
                except Exception as e:
                    logger.warning(f"Microphone not available: {e}")
                    self.microphone = None
            else:
                # Keep the microphone None in headless mode to avoid exceptions
                self.microphone = None

            # AI models with error handling (transformers pipelines)
            self.load_ai_models()

            # Emergency system
            self.emergency_mode = False
            self.last_emergency_check = time.time()
            logger.info("All engines initialized (best-effort)")
        except Exception as e:
            # Log the full traceback, then re-raise so failures are obvious in dev mode
            logger.error(f"Engine initialization failed: {e}")
            logger.debug("Traceback:", exc_info=True)
            raise
    def load_ai_models(self):
        """Load AI models with fallbacks. These can be heavy; fail gracefully."""
        # Whisper (ASR) - optional; if unavailable, fall back to sr.Recognizer
        try:
            # Device selection: use CUDA if available, else CPU
            device = 0 if torch.cuda.is_available() else -1
            self.speech_to_text_model = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-base",
                device=device
            )
            logger.info("Whisper ASR model loaded")
        except Exception as e:
            logger.warning(f"Whisper model failed to load: {e}")
            self.speech_to_text_model = None

        # Image captioning - optional
        try:
            self.image_caption_model = pipeline(
                "image-to-text",
                model="Salesforce/blip-image-captioning-base",
                device=-1
            )
            logger.info("Image caption model loaded")
        except Exception as e:
            logger.warning(f"Image caption model failed to load: {e}")
            self.image_caption_model = None
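        # Note: the first load downloads weights from the Hugging Face Hub
        # (cached under ~/.cache/huggingface by default), so allow extra time
        # and disk space on a fresh deployment.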
    # ==================== UNIVERSAL MODE ====================
    def universal_communication(self, input_data: dict) -> dict:
        """
        Universal communication handler that adapts to any input type
        """
        try:
            input_type = input_data.get('type', 'voice')
            if input_type == 'voice' and input_data.get('audio'):
                return self.handle_voice_input(input_data['audio'])
            elif input_type == 'text' and input_data.get('text'):
                return self.handle_text_input(input_data['text'])
            elif input_type == 'image' and input_data.get('image'):
                return self.handle_image_input(input_data['image'])
            elif input_type == 'command':
                return self.handle_system_command(input_data.get('command', ''))
            else:
                return self.create_response(
                    "Please provide voice, text, or image input",
                    "error"
                )
        except Exception as e:
            logger.error(f"Universal communication error: {e}", exc_info=True)
            return self.create_response(
                "System error. Please try again or use emergency mode.",
                "error"
            )
    def handle_voice_input(self, audio_path: str) -> dict:
        """Process voice input for deaf users and general transcription"""
        try:
            transcript = ""
            # If a transformers ASR pipeline is available and an audio path exists, try it
            if self.speech_to_text_model and audio_path:
                try:
                    # transformers pipelines accept a file path
                    out = self.speech_to_text_model(audio_path)
                    transcript = out.get("text", "") if isinstance(out, dict) else str(out)
                except Exception as e:
                    logger.warning(f"HF ASR failed, falling back to sr: {e}")
                    transcript = self.fallback_speech_to_text(audio_path)
            else:
                transcript = self.fallback_speech_to_text(audio_path)
            transcript = transcript or ""

            # Check for emergency keywords
            if self.detect_emergency_keywords(transcript):
                emergency_result = self.trigger_emergency_mode("voice_triggered")
                return self.create_response(
                    f"EMERGENCY DETECTED: {transcript}\n{emergency_result['message']}",
                    "emergency",
                    audio=emergency_result.get('audio'),
                    visual_alert="🔴 EMERGENCY ACTIVATED"
                )

            # Check for system commands
            if self.is_system_command(transcript):
                return self.handle_system_command(transcript)

            # Regular communication
            self.add_to_conversation("User", transcript)
            return self.create_response(
                transcript,
                "transcription",
                visual_alert=f"💬 New message: {transcript[:50]}..."
            )
        except Exception as e:
            logger.error(f"Voice input error: {e}", exc_info=True)
            return self.create_response(
                "Could not process audio. Please try again.",
                "error"
            )
    def handle_text_input(self, text: str) -> dict:
        """Process text input for non-verbal users"""
        try:
            # Check for emergency
            if self.detect_emergency_keywords(text):
                emergency_result = self.trigger_emergency_mode("text_triggered")
                return self.create_response(
                    f"EMERGENCY: {text}\n{emergency_result['message']}",
                    "emergency",
                    audio=emergency_result.get('audio'),
                    visual_alert="🔴 EMERGENCY"
                )

            # Convert to speech
            audio_path = self.text_to_speech(text)
            self.add_to_conversation("User", text, "spoken")
            return self.create_response(
                text,
                "communication",
                audio=audio_path,
                visual_alert=f"🗣️ Speaking: {text[:30]}..."
            )
        except Exception as e:
            logger.error(f"Text input error: {e}", exc_info=True)
            return self.create_response(
                "Could not process text. Please try again.",
                "error"
            )
    def handle_image_input(self, image_path: str) -> dict:
        """Process image input for blind users"""
        try:
            if not self.image_caption_model:
                description = "I see an image but cannot describe it in detail right now."
            else:
                try:
                    caption_out = self.image_caption_model(image_path)
                    # The pipeline returns a list or a dict depending on version
                    if isinstance(caption_out, list) and caption_out:
                        description = caption_out[0].get('generated_text', '')
                    elif isinstance(caption_out, dict):
                        description = caption_out.get('generated_text', '') or caption_out.get('text', '')
                    else:
                        description = str(caption_out)
                except Exception as e:
                    logger.warning(f"Image captioning failed: {e}")
                    description = "I see an image but cannot describe it in detail right now."

            description = self.enhance_scene_description(description)
            # Convert the description to speech
            audio_path = self.text_to_speech(description)
            return self.create_response(
                description,
                "scene_description",
                audio=audio_path
            )
        except Exception as e:
            logger.error(f"Image input error: {e}", exc_info=True)
            return self.create_response(
                "Could not process image. Please try again.",
                "error"
            )
    # ==================== DISABILITY-SPECIFIC MODES ====================
    def blind_mode(self, command: str = None, image_path: str = None) -> dict:
        """Voice-first interface for blind users"""
        if not command and not image_path:
            welcome_msg = (
                "Blind mode activated. Say 'describe scene' to use camera, "
                "'read text' for text recognition, or 'help' for options."
            )
            return self.create_response(welcome_msg, "system", audio=self.text_to_speech(welcome_msg))

        if command:
            command = command.lower()
            if 'describe' in command or 'scene' in command or image_path:
                if image_path:
                    return self.handle_image_input(image_path)
                return self.create_response(
                    "Please capture an image using the camera",
                    "instruction"
                )
            elif 'read' in command or 'text' in command:
                return self.create_response(
                    "Please capture an image containing text",
                    "instruction"
                )
            elif 'navigate' in command or 'direction' in command:
                guidance = "Navigation assistance: Move forward carefully. Obstacle detection active."
                return self.create_response(
                    guidance,
                    "navigation",
                    audio=self.text_to_speech(guidance)
                )
            elif 'help' in command:
                help_text = (
                    "Blind Mode Commands:\n"
                    "• \"Describe scene\" - Describe surroundings using camera\n"
                    "• \"Read text\" - Read text from images\n"
                    "• \"Navigate\" - Get walking directions\n"
                    "• \"Emergency\" - Immediate assistance\n"
                    "• \"Change mode\" - Switch accessibility mode\n"
                )
                return self.create_response(help_text, "help", audio=self.text_to_speech(help_text))
            else:
                response = "Command not recognized. Say 'help' for options."
                return self.create_response(response, "error", audio=self.text_to_speech(response))

        # An image was supplied without a spoken command: describe it directly
        return self.handle_image_input(image_path)
    def deaf_mode(self, audio_input: str = None, continuous: bool = False) -> dict:
        """Visual interface for deaf users with real-time transcription"""
        if audio_input:
            result = self.handle_voice_input(audio_input)
            # Add visual enhancements for deaf users
            if result.get('type') == 'transcription':
                result['visual_alert'] = f"👂 TRANSCRIPTION: {result.get('text', '')[:100]}..."
            # Check for important sounds
            if self.detect_important_sounds(audio_input):
                result['visual_alert'] = "🔔 IMPORTANT SOUND DETECTED! " + result.get('visual_alert', '')
                result['haptic_feedback'] = self.config['haptic_patterns']['notification']
            return result
        else:
            status = "Deaf mode active. Real-time transcription ready. Visual alerts enabled."
            return self.create_response(status, "system", visual_alert="👂 Deaf Mode Active")
    def non_verbal_mode(self, text: str = None, preset: str = None) -> dict:
        """Text-to-speech communication for non-verbal users"""
        phrases = {
            'greeting': "Hello, I use this device to communicate",
            'help': "I need assistance please",
            'medical': "I have a medical condition and may need help",
            'emergency': "This is an emergency! I need immediate assistance!",
            'thanks': "Thank you for your help",
            'yes': "Yes",
            'no': "No",
            'pain': "I am in pain and need medical help",
            'lost': "I am lost and need directions",
            'bathroom': "I need to find a bathroom"
        }
        # A custom message takes priority over a preset: the UI's preset radio
        # always carries a value, so checking preset first would shadow custom text.
        if text:
            text_to_speak = text
        elif preset:
            text_to_speak = phrases.get(preset, preset)
        else:
            text_to_speak = "I need help"

        audio_path = self.text_to_speech(text_to_speak)
        self.add_to_conversation("User", text_to_speak, "spoken")
        return self.create_response(
            text_to_speak,
            "communication",
            audio=audio_path,
            visual_alert=f"🗣️ Speaking: {text_to_speak}",
            haptic_feedback=self.config['haptic_patterns']['confirmation']
        )
    def deaf_blind_mode(self, input_text: str = None, output_format: str = "haptic") -> dict:
        """Tactile communication for deaf-blind users"""
        if input_text:
            if output_format == "haptic":
                vibration_pattern = self.text_to_vibration_pattern(input_text)
                return self.create_response(
                    f"Message converted to vibrations: {input_text}",
                    "tactile",
                    haptic_feedback=vibration_pattern,
                    braille=self.text_to_braille(input_text)
                )
            else:  # braille output
                braille_text = self.text_to_braille(input_text)
                return self.create_response(
                    f"Braille output: {input_text}",
                    "tactile",
                    braille=braille_text
                )
        else:
            status = "Deaf-blind mode active. Use text input with haptic or braille output."
            return self.create_response(status, "system")
    # ==================== EMERGENCY SYSTEM ====================
    def trigger_emergency_mode(self, trigger_source: str = "manual") -> dict:
        """Activate the emergency response system"""
        self.emergency_mode = True
        timestamp = datetime.now().isoformat()
        emergency_data = {
            "status": "EMERGENCY_ACTIVATED",
            "timestamp": timestamp,
            "trigger_source": trigger_source,
            "message": "EMERGENCY! Assistance required immediately!",
            "actions_taken": [],
            "contacts_notified": []
        }

        # Notify emergency contacts
        for contact in self.emergency_contacts:
            try:
                self.notify_emergency_contact(contact, emergency_data)
                emergency_data["contacts_notified"].append(contact)
            except Exception as e:
                logger.error(f"Failed to notify {contact}: {e}")

        # Create the emergency audio message
        emergency_audio = self.text_to_speech(emergency_data["message"])
        emergency_data["audio"] = emergency_audio

        # Log the emergency
        self.log_emergency(emergency_data)
        return emergency_data

    def notify_emergency_contact(self, contact: str, emergency_data: dict):
        """Notify an emergency contact (simplified; production would use SMS/email)"""
        logger.info(f"EMERGENCY NOTIFICATION to {contact}: {emergency_data['message']}")
        # In production: send an SMS, email, or push notification
    # ==================== CORE ENGINE METHODS ====================
    def text_to_speech(self, text: str) -> Optional[str]:
        """Convert text to speech and return the audio file path (best-effort)."""
        if not text:
            return None
        # If there is no TTS engine, return None gracefully
        if self.tts_engine is None:
            logger.warning("TTS engine not available; returning None for audio path.")
            return None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav', dir='data/') as tmp_file:
                tmp_path = tmp_file.name
            # pyttsx3 uses save_to_file followed by runAndWait
            self.tts_engine.save_to_file(text, tmp_path)
            self.tts_engine.runAndWait()
            return tmp_path
        except Exception as e:
            logger.error(f"TTS error: {e}", exc_info=True)
            return None
    def fallback_speech_to_text(self, audio_path: str) -> str:
        """Fallback speech recognition using the speech_recognition library"""
        if not audio_path:
            return ""
        if self.recognizer is None:
            logger.warning("Recognizer not available; cannot transcribe audio.")
            return ""
        try:
            with sr.AudioFile(audio_path) as source:
                audio = self.recognizer.record(source)
            # Use the Google Web Speech API (requires internet)
            text = self.recognizer.recognize_google(audio)
            return text
        except sr.UnknownValueError:
            return ""
        except sr.RequestError as e:
            logger.error(f"Speech recognition RequestError: {e}")
            return ""
        except Exception as e:
            logger.error(f"Fallback STT error: {e}", exc_info=True)
            return ""
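    # Offline alternative (assumption: the optional PocketSphinx extra for
    # speech_recognition is installed):
    #   text = self.recognizer.recognize_sphinx(audio)
    # recognize_google requires network access and uses a shared default API
    # key, so it is best-effort only.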
    def detect_emergency_keywords(self, text: str) -> bool:
        """Detect emergency keywords in text"""
        if not text:
            return False
        emergency_words = [
            'emergency', 'help', 'urgent', 'danger', 'dangerous',
            'accident', 'injured', 'hurt', 'pain', 'bleeding',
            'fire', 'police', 'ambulance', 'hospital', '911',
            'save me', 'help me', 'i need help'
        ]
        text_lower = text.lower()
        return any(word in text_lower for word in emergency_words)

    def detect_important_sounds(self, audio_path: str) -> bool:
        """Detect important environmental sounds (simplified heuristic)"""
        try:
            transcript = self.fallback_speech_to_text(audio_path)
            important_words = ['help', 'emergency', 'fire', 'watch out', 'danger']
            return any(word in transcript.lower() for word in important_words)
        except Exception:
            return False
    def text_to_vibration_pattern(self, text: str) -> List[int]:
        """Convert text to a vibration pattern (simplified Morse code)"""
        morse_code = {
            'A': '.-', 'B': '-...', 'C': '-.-.', 'D': '-..', 'E': '.',
            'F': '..-.', 'G': '--.', 'H': '....', 'I': '..', 'J': '.---',
            'K': '-.-', 'L': '.-..', 'M': '--', 'N': '-.', 'O': '---',
            'P': '.--.', 'Q': '--.-', 'R': '.-.', 'S': '...', 'T': '-',
            'U': '..-', 'V': '...-', 'W': '.--', 'X': '-..-', 'Y': '-.--', 'Z': '--..',
            '1': '.----', '2': '..---', '3': '...--', '4': '....-', '5': '.....',
            '6': '-....', '7': '--...', '8': '---..', '9': '----.', '0': '-----',
            ' ': ' '
        }
        pattern = []
        for char in text.upper():
            if char in morse_code:
                for symbol in morse_code[char]:
                    if symbol == '.':
                        pattern.append(100)  # Short vibration
                    elif symbol == '-':
                        pattern.append(300)  # Long vibration
                    pattern.append(50)  # Gap between symbols
                pattern.append(200)  # Gap between letters
        return pattern
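    # Worked example (assumed alternating vibrate/pause convention):
    #   text_to_vibration_pattern("E") -> [100, 50, 200]
    #     (one dot, symbol gap, letter gap)
    #   text_to_vibration_pattern("T") -> [300, 50, 200]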
    def text_to_braille(self, text: str) -> str:
        """Convert text to braille unicode characters"""
        braille_map = {
            'A': '⠁', 'B': '⠃', 'C': '⠉', 'D': '⠙', 'E': '⠑', 'F': '⠋', 'G': '⠛', 'H': '⠓', 'I': '⠊', 'J': '⠚',
            'K': '⠅', 'L': '⠇', 'M': '⠍', 'N': '⠝', 'O': '⠕', 'P': '⠏', 'Q': '⠟', 'R': '⠗', 'S': '⠎', 'T': '⠞',
            'U': '⠥', 'V': '⠧', 'W': '⠺', 'X': '⠭', 'Y': '⠽', 'Z': '⠵',
            '1': '⠁', '2': '⠃', '3': '⠉', '4': '⠙', '5': '⠑', '6': '⠋', '7': '⠛', '8': '⠓', '9': '⠊', '0': '⠚',
            ' ': ' ', '.': '⠲', ',': '⠂', '!': '⠖', '?': '⠦'
        }
        return ''.join(braille_map.get(char.upper(), '?') for char in text)
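    # Example: text_to_braille("Hi!") -> '⠓⠊⠖'; characters without a mapping
    # render as '?'. Note this simplified table maps digits to the letter
    # cells A-J without the braille number sign.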
    def enhance_scene_description(self, description: str) -> str:
        """Enhance AI-generated scene descriptions"""
        if not description:
            return description
        enhancements = {
            "indoor": "This appears to be an indoor setting. ",
            "outdoor": "This appears to be an outdoor area. ",
            "people": "There are people visible. ",
            "text": "There is text that could be read. ",
            "obstacle": "Be careful of potential obstacles. ",
        }
        enhanced = description
        desc_lower = description.lower()
        if any(word in desc_lower for word in ['room', 'indoor', 'inside', 'wall']):
            enhanced = enhancements["indoor"] + enhanced
        elif any(word in desc_lower for word in ['outdoor', 'outside', 'sky', 'tree']):
            enhanced = enhancements["outdoor"] + enhanced
        if any(word in desc_lower for word in ['person', 'people', 'man', 'woman']):
            enhanced = enhancements["people"] + enhanced
        if any(word in desc_lower for word in ['sign', 'text', 'letter', 'word']):
            enhanced = enhancements["text"] + enhanced
        return enhanced
    def is_system_command(self, text: str) -> bool:
        """Check whether text contains a system command"""
        if not text:
            return False
        commands = ['mode', 'help', 'emergency', 'stop', 'cancel', 'reset']
        return any(command in text.lower() for command in commands)

    def handle_system_command(self, command: str) -> dict:
        """Handle system control commands"""
        command = (command or "").lower()
        # Check 'deaf blind' before 'blind' and 'deaf': both substrings occur
        # inside 'deaf blind', so the more specific phrase must match first.
        if 'deaf blind' in command:
            self.current_mode = "deaf_blind"
            response = "Deaf-blind mode activated. Haptic feedback enabled."
        elif 'blind' in command:
            self.current_mode = "blind"
            response = "Blind mode activated. Voice navigation enabled."
        elif 'deaf' in command:
            self.current_mode = "deaf"
            response = "Deaf mode activated. Visual alerts enabled."
        elif 'non verbal' in command or 'mute' in command:
            self.current_mode = "non_verbal"
            response = "Non-verbal mode activated. Text-to-speech ready."
        elif 'universal' in command:
            self.current_mode = "universal"
            response = "Universal mode activated."
        elif 'emergency' in command:
            return self.trigger_emergency_mode("voice_command")
        else:
            response = f"Current mode: {self.current_mode}. Say 'help' for options."
        return self.create_response(response, "system", audio=self.text_to_speech(response))
    def add_to_conversation(self, speaker: str, text: str, message_type: str = "text"):
        """Add a message to the conversation history"""
        self.conversation_history.append({
            "timestamp": datetime.now().isoformat(),
            "speaker": speaker,
            "text": text,
            "type": message_type
        })
        # Keep only the last 100 messages
        if len(self.conversation_history) > 100:
            self.conversation_history = self.conversation_history[-100:]

    def log_emergency(self, emergency_data: dict):
        """Log an emergency event"""
        try:
            filename = f"data/emergency/emergency_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            with open(filename, 'w') as f:
                json.dump(emergency_data, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to log emergency: {e}")

    def create_response(self, text: str, response_type: str, **kwargs) -> dict:
        """Create a standardized response object"""
        return {
            "text": text,
            "type": response_type,
            "timestamp": datetime.now().isoformat(),
            "mode": self.current_mode,
            "audio": kwargs.get('audio'),
            "visual_alert": kwargs.get('visual_alert'),
            "haptic_feedback": kwargs.get('haptic_feedback'),
            "braille": kwargs.get('braille')
        }
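# Quick smoke test of the core engine without the UI (hypothetical usage;
# heavy models load in __init__, so expect a delay on first run):
#   bridge = ProductionVoiceBridge()
#   print(bridge.text_to_braille("SOS"))                      # '⠎⠕⠎'
#   print(bridge.detect_emergency_keywords("I need help"))    # True
#   print(bridge.text_to_vibration_pattern("SOS")[:6])        # [100, 50, 100, 50, 100, 50]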
# ==================== GRADIO INTERFACE ====================
def create_production_interface(allow_microphone: bool = False):
    """Create the production-ready Gradio interface"""
    # Load external CSS here if a stylesheet file is used
    custom_css = ""  # No external CSS file

    # Initialize the system; the microphone is used only if explicitly allowed
    voice_bridge = ProductionVoiceBridge(allow_microphone=allow_microphone)

    # Additional accessibility CSS
    accessibility_css = """
    :root { --primary-color: #2563eb; --danger-color: #dc2626; }
    .accessible-btn { min-height:48px !important; padding:12px 18px !important; font-size:16px !important; }
    .emergency-btn { background: linear-gradient(45deg,#dc2626,#ef4444) !important; color:white !important; font-weight:bold !important; }
    .large-text { font-size:18px !important; }
    """

    # Combine both CSS blocks
    final_css = custom_css + "\n" + accessibility_css

    # gr.Blocks accepts css= directly (supported in Gradio 3.x and 4.x),
    # so no manual <style> injection is needed
    with gr.Blocks(css=final_css) as demo:
        # Header
        gr.Markdown("# 🎯 VoiceBridge AI - Universal Communication Platform")

        # Status row
        with gr.Row():
            system_status = gr.Textbox(label="System Status",
                                       value="✅ System Ready - VoiceBridge AI Initialized",
                                       interactive=False)
            current_mode_display = gr.Textbox(label="Current Mode", value=voice_bridge.current_mode, interactive=False)

        # Emergency
        with gr.Row():
            emergency_btn = gr.Button("🚨 ACTIVATE EMERGENCY MODE", elem_classes=["accessible-btn", "emergency-btn"])
            emergency_contact_input = gr.Textbox(label="Emergency Contact (Email/Phone)",
                                                 placeholder="Enter emergency contact information...")

        # Mode selector
        mode_selector = gr.Radio(choices=[("Universal", "universal"), ("Blind", "blind"),
                                          ("Deaf", "deaf"), ("Non-Verbal", "non_verbal"),
                                          ("Deaf-Blind", "deaf_blind")],
                                 label="Accessibility Mode", value=voice_bridge.current_mode)

        # Universal tab
        with gr.Tab("🌐 Universal Communication"):
            with gr.Row():
                with gr.Column():
                    universal_audio = gr.Audio(label="🎤 Speak (Voice Input)", type="filepath", sources=["microphone", "upload"])
                    universal_text = gr.Textbox(label="⌨️ Type to Speak", lines=3)
                    universal_image = gr.Image(label="📷 Capture Scene", type="filepath", sources=["webcam", "upload"])
                    process_universal = gr.Button("Process Input", elem_classes="accessible-btn")
                with gr.Column():
                    universal_output = gr.Textbox(label="Output", lines=6)
                    universal_audio_output = gr.Audio(label="Audio Output", type="filepath", interactive=False)
                    universal_alert = gr.Textbox(label="Visual Alerts", visible=False)

        # Blind tab
        with gr.Tab("👁️ Blind Assistance"):
            with gr.Row():
                with gr.Column():
                    blind_audio = gr.Audio(label="Voice Commands", type="filepath", sources=["microphone", "upload"])
                    blind_commands = gr.Radio(choices=["describe scene", "read text", "navigate", "help"],
                                              label="Quick Commands", value="describe scene")
                    blind_image = gr.Image(label="Camera Feed", type="filepath", sources=["webcam", "upload"])
                    process_blind = gr.Button("Execute Command", elem_classes="accessible-btn")
                with gr.Column():
                    blind_output = gr.Textbox(label="Scene Description", lines=5)
                    blind_audio_output = gr.Audio(label="Audio Description", type="filepath")

        # Deaf tab
        with gr.Tab("👂 Deaf Assistance"):
            with gr.Row():
                with gr.Column():
                    deaf_audio = gr.Audio(label="Audio to Transcribe", type="filepath", sources=["microphone", "upload"])
                    continuous_listening = gr.Checkbox(label="Continuous Listening Mode", value=False)
                    process_deaf = gr.Button("Transcribe Audio", elem_classes="accessible-btn")
                with gr.Column():
                    deaf_output = gr.Textbox(label="Transcription", lines=6)
                    deaf_alerts = gr.Textbox(label="Sound Alerts", lines=2)

        # Non-verbal tab
        with gr.Tab("🤐 Non-Verbal Communication"):
            with gr.Row():
                with gr.Column():
                    preset_phrases = gr.Radio(choices=["greeting", "help", "medical", "emergency", "thanks",
                                                       "yes", "no", "pain", "lost", "bathroom"],
                                              label="Quick Phrases", value="greeting")
                    custom_phrase = gr.Textbox(label="Custom Message", lines=2)
                    speak_btn = gr.Button("Speak Message", elem_classes="accessible-btn")
                with gr.Column():
                    spoken_text = gr.Textbox(label="Message", lines=3)
                    message_audio = gr.Audio(label="Spoken Audio", type="filepath")

        # Deaf-blind tab
        with gr.Tab("👁️👂 Deaf-Blind Communication"):
            with gr.Row():
                with gr.Column():
                    tactile_input = gr.Textbox(label="Message to Convert", lines=3)
                    output_format = gr.Radio(choices=["haptic", "braille"], label="Output Format", value="haptic")
                    convert_btn = gr.Button("Convert to Tactile", elem_classes="accessible-btn")
                with gr.Column():
                    braille_output = gr.Textbox(label="Braille Output", lines=3)
                    vibration_pattern = gr.Textbox(label="Vibration Pattern", lines=2)

        # Settings & feedback
        with gr.Tab("⚙️ Settings & Feedback"):
            with gr.Row():
                with gr.Column():
                    high_contrast = gr.Checkbox(label="High Contrast Mode", value=False)
                    large_text = gr.Checkbox(label="Large Text Mode", value=False)
                    voice_navigation = gr.Checkbox(label="Voice Navigation", value=True)
                    feedback_email = gr.Textbox(label="Your Email (optional)")
                    feedback_message = gr.Textbox(label="Feedback & Suggestions", lines=4)
                    submit_feedback = gr.Button("Submit Feedback", elem_classes="accessible-btn")
                    feedback_status = gr.Textbox(label="Status", interactive=False)
                with gr.Column():
                    conversation_history = gr.Textbox(label="Recent Conversation", lines=8, max_lines=10)
                    clear_history = gr.Button("Clear History", elem_classes="accessible-btn")
                    export_data = gr.Button("Export Data", elem_classes="accessible-btn")
        # ---------------- Event handlers (single definitions, no duplicates) ----------------
        def handle_mode_change(mode):
            voice_bridge.current_mode = mode
            status_msg = f"Mode changed to: {mode}"
            voice_bridge.add_to_conversation("System", status_msg)
            return status_msg, status_msg

        def handle_universal_input(audio, text, image, mode):
            # Prioritize audio > text > image
            if audio:
                input_data = {'type': 'voice', 'audio': audio}
            elif text:
                input_data = {'type': 'text', 'text': text}
            elif image:
                input_data = {'type': 'image', 'image': image}
            else:
                return "Please provide input", None, ""
            result = voice_bridge.universal_communication(input_data)
            return result.get('text', ''), result.get('audio', None), result.get('visual_alert', '')

        def handle_blind_assistance(audio, command, image):
            if audio:
                transcript = voice_bridge.fallback_speech_to_text(audio)
                result = voice_bridge.blind_mode(transcript, image)
            elif image:
                result = voice_bridge.blind_mode(command, image)
            else:
                result = voice_bridge.blind_mode(command)
            return result.get('text', ''), result.get('audio', None)

        def handle_deaf_assistance(audio, continuous):
            result = voice_bridge.deaf_mode(audio, continuous)
            return result.get('text', ''), result.get('visual_alert', 'No important sounds detected')

        def handle_non_verbal(preset, custom):
            # A custom message takes priority; otherwise the preset radio key is used
            text_to_speak = custom if custom and custom.strip() else None
            result = voice_bridge.non_verbal_mode(text_to_speak, preset)
            return result.get('text', ''), result.get('audio', None)

        def handle_deaf_blind(input_text, out_format):
            result = voice_bridge.deaf_blind_mode(input_text, out_format)
            braille = result.get('braille', '') if result else ''
            pattern = result.get('haptic_feedback', []) if result else []
            # Return the braille text and the vibration pattern as a string
            return braille, str(pattern)

        def handle_feedback(email, message):
            if not (message and message.strip()):
                return "Please enter feedback before submitting."
            fb = {
                "timestamp": datetime.now().isoformat(),
                "email": email,
                "message": message
            }
            Path("data/feedback").mkdir(parents=True, exist_ok=True)
            fname = f"data/feedback/feedback_{int(time.time())}.json"
            with open(fname, "w") as f:
                json.dump(fb, f, indent=2)
            return "Thank you! Feedback submitted."

        def handle_clear_history():
            voice_bridge.conversation_history.clear()
            return "History cleared."

        def handle_export_data():
            export_path = "data/export_conversation.json"
            with open(export_path, "w") as f:
                json.dump(voice_bridge.conversation_history, f, indent=2)
            return f"Conversation exported to {export_path}"

        def handle_emergency(contact=None):
            if contact:
                voice_bridge.emergency_contacts.append(contact)
            result = voice_bridge.trigger_emergency_mode("manual_button")
            # A single output component is wired below, so return only the message
            return result.get("message", "")
        # ---------------- Connect components ----------------
        mode_selector.change(fn=handle_mode_change, inputs=mode_selector, outputs=[system_status, current_mode_display])
        process_universal.click(fn=handle_universal_input, inputs=[universal_audio, universal_text, universal_image, mode_selector],
                                outputs=[universal_output, universal_audio_output, universal_alert])
        process_blind.click(fn=handle_blind_assistance, inputs=[blind_audio, blind_commands, blind_image],
                            outputs=[blind_output, blind_audio_output])
        process_deaf.click(fn=handle_deaf_assistance, inputs=[deaf_audio, continuous_listening],
                           outputs=[deaf_output, deaf_alerts])
        speak_btn.click(fn=handle_non_verbal, inputs=[preset_phrases, custom_phrase],
                        outputs=[spoken_text, message_audio])
        convert_btn.click(fn=handle_deaf_blind, inputs=[tactile_input, output_format], outputs=[braille_output, vibration_pattern])
        submit_feedback.click(fn=handle_feedback, inputs=[feedback_email, feedback_message], outputs=feedback_status)
        clear_history.click(fn=handle_clear_history, outputs=conversation_history)
        export_data.click(fn=handle_export_data, outputs=feedback_status)
        # Pass the contact textbox as an input so Gradio supplies its current value;
        # reading emergency_contact_input.value in a lambda returns only the initial value.
        emergency_btn.click(fn=handle_emergency, inputs=[emergency_contact_input], outputs=[system_status])

        # Initial load state
        demo.load(fn=lambda: ("System Ready - VoiceBridge AI Initialized", voice_bridge.current_mode),
                  outputs=[system_status, current_mode_display])

    return demo
# ==================== LAUNCH ====================
if __name__ == "__main__":
    # In most headless deployments (e.g. Hugging Face Spaces) you must NOT initialize the microphone.
    # Set allow_microphone=True only when running on a device with a microphone and you want live mic support.
    demo = create_production_interface(allow_microphone=False)
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), share=False)
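    # Local development with a working audio stack (assumption: a real
    # microphone is attached and PyAudio is installed):
    #   demo = create_production_interface(allow_microphone=True)
    #   demo.launch()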