Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import speech_recognition as sr | |
| try: | |
| import pyttsx3 | |
| except Exception: | |
| pyttsx3 = None | |
| from gtts import gTTS | |
| import io | |
| import tempfile | |
| import os | |
| import threading | |
| import time | |
| from typing import Optional, Dict, List | |
| try: | |
| from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration | |
| import av | |
| WEBRTC_AVAILABLE = True | |
| except Exception: | |
| WEBRTC_AVAILABLE = False | |
| import logging | |
| # Set up logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
| logger = logging.getLogger(__name__) | |
class SpeechInterface:
    """
    Comprehensive speech interface supporting multiple languages for farmers.

    Wraps speech-to-text (SpeechRecognition + Google API, with an optional
    browser/WebRTC fallback) and text-to-speech (gTTS online, pyttsx3 offline)
    behind Streamlit-friendly helpers.
    """

    def __init__(self):
        # Recognizer is always constructible; the microphone may not be.
        self.recognizer = sr.Recognizer()
        self.microphone = None
        self.pyaudio_available = False
        self.gtts_available = True  # gTTS is imported; availability may depend on network
        self.browser_mic_available = False

        # Language mapping for speech recognition and synthesis
        # (display name -> ISO 639-1 code used by both Google STT and gTTS).
        self.language_codes = {
            'English': 'en',
            'Hindi': 'hi',
            'Telugu': 'te',
            'Kannada': 'kn',
            'Tamil': 'ta',
            'Malayalam': 'ml',
            'Marathi': 'mr',
            'Bengali': 'bn',
            'Gujarati': 'gu',
            'Punjabi': 'pa',
            'Urdu': 'ur',
            'French': 'fr',
            'Spanish': 'es'
        }

        # Attach the system microphone if PyAudio is present; any failure
        # simply disables native voice input rather than crashing the app.
        try:
            self.microphone = sr.Microphone()
            self.pyaudio_available = True
            logger.info("PyAudio and microphone initialized successfully")
        except AttributeError as exc:
            # SpeechRecognition raises AttributeError mentioning PyAudio
            # when the backend is missing.
            if "PyAudio" in str(exc):
                logger.warning("PyAudio not available. Voice input will be disabled.")
            else:
                logger.error(f"Microphone initialization error: {exc}")
            self.pyaudio_available = False
        except Exception as exc:
            logger.error(f"Unexpected error initializing microphone: {exc}")
            self.pyaudio_available = False

        # Optional offline TTS engine (pyttsx3); None when unavailable.
        self.tts_engine = None
        if pyttsx3 is not None:
            try:
                engine = pyttsx3.init()
                engine.setProperty('rate', 150)    # Speed of speech
                engine.setProperty('volume', 0.9)  # Volume level
                self.tts_engine = engine
            except Exception as exc:
                logger.warning(f"Could not initialize TTS engine: {exc}")
| def has_tts(self) -> bool: | |
| """Return True if any TTS option (gTTS or pyttsx3) is available.""" | |
| return bool(self.tts_engine) or self.gtts_available | |
| def speech_to_text(self, language: str = 'English', timeout: int = 5) -> Optional[str]: | |
| """ | |
| Convert speech to text using microphone input | |
| """ | |
| # Prefer native mic if available, else offer WebRTC capture | |
| if not (self.pyaudio_available and self.microphone is not None): | |
| if WEBRTC_AVAILABLE: | |
| return self._speech_to_text_webrtc(language) | |
| st.error("❌ Microphone not available. PyAudio not installed and WebRTC not available.") | |
| return None | |
| try: | |
| with self.microphone as source: | |
| # Adjust for ambient noise | |
| self.recognizer.adjust_for_ambient_noise(source, duration=0.5) | |
| # Show listening indicator | |
| st.info("🎤 Listening... Speak now!") | |
| # Listen for audio | |
| audio = self.recognizer.listen(source, timeout=timeout, phrase_time_limit=10) | |
| # Show processing indicator | |
| st.info("🔄 Processing your speech...") | |
| # Convert speech to text | |
| language_code = self.language_codes.get(language, 'en') | |
| text = self.recognizer.recognize_google(audio, language=language_code) | |
| st.success(f"✅ Heard: {text}") | |
| return text | |
| except sr.WaitTimeoutError: | |
| st.warning("⏰ No speech detected. Please try again.") | |
| return None | |
| except sr.UnknownValueError: | |
| st.error("❌ Could not understand the speech. Please speak clearly.") | |
| return None | |
| except sr.RequestError as e: | |
| st.error(f"❌ Speech recognition service error: {e}") | |
| return None | |
| except Exception as e: | |
| st.error(f"❌ Unexpected error: {e}") | |
| return None | |
    def _speech_to_text_webrtc(self, language: str = 'English') -> Optional[str]:
        """Capture audio in-browser using WebRTC and run recognition on buffered audio."""
        st.info("🎤 Using browser microphone (WebRTC)")
        # Public STUN server so ICE negotiation works behind NAT.
        rtc_configuration = RTCConfiguration({
            "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
        })
        # NOTE(review): this buffer is a plain local; Streamlit re-runs the
        # script on every interaction, so frames accumulated before a rerun
        # (e.g. the button click below) are lost — confirm whether frames
        # should live in st.session_state instead.
        audio_frames: List[bytes] = []

        def recv_audio(frame: av.AudioFrame):
            # Convert to bytes and buffer
            pcm = frame.to_ndarray()
            # Downmix to mono int16 expected by Recognizer
            import numpy as np
            if pcm.ndim > 1:
                pcm = pcm.mean(axis=0)
            pcm = pcm.astype('int16').tobytes()
            audio_frames.append(pcm)
            return frame

        # SENDONLY: browser sends audio to the server, nothing is played back.
        ctx = webrtc_streamer(
            key="webrtc-audio",
            mode=WebRtcMode.SENDONLY,
            audio_receiver_size=1024,
            rtc_configuration=rtc_configuration,
            media_stream_constraints={"audio": True, "video": False},
            async_processing=False,
            audio_frame_callback=recv_audio
        )
        # Streaming state doubles as the "browser mic works" signal.
        self.browser_mic_available = bool(ctx and ctx.state.playing)
        if st.button("🛑 Stop & Transcribe", help="Stop capture and transcribe the recorded audio"):
            if not audio_frames:
                st.warning("No audio captured")
                return None
            # Build an AudioData object for speech_recognition
            import numpy as np  # NOTE(review): unused in this branch
            pcm = b"".join(audio_frames)
            # NOTE(review): assumes the browser delivers 48 kHz mono int16 —
            # verify against the actual frame rate reported by av.AudioFrame.
            sr_rate = 48000  # typical WebRTC rate
            sample_width = 2  # int16
            audio_data = sr.AudioData(pcm, sr_rate, sample_width)
            try:
                language_code = self.language_codes.get(language, 'en')
                text = self.recognizer.recognize_google(audio_data, language=language_code)
                st.success(f"✅ Heard: {text}")
                return text
            except sr.UnknownValueError:
                st.error("❌ Could not understand the speech.")
            except sr.RequestError as e:
                st.error(f"❌ Speech recognition service error: {e}")
        return None
| def text_to_speech(self, text: str, language: str = 'English', use_gtts: bool = True) -> bool: | |
| """ | |
| Convert text to speech using either gTTS (online) or pyttsx3 (offline) | |
| """ | |
| try: | |
| if use_gtts and text.strip(): | |
| # Use Google Text-to-Speech (online, better quality, supports more languages) | |
| language_code = self.language_codes.get(language, 'en') | |
| tts = gTTS(text=text, lang=language_code, slow=False) | |
| # Create temporary file with better handling | |
| import uuid | |
| temp_filename = f"tts_{uuid.uuid4().hex}.mp3" | |
| temp_path = os.path.join(tempfile.gettempdir(), temp_filename) | |
| try: | |
| tts.save(temp_path) | |
| # Play audio in Streamlit | |
| with open(temp_path, 'rb') as audio_file: | |
| audio_bytes = audio_file.read() | |
| st.audio(audio_bytes, format='audio/mp3') | |
| st.info(f"🔊 Audio generated! If you don't hear anything, check your speakers/headphones and browser audio settings.") | |
| # Provide download option as fallback | |
| st.download_button( | |
| label="⬇️ Download Audio File", | |
| data=audio_bytes, | |
| file_name=f"recommendation_audio_{uuid.uuid4().hex[:8]}.mp3", | |
| mime="audio/mp3", | |
| help="Download the audio file to play it in your preferred audio player" | |
| ) | |
| # Clean up with retry mechanism | |
| try: | |
| os.unlink(temp_path) | |
| except PermissionError: | |
| # File might still be in use, try again after a short delay | |
| import time | |
| time.sleep(0.5) | |
| try: | |
| os.unlink(temp_path) | |
| except: | |
| # If still can't delete, just leave it - temp files are cleaned up by system | |
| pass | |
| return True | |
| except Exception as e: | |
| logger.error(f"Error with gTTS file handling: {e}") | |
| # Fallback to pyttsx3 | |
| if self.tts_engine: | |
| self.tts_engine.say(text) | |
| self.tts_engine.runAndWait() | |
| return True | |
| return False | |
| elif self.tts_engine and text.strip(): | |
| # Use pyttsx3 (offline, limited language support) | |
| try: | |
| self.tts_engine.say(text) | |
| self.tts_engine.runAndWait() | |
| st.info("🔊 Playing audio using offline TTS engine...") | |
| return True | |
| except Exception as e: | |
| logger.error(f"Error with pyttsx3: {e}") | |
| st.error(f"❌ Offline TTS error: {e}") | |
| return False | |
| else: | |
| if not text.strip(): | |
| st.warning("⚠️ No text provided to speak") | |
| else: | |
| st.warning("⚠️ No TTS engine available. Please check your internet connection for online TTS or install offline TTS dependencies.") | |
| return False | |
| except Exception as e: | |
| logger.error(f"Error in text-to-speech: {e}") | |
| st.error(f"❌ Text-to-speech error: {e}") | |
| return False | |
| def create_voice_input_widget(self, label: str, language: str = 'English', | |
| key: str = None, help_text: str = None) -> Optional[str]: | |
| """ | |
| Create a voice input widget for Streamlit | |
| """ | |
| col1, col2 = st.columns([3, 1]) | |
| # Get the current value from session state | |
| text_key = f"{key}_text" if key else "text_input" | |
| voice_result_key = f"{key}_voice_result" if key else "voice_result" | |
| # Initialize session state for voice result | |
| if voice_result_key not in st.session_state: | |
| st.session_state[voice_result_key] = "" | |
| with col1: | |
| text_input = st.text_input( | |
| label, | |
| key=text_key, | |
| help=help_text | |
| ) | |
| with col2: | |
| if st.button("🎤 Voice", key=f"{key}_voice_btn" if key else None, help="Click to speak"): | |
| if not self.pyaudio_available: | |
| st.error("❌ Microphone not available") | |
| return text_input | |
| with st.spinner("Preparing microphone..."): | |
| time.sleep(1) # Give time for microphone to initialize | |
| try: | |
| voice_text = self.speech_to_text(language) | |
| if voice_text: | |
| # Store voice input in session state | |
| st.session_state[voice_result_key] = voice_text | |
| st.success(f"✅ Voice input: {voice_text}") | |
| # Show the voice input in a separate display | |
| st.info(f"🎤 Voice input captured: **{voice_text}**") | |
| st.info("💡 Copy this text and paste it into the input field above") | |
| # Force a rerun to update the text input | |
| st.rerun() | |
| else: | |
| st.warning("⚠️ No voice input detected") | |
| except Exception as e: | |
| st.error(f"❌ Voice input error: {e}") | |
| logger.error(f"Voice input error: {e}") | |
| # Show voice result if available | |
| if st.session_state.get(voice_result_key): | |
| st.info(f"🎤 Last voice input: **{st.session_state[voice_result_key]}**") | |
| return text_input | |
| def create_voice_output_button(self, text: str, language: str = 'English', | |
| button_text: str = "🔊 Listen", key: str = None): | |
| """ | |
| Create a voice output button for Streamlit | |
| """ | |
| if st.button(button_text, key=f"{key}_speak" if key else None, help="Click to hear the text"): | |
| if not text or not text.strip(): | |
| st.warning("⚠️ No text to speak") | |
| return | |
| with st.spinner("Generating speech..."): | |
| success = self.text_to_speech(text, language) | |
| if success: | |
| st.success("✅ Audio generated successfully!") | |
| else: | |
| st.error("❌ Failed to generate audio. Please try again.") | |
| def create_voice_interface_for_sustainability(self, language: str = 'English') -> Dict: | |
| """ | |
| Create voice interface specifically for sustainability tracker | |
| """ | |
| # Voice input for water usage | |
| water_usage_text = self.create_voice_input_widget( | |
| "💧 Water Usage (ML/ha) - Voice Input", | |
| language=language, | |
| key="water_voice", | |
| help_text="Speak the water usage amount" | |
| ) | |
| # Voice input for fertilizer usage | |
| fertilizer_usage_text = self.create_voice_input_widget( | |
| "🧪 Fertilizer Usage (tons/ha) - Voice Input", | |
| language=language, | |
| key="fertilizer_voice", | |
| help_text="Speak the fertilizer usage amount" | |
| ) | |
| # Voice input for crop rotation | |
| rotation_text = self.create_voice_input_widget( | |
| "🔄 Crop Rotation (Yes/No) - Voice Input", | |
| language=language, | |
| key="rotation_voice", | |
| help_text="Say 'Yes' or 'No' for crop rotation" | |
| ) | |
| # Get voice results from session state | |
| water_voice = st.session_state.get("water_voice_voice_result", "") | |
| fertilizer_voice = st.session_state.get("fertilizer_voice_voice_result", "") | |
| rotation_voice = st.session_state.get("rotation_voice_voice_result", "") | |
| # Use voice input if available, otherwise use text input | |
| water_usage_text = water_voice if water_voice else water_usage_text | |
| fertilizer_usage_text = fertilizer_voice if fertilizer_voice else fertilizer_usage_text | |
| rotation_text = rotation_voice if rotation_voice else rotation_text | |
| # Process voice inputs | |
| data = {} | |
| if water_usage_text: | |
| try: | |
| # Extract numbers from voice input | |
| import re | |
| numbers = re.findall(r'\d+\.?\d*', water_usage_text) | |
| if numbers: | |
| data['water_score'] = float(numbers[0]) | |
| except: | |
| st.warning("Could not parse water usage from voice input") | |
| if fertilizer_usage_text: | |
| try: | |
| import re | |
| numbers = re.findall(r'\d+\.?\d*', fertilizer_usage_text) | |
| if numbers: | |
| data['fertilizer_use'] = float(numbers[0]) | |
| except: | |
| st.warning("Could not parse fertilizer usage from voice input") | |
| if rotation_text: | |
| rotation_lower = rotation_text.lower() | |
| if any(word in rotation_lower for word in ['yes', 'haan', 'ಹೌದು', 'అవును', 'ஆம்', 'അതെ', 'हाँ', 'oui', 'sí']): | |
| data['rotation'] = True | |
| elif any(word in rotation_lower for word in ['no', 'nahi', 'ಇಲ್ಲ', 'లేదు', 'இல்லை', 'ഇല്ല', 'नहीं', 'non', 'no']): | |
| data['rotation'] = False | |
| return data | |
| def create_voice_interface_for_farm_details(self, language: str = 'English') -> Dict: | |
| """ | |
| Create voice interface for farm details input | |
| """ | |
| # Voice input for farm size | |
| farm_size_text = self.create_voice_input_widget( | |
| "🌾 Farm Size (hectares) - Voice Input", | |
| language=language, | |
| key="farm_size_voice", | |
| help_text="Speak the farm size in hectares" | |
| ) | |
| # Voice input for crop preference | |
| crop_preference_text = self.create_voice_input_widget( | |
| "🌱 Crop Preference - Voice Input", | |
| language=language, | |
| key="crop_preference_voice", | |
| help_text="Speak your crop preference (Grains, Vegetables, Fruits)" | |
| ) | |
| # Voice input for soil type | |
| soil_type_text = self.create_voice_input_widget( | |
| "🗺️ Soil Type - Voice Input", | |
| language=language, | |
| key="soil_type_voice", | |
| help_text="Speak the soil type (Loamy, Sandy, Clay)" | |
| ) | |
| # Get voice results from session state | |
| farm_size_voice = st.session_state.get("farm_size_voice_voice_result", "") | |
| crop_preference_voice = st.session_state.get("crop_preference_voice_voice_result", "") | |
| soil_type_voice = st.session_state.get("soil_type_voice_voice_result", "") | |
| # Use voice input if available, otherwise use text input | |
| farm_size_text = farm_size_voice if farm_size_voice else farm_size_text | |
| crop_preference_text = crop_preference_voice if crop_preference_voice else crop_preference_text | |
| soil_type_text = soil_type_voice if soil_type_voice else soil_type_text | |
| # Process voice inputs | |
| data = {} | |
| # Debug: Show what was captured | |
| if farm_size_text or crop_preference_text or soil_type_text: | |
| st.info(f"🎤 Voice inputs captured: Farm Size='{farm_size_text}', Crop='{crop_preference_text}', Soil='{soil_type_text}'") | |
| if farm_size_text: | |
| try: | |
| import re | |
| numbers = re.findall(r'\d+', farm_size_text) | |
| if numbers: | |
| data['land_size'] = int(numbers[0]) | |
| st.success(f"✅ Parsed land size: {data['land_size']} hectares") | |
| else: | |
| st.warning("Could not find numbers in farm size voice input") | |
| except Exception as e: | |
| st.warning(f"Could not parse farm size from voice input: {e}") | |
| if crop_preference_text: | |
| crop_lower = crop_preference_text.lower() | |
| if any(word in crop_lower for word in ['grain', 'grains', 'अनाज', 'ಧಾನ್ಯ', 'ధాన్యం', 'தானியம்', 'ധാന്യം']): | |
| data['crop_preference'] = 'Grains' | |
| st.success(f"✅ Parsed crop preference: {data['crop_preference']}") | |
| elif any(word in crop_lower for word in ['vegetable', 'vegetables', 'सब्जी', 'ತರಕಾರಿ', 'కూరగాయలు', 'காய்கறி', 'പച്ചക്കറി']): | |
| data['crop_preference'] = 'Vegetables' | |
| st.success(f"✅ Parsed crop preference: {data['crop_preference']}") | |
| elif any(word in crop_lower for word in ['fruit', 'fruits', 'फल', 'ಹಣ್ಣು', 'పండు', 'பழம்', 'പഴം']): | |
| data['crop_preference'] = 'Fruits' | |
| st.success(f"✅ Parsed crop preference: {data['crop_preference']}") | |
| else: | |
| st.warning(f"Could not recognize crop preference from: '{crop_preference_text}'") | |
| if soil_type_text: | |
| soil_lower = soil_type_text.lower() | |
| if any(word in soil_lower for word in ['loamy', 'loam', 'दोमट', 'ಲೋಮಿ', 'లోమి', 'லோமி', 'ലോമി']): | |
| data['soil_type'] = 'Loamy' | |
| st.success(f"✅ Parsed soil type: {data['soil_type']}") | |
| elif any(word in soil_lower for word in ['sandy', 'sand', 'बालू', 'ಮರಳು', 'ఇసుక', 'மணல்', 'മണൽ']): | |
| data['soil_type'] = 'Sandy' | |
| st.success(f"✅ Parsed soil type: {data['soil_type']}") | |
| elif any(word in soil_lower for word in ['clay', 'चिकनी', 'ಕ್ಲೇ', 'క్లే', 'களிமண்', 'കളിമണ്ണ്']): | |
| data['soil_type'] = 'Clay' | |
| st.success(f"✅ Parsed soil type: {data['soil_type']}") | |
| else: | |
| st.warning(f"Could not recognize soil type from: '{soil_type_text}'") | |
| return data | |
| def create_voice_help_system(self, language: str = 'English'): | |
| """ | |
| Create a voice help system for farmers | |
| """ | |
| st.markdown("### 🎤 Voice Help System") | |
| help_texts = { | |
| 'English': { | |
| 'welcome': "Welcome to the Sustainable Farming AI Platform. You can use voice commands to interact with the system.", | |
| 'farm_details': "To enter farm details, speak your farm size, crop preference, and soil type.", | |
| 'sustainability': "To log sustainability data, speak your water usage, fertilizer usage, and whether you practice crop rotation.", | |
| 'recommendations': "Click the generate recommendation button to get AI-powered farming advice based on your inputs." | |
| }, | |
| 'Hindi': { | |
| 'welcome': "सस्टेनेबल फार्मिंग AI प्लेटफॉर्म में आपका स्वागत है। आप सिस्टम के साथ बातचीत करने के लिए आवाज कमांड का उपयोग कर सकते हैं।", | |
| 'farm_details': "फार्म विवरण दर्ज करने के लिए, अपने फार्म का आकार, फसल पसंद और मिट्टी का प्रकार बोलें।", | |
| 'sustainability': "सस्टेनेबलिटी डेटा लॉग करने के लिए, अपने पानी के उपयोग, उर्वरक के उपयोग और क्या आप फसल चक्रण का अभ्यास करते हैं, बोलें।", | |
| 'recommendations': "अपने इनपुट के आधार पर AI-संचालित खेती सलाह प्राप्त करने के लिए सिफारिश बटन पर क्लिक करें।" | |
| }, | |
| 'Telugu': { | |
| 'welcome': "సస్టైనబుల్ ఫార్మింగ్ AI ప్లాట్ఫారమ్కు స్వాగతం. మీరు సిస్టమ్తో ఇంటరాక్ట్ చేయడానికి వాయిస్ కమాండ్లను ఉపయోగించవచ్చు.", | |
| 'farm_details': "ఫార్మ్ వివరాలను నమోదు చేయడానికి, మీ ఫార్మ్ పరిమాణం, పంట ప్రాధాన్యత మరియు నేల రకాన్ని మాట్లాడండి.", | |
| 'sustainability': "సస్టైనబిలిటీ డేటాను లాగ్ చేయడానికి, మీ నీటి వినియోగం, ఎరువు వినియోగం మరియు మీరు పంట మార్పిడిని అభ్యసిస్తున్నారా అని మాట్లాడండి.", | |
| 'recommendations': "మీ ఇన్పుట్ల ఆధారంగా AI-ఆధారిత వ్యవసాయ సలహా పొందడానికి సిఫారసు బటన్పై క్లిక్ చేయండి." | |
| } | |
| } | |
| help_data = help_texts.get(language, help_texts['English']) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| if st.button("🔊 Listen to Welcome", key="help_welcome"): | |
| self.text_to_speech(help_data['welcome'], language) | |
| with col2: | |
| if st.button("🔊 Listen to Farm Details Help", key="help_farm"): | |
| self.text_to_speech(help_data['farm_details'], language) | |
| col3, col4 = st.columns(2) | |
| with col3: | |
| if st.button("🔊 Listen to Sustainability Help", key="help_sustainability"): | |
| self.text_to_speech(help_data['sustainability'], language) | |
| with col4: | |
| if st.button("🔊 Listen to Recommendations Help", key="help_recommendations"): | |
| self.text_to_speech(help_data['recommendations'], language) | |
| def get_supported_languages(self) -> List[str]: | |
| """ | |
| Get list of supported languages | |
| """ | |
| return list(self.language_codes.keys()) | |
| def is_voice_available(self) -> bool: | |
| """ | |
| Check if microphone input is available (PyAudio + microphone). | |
| """ | |
| return (self.pyaudio_available and self.microphone is not None) or (WEBRTC_AVAILABLE and self.browser_mic_available) | |