# Spaces: Sleeping  (status banner captured from the hosting page; not part of the program)
import logging
import os
from datetime import datetime, timezone

import gradio as gr
import librosa
import numpy as np
import torch
import webrtcvad
from simple_salesforce import Salesforce
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Logging is the debugging channel and, for now, the only place usage is reported.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Simple in-memory counter (to be expanded with Salesforce); it lives only as
# long as this process does.
usage_metrics = dict(total_assessments=0)
# Salesforce credentials are read from environment variables (assumed to be
# provisioned securely by the deployment).
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")

# `sf` stays None unless a connection is established; code that stores results
# must check it first.
sf = None
_sf_credentials = (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN)
try:
    if not all(_sf_credentials):
        logger.warning("Salesforce credentials missing; user management disabled")
    else:
        # NOTE(review): instance_url may be ignored by simple_salesforce when
        # logging in with username/password/token -- confirm against its docs.
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL,
        )
        logger.info("Connected to Salesforce for user management")
except Exception as exc:
    # A failed login must not stop the app from starting; storage is optional.
    logger.error(f"Salesforce connection failed: {str(exc)}")
# Speech-to-text: the Whisper processor and model are loaded once, at import time.
_WHISPER_CHECKPOINT = "openai/whisper-tiny"
whisper_processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
whisper_model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)

# Force the decoder prompt to English transcription.
_decoder_prompt_ids = whisper_processor.get_decoder_prompt_ids(
    language="english",
    task="transcribe",
)
whisper_model.config.forced_decoder_ids = _decoder_prompt_ids

# Voice activity detector; mode 2 was chosen as a moderate, balanced setting.
vad = webrtcvad.Vad(mode=2)
# Frame length handed to the VAD, in milliseconds.
_VAD_FRAME_MS = 30
# Pitch search range in Hz (chosen by the original author for adult voices).
_PITCH_MIN_HZ = 75
_PITCH_MAX_HZ = 300
# Upper bound, in percent, applied to jitter and shimmer.
_MAX_PERTURBATION_PCT = 10.0


def _float_to_pcm16(frame):
    """Convert a float frame (expected within [-1, 1]) to 16-bit PCM bytes.

    Scaling by 32767 and clipping keeps a full-scale sample of +1.0 inside the
    int16 range; scaling by 32768 would overflow the cast for that sample.
    """
    scaled = np.clip(frame * 32767.0, -32768, 32767)
    return scaled.astype(np.int16).tobytes()


def extract_health_features(audio, sr):
    """Extract pitch, jitter, shimmer and energy from the voiced part of a signal.

    The signal is peak-normalised, split into 30 ms frames, and only the frames
    the module-level ``vad`` classifies as speech are kept for analysis.

    Args:
        audio: 1-D array of audio samples.
        sr: Sample rate in Hz. It is passed unchanged to the VAD and to librosa.
            NOTE(review): webrtcvad is documented to accept only
            8/16/32/48 kHz input -- confirm callers stay within that set.

    Returns:
        dict with keys:
            "pitch": mean of the pitch candidates inside 75-300 Hz, or 0 when
                there are none.
            "jitter": standard deviation of those candidates divided by their
                mean, in percent, capped at 10.
            "shimmer": standard deviation of the RMS envelope divided by its
                mean, in percent, capped at 10.
            "energy": mean of the RMS envelope of the voiced audio.

    Raises:
        ValueError: If ``audio`` is empty or no voiced frame is detected.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        audio = np.asarray(audio)
        if audio.size == 0:
            raise ValueError("No audio samples provided")

        # Peak-normalise; an all-zero signal is left untouched.
        peak = np.max(np.abs(audio))
        if peak != 0:
            audio = audio / peak

        # Voice activity detection over complete frames only (a trailing
        # partial frame is dropped because the VAD needs fixed-size frames).
        frame_samples = int(sr * _VAD_FRAME_MS / 1000)
        voiced_frames = []
        for start in range(0, len(audio) - frame_samples + 1, frame_samples):
            frame = audio[start:start + frame_samples]
            if vad.is_speech(_float_to_pcm16(frame), sr):
                voiced_frames.append(frame)
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)

        # Pitch candidates, restricted to bins with energy and to the valid range.
        pitches, magnitudes = librosa.piptrack(
            y=voiced_audio, sr=sr, fmin=_PITCH_MIN_HZ, fmax=_PITCH_MAX_HZ
        )
        candidates = pitches[magnitudes > 0]
        in_range = (candidates >= _PITCH_MIN_HZ) & (candidates <= _PITCH_MAX_HZ)
        valid_pitches = candidates[in_range]

        if valid_pitches.size:
            pitch = float(np.mean(valid_pitches))
            jitter_pct = float(np.std(valid_pitches)) / pitch * 100 if pitch else 0.0
        else:
            pitch = 0.0
            jitter_pct = 0.0
        # The cap is applied after the conversion to percent so that it really
        # is the 10% the warning reports (comparing the raw ratio against 10
        # would have meant 1000%).
        if jitter_pct > _MAX_PERTURBATION_PCT:
            jitter_pct = _MAX_PERTURBATION_PCT
            logger.warning(
                "Jitter capped at %g%% due to possible noise or distortion",
                _MAX_PERTURBATION_PCT,
            )

        # One RMS envelope serves both shimmer and energy.
        rms = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        energy = float(np.mean(rms))
        shimmer_pct = float(np.std(rms)) / energy * 100 if energy else 0.0
        if shimmer_pct > _MAX_PERTURBATION_PCT:
            shimmer_pct = _MAX_PERTURBATION_PCT
            logger.warning(
                "Shimmer capped at %g%% due to possible noise or distortion",
                _MAX_PERTURBATION_PCT,
            )

        return {
            "pitch": pitch,
            "jitter": jitter_pct,
            "shimmer": shimmer_pct,
            "energy": energy,
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise
def transcribe_audio(audio):
    """Transcribe audio to text using the module-level Whisper model.

    Args:
        audio: Audio samples; they are declared to the processor as 16 kHz, so
            the caller must supply audio at that rate.

    Returns:
        The transcription with surrounding whitespace removed, or "" when
        nothing was decoded or transcription failed. Failures are logged, not
        raised, so the caller can still report the acoustic analysis.
    """
    try:
        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            generated_ids = whisper_model.generate(inputs["input_features"])
        decoded = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)
        # Strip so that a whitespace-only result counts as "no transcription".
        transcription = decoded[0].strip() if decoded else ""
        # The text is the user's own description of their health, so only its
        # length goes to the log, never the content.
        logger.info("Transcription produced (%d characters)", len(transcription))
        return transcription
    except Exception as e:
        # logger.exception keeps the traceback for debugging.
        logger.exception(f"Transcription failed: {str(e)}")
        return ""
def analyze_symptoms(text):
    """Mock symptom-to-disease analysis (placeholder for symptom-2-disease-net).

    Matching is a case-insensitive substring search. Every matching symptom
    group contributes its own message, so text that mentions both respiratory
    and stress/fatigue symptoms receives both pieces of feedback.

    Args:
        text: Free-text description of symptoms; ``None`` or "" is treated as
            text without any recognisable symptom.

    Returns:
        One or more feedback messages joined with newlines. When no keyword
        matches, a single message asking for a clearer description.
    """
    lowered = (text or "").lower()
    feedback = []

    if any(keyword in lowered for keyword in ("cough", "difficulty breathing")):
        feedback.append("Based on your input, you may have a respiratory issue, such as bronchitis or asthma. Please consult a doctor.")
    # "stress" also covers "stressed", so one keyword is enough for both.
    if any(keyword in lowered for keyword in ("stress", "tired", "fatigue")):
        feedback.append("Your description suggests possible stress or fatigue, potentially linked to anxiety or exhaustion. Consider seeking medical advice.")
    if not feedback:
        feedback.append("Your input didn’t clearly indicate specific symptoms. Please describe any health concerns (e.g., cough, stress) and consult a healthcare provider for a thorough check.")

    return "\n".join(feedback)
def _remove_audio_file(audio_file):
    """Best-effort deletion of the recording; failures are logged, never raised."""
    try:
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
            logger.info(f"Deleted audio file: {audio_file} for compliance")
    except Exception as e:
        logger.error(f"Failed to delete audio file: {str(e)}")


def analyze_voice(audio_file=None):
    """Analyze a recorded voice sample and return a textual health report.

    Steps: load the file at 16 kHz, reject clips shorter than one second,
    extract acoustic features, transcribe the speech, apply rule-based
    thresholds, optionally store the result in Salesforce, and delete the
    audio file.

    Args:
        audio_file: Path to the audio file to analyze.

    Returns:
        The formatted report, or a string starting with "Error: " when any
        step fails. This function does not raise for processing failures.

    Side effects:
        Increments ``usage_metrics["total_assessments"]`` on every call,
        including failed ones. Deletes ``audio_file`` if it exists, on both
        the success and the error path.
    """
    usage_metrics["total_assessments"] += 1
    logger.info(f"Total assessments: {usage_metrics['total_assessments']}")
    try:
        # Load audio from file if provided
        if not (audio_file and os.path.exists(audio_file)):
            raise ValueError("No valid audio file provided for analysis")
        audio, sr = librosa.load(audio_file, sr=16000)
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")

        # Extract voice features
        features = extract_health_features(audio, sr)

        # Transcribe audio for symptom analysis
        transcription = transcribe_audio(audio)
        if transcription:
            symptom_feedback = analyze_symptoms(transcription)
        else:
            symptom_feedback = "No transcription available. Please record again with clear speech."

        # Jitter is reported as the respiratory score, shimmer as the
        # mental-health score.
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]

        # Rule-based analysis with personalized feedback
        feedback = []
        if respiratory_score > 1.0:
            feedback.append(f"Your voice indicates elevated jitter ({respiratory_score:.2f}%), which may suggest respiratory issues. Consult a doctor.")
        if mental_health_score > 5.0:
            feedback.append(f"Your voice shows elevated shimmer ({mental_health_score:.2f}%), possibly indicating stress or emotional strain. Consider a health check.")
        if features["energy"] < 0.01:
            feedback.append(f"Your vocal energy is low ({features['energy']:.4f}), which might point to fatigue. Seek medical advice if this persists.")
        # The all-clear line is only shown when there was a transcription.
        if not feedback and not symptom_feedback.startswith("No transcription"):
            feedback.append("Your voice analysis shows no immediate health concerns based on current data.")

        # Combine voice and symptom feedback
        feedback.append("\n**Symptom Feedback (Based on Your Input)**:")
        feedback.append(symptom_feedback)
        feedback.append("\n**Voice Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz (average fundamental frequency)")
        feedback.append(f"Jitter: {respiratory_score:.2f}% (pitch variation, higher values may indicate respiratory issues)")
        feedback.append(f"Shimmer: {mental_health_score:.2f}% (amplitude variation, higher values may indicate stress)")
        feedback.append(f"Energy: {features['energy']:.4f} (vocal intensity, lower values may indicate fatigue)")
        feedback.append(f"Transcription: {transcription if transcription else 'None'}")
        feedback.append("\n**Disclaimer**: This is a preliminary analysis, not a medical diagnosis. Always consult a healthcare provider for professional evaluation.")
        feedback_str = "\n".join(feedback)

        # Store in Salesforce only when a connection was established at startup.
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features, transcription)

        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"
    finally:
        # Runs on every path, so a recording that failed validation or analysis
        # is not left on disk either.
        _remove_audio_file(audio_file)
def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features, transcription):
    """Create a ``HealthAssessment__c`` record holding one assessment result.

    Storage is best-effort: any failure is logged and swallowed so that it
    cannot prevent the report from being returned to the user.

    NOTE(review): nothing in this function encrypts the payload; any
    encryption would have to come from the transport or the Salesforce org
    configuration -- confirm before relying on it.

    Args:
        audio_file: Path of the analysed file; only its base name is stored.
            A falsy value is recorded as "user_recorded_audio".
        feedback: Full report text.
        respiratory_score: Numeric score stored in ``RespiratoryScore__c``.
        mental_health_score: Numeric score stored in ``MentalHealthScore__c``.
        features: Mapping with "pitch", "jitter", "shimmer" and "energy".
        transcription: Transcribed speech.

    Returns:
        None.
    """
    if sf is None:
        # No connection was established at startup; nothing to do.
        logger.warning("Salesforce not connected; assessment not stored")
        return
    try:
        record = {
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # and yields a naive value with no offset.
            "AssessmentDate__c": datetime.now(timezone.utc).isoformat(),
            "Feedback__c": feedback,
            # float() turns NumPy scalars into plain Python floats.
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file) if audio_file else "user_recorded_audio",
            "Pitch__c": float(features["pitch"]),
            "Jitter__c": float(features["jitter"]),
            "Shimmer__c": float(features["shimmer"]),
            "Energy__c": float(features["energy"]),
            "Transcription__c": transcription,
        }
        sf.HealthAssessment__c.create(record)
        logger.info("Stored assessment in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")
# Gradio interface with accessibility focus: one audio input, one text report.
_audio_input = gr.Audio(
    type="filepath",
    label="Record or Upload Your Voice (WAV, MP3, FLAC, 1+ sec)",
    format="wav",
)
_results_box = gr.Textbox(
    label="Health Assessment Results",
    elem_id="health-results",
)
iface = gr.Interface(
    fn=analyze_voice,
    inputs=_audio_input,
    outputs=_results_box,
    title="Smart Voicebot for Public Health",
    description="Record or upload your voice (minimum 1 second) to receive a preliminary health check. Speak clearly in English about your symptoms (e.g., 'I have a cough' or 'I feel stressed'). This tool is accessible via web and mobile.",
    # Basic theme; enhance for screen readers later.
    theme="default",
    # Flagging is turned off to prevent data retention without consent.
    allow_flagging="never",
)
if __name__ == "__main__":
    # No wall-clock time is baked into the message: a hard-coded date is wrong
    # on every later start, and the configured log format already prefixes
    # each record with the actual timestamp.
    logger.info("Starting Voice Health Analyzer")
    # Bind to all interfaces on port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)