HealthVoiceAnalyzer1

Sleeping

App Files Community

geethareddy committed on Jun 21, 2025

Commit

cdb9181

verified ·

1 Parent(s): c997088

Update app.py

Browse files

Files changed (1) hide show

app.py +156 -70

app.py CHANGED Viewed

@@ -6,114 +6,200 @@ from transformers import Wav2Vec2Processor, Wav2Vec2Model
 from simple_salesforce import Salesforce
 import os
 from datetime import datetime
-# Salesforce credentials (store securely in environment variables)
-SF_USERNAME = os.getenv("SF_USERNAME", "your_salesforce_username")
-SF_PASSWORD = os.getenv("SF_PASSWORD", "your_salesforce_password")
-SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN", "your_salesforce_security_token")
-SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://your-salesforce-instance.salesforce.com")
-# Initialize Salesforce connection
 try:
-    sf = Salesforce(
-        username=SF_USERNAME,
-        password=SF_PASSWORD,
-        security_token=SF_SECURITY_TOKEN,
-        instance_url=SF_INSTANCE_URL
-    )
 except Exception as e:
-    print(f"Failed to connect to Salesforce: {str(e)}")
-    sf = None
-# Load Wav2Vec2 model for speech feature extraction
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
 model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
 def analyze_voice(audio_file):
     """Analyze voice for health indicators."""
     try:
-        # Log audio file info
-        print(f"Processing audio file: {audio_file}")
-        # Load audio file
         audio, sr = librosa.load(audio_file, sr=16000)
-        print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s")
-        # Process audio for Wav2Vec2
-        inputs = processor(audio, sampling_rate=16000, return_tensors="pt", padding=True)
-        print(f"Input tensor shape: {inputs['input_values'].shape}")
-        with torch.no_grad():
-            outputs = model(**inputs)
         # Extract features
-        features = outputs.last_hidden_state.mean(dim=1).numpy()
-        print(f"Features shape: {features.shape}, Sample values: {features[0][:5]}")
-        # Mock health analysis
-        respiratory_score = np.mean(features)
-        mental_health_score = np.std(features)
-        # Log scores
-        print(f"Respiratory Score: {respiratory_score:.4f}, Mental Health Score: {mental_health_score:.4f}")
-        # Threshold-based feedback
-        feedback = ""
-        if respiratory_score > 0.1:
-            feedback += f"Possible respiratory issue detected (score: {respiratory_score:.4f}); consult a doctor. "
-        if mental_health_score > 0.1:
-            feedback += f"Possible stress indicators detected (score: {mental_health_score:.4f}); consider professional advice. "
         if not feedback:
-            feedback = "No significant health indicators detected."
-        feedback += f"\n\n**Debug Info**: Respiratory Score = {respiratory_score:.4f}, Mental Health Score = {mental_health_score:.4f}"
-        feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
         # Store in Salesforce
         if sf:
-            store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score)
-        # Clean up temporary audio file
         try:
             os.remove(audio_file)
-            print(f"Deleted temporary audio file: {audio_file}")
         except Exception as e:
-            print(f"Failed to delete audio file: {str(e)}")
-        return feedback
     except Exception as e:
-        return f"Error processing audio: {str(e)}"
-def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score):
-    """Store analysis results in Salesforce."""
     try:
         sf.HealthAssessment__c.create({
             "AssessmentDate__c": datetime.utcnow().isoformat(),
             "Feedback__c": feedback,
             "RespiratoryScore__c": float(respiratory_score),
             "MentalHealthScore__c": float(mental_health_score),
-            "AudioFileName__c": os.path.basename(audio_file)
         })
     except Exception as e:
-        print(f"Failed to store in Salesforce: {str(e)}")
 def test_with_sample_audio():
-    """Test the app with a sample audio file."""
     sample_audio_path = "audio_samples/sample.wav"
-    if os.path.exists(sample_audio_path):
-        return analyze_voice(sample_audio_path)
-    return "Sample audio file not found."
 # Gradio interface
 iface = gr.Interface(
     fn=analyze_voice,
-    inputs=gr.Audio(type="filepath", label="Record or Upload Voice"),
-    outputs=gr.Textbox(label="Health Assessment Feedback"),
-    title="Health Voice Analyzer",
-    description="Record or upload a voice sample for preliminary health assessment. Supports English, Spanish, Hindi, Mandarin."
 )
 if __name__ == "__main__":
     print(test_with_sample_audio())
     iface.launch(server_name="0.0.0.0", server_port=7860)

 from simple_salesforce import Salesforce
 import os
 from datetime import datetime
+import logging
+import soundfile as sf
+import webrtcvad
+# Set up logging: timestamped INFO-level records; `logger` is used by every function below
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
+# Salesforce credentials are read from the environment. Only the instance URL has a
+# default; the other three are None when unset, which skips the connection below.
+SF_USERNAME = os.getenv("SF_USERNAME")
+SF_PASSWORD = os.getenv("SF_PASSWORD")
+SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
+SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")
+# Initialize Salesforce
+# NOTE(review): `sf` is also the alias created by `import soundfile as sf` above, so this
+# assignment rebinds it; from here on `sf` is either None or the Salesforce client.
+sf = None
 try:
+    if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
+        sf = Salesforce(
+            username=SF_USERNAME,
+            password=SF_PASSWORD,
+            security_token=SF_SECURITY_TOKEN,
+            instance_url=SF_INSTANCE_URL
+        )
+        logger.info("Connected to Salesforce")
+    else:
+        logger.warning("Salesforce credentials missing; skipping integration")
 except Exception as e:
+    # A failed connection is logged, not raised: `sf` stays None and analyze_voice
+    # skips the storage step.
+    logger.error(f"Salesforce connection failed: {str(e)}")
+# Load Wav2Vec2 model (optional context features)
+# NOTE(review): `processor` and `model` are not referenced by the functions visible in this diff.
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
 model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
+# Initialize VAD (shared by extract_health_features)
+vad = webrtcvad.Vad(mode=2)  # Moderate mode for balanced voice detection
+def extract_health_features(audio, sr):
+    """Extract pitch, jitter, shimmer, energy and formant features from speech.
+
+    The signal is peak-normalised, reduced to the 30 ms frames that the
+    module-level WebRTC VAD classifies as speech, and summarised into a few
+    scalar features.
+
+    Args:
+        audio: Waveform as a NumPy float array (analyze_voice passes the
+            output of ``librosa.load``).
+        sr: Sample rate in Hz; passed unchanged to the VAD and to librosa.
+
+    Returns:
+        dict with keys ``pitch`` (Hz), ``jitter`` and ``shimmer`` (percent),
+        ``energy`` (mean RMS of the voiced signal) and ``formant_mean``
+        (0 when formant extraction fails).
+
+    Raises:
+        ValueError: If no frame is classified as speech.
+        Exception: Any other failure is logged and re-raised unchanged.
+    """
+    try:
+        # Normalize audio to a peak of 1.0 (silent input is left untouched)
+        peak = np.max(np.abs(audio))
+        if peak != 0:
+            audio = audio / peak
+        # Voice Activity Detection on fixed-size frames; a trailing partial frame is dropped
+        frame_duration = 30  # ms
+        frame_samples = int(sr * frame_duration / 1000)
+        voiced_frames = []
+        for start in range(0, len(audio) - frame_samples + 1, frame_samples):
+            frame = audio[start:start + frame_samples]
+            # Clip before the int16 cast: after normalisation a sample equal to +1.0
+            # scales to 32768, which does not fit in int16 and would corrupt the PCM.
+            pcm = np.clip(frame * 32768, -32768, 32767).astype(np.int16)
+            if vad.is_speech(pcm.tobytes(), sr):
+                voiced_frames.append(frame)
+        if not voiced_frames:
+            raise ValueError("No voiced segments detected")
+        voiced_audio = np.concatenate(voiced_frames)
+        # Pitch (F0): keep only bins that carry energy and a positive frequency
+        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=50, fmax=500)
+        valid_pitches = pitches[(magnitudes > 0) & (pitches > 0)]
+        if valid_pitches.size:
+            pitch = float(np.mean(valid_pitches))
+            # Jitter here is the relative spread of the pitch estimates
+            jitter = float(np.std(valid_pitches)) / pitch
+        else:
+            pitch = 0
+            jitter = 0
+        # Shimmer (amplitude variation) and energy share one RMS envelope
+        amplitudes = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
+        energy = float(np.mean(amplitudes))
+        shimmer = float(np.std(amplitudes)) / energy if energy else 0
+        # Formants (for respiratory analysis)
+        try:
+            lpc_coeffs = librosa.lpc(voiced_audio, order=2 * int(sr / 1000))
+            # NOTE(review): librosa's documented API does not appear to include
+            # `lpc_to_formants`. If the attribute is missing, the AttributeError is
+            # caught below and formant_mean is always 0 - confirm against the
+            # installed librosa version before relying on this feature.
+            formant_freqs = librosa.lpc_to_formants(lpc_coeffs, sr)
+            formant_mean = np.mean(formant_freqs) if formant_freqs.size > 0 else 0
+        except Exception as e:
+            logger.warning(f"Formant extraction failed: {str(e)}")
+            formant_mean = 0
+        return {
+            "pitch": pitch,
+            "jitter": jitter * 100,  # Convert to percentage
+            "shimmer": shimmer * 100,  # Convert to percentage
+            "energy": energy,
+            "formant_mean": formant_mean
+        }
+    except Exception as e:
+        logger.error(f"Feature extraction failed: {str(e)}")
+        raise
 def analyze_voice(audio_file):
+    """Analyze a voice recording and return rule-based health feedback as text.
+
+    Args:
+        audio_file: Path to a recording whose name ends in .wav, .mp3 or
+            .flac and which lasts at least one second once loaded at 16 kHz.
+            May be None or empty (e.g. if the UI submits without a
+            recording); that is reported as an error string.
+
+    Returns:
+        A multi-line report (findings, analysis details, disclaimer), or a
+        string starting with "Error: " when validation or processing fails.
+        Failures are logged and reported in the return value, never raised.
+
+    Side effects:
+        After a successful analysis the result is stored in Salesforce when a
+        connection exists, and the input file is deleted.
+    """
     try:
+        # Validate input
+        # Reject a missing path up front: os.path.exists(None) raises a TypeError
+        # whose message would be meaningless to the user.
+        if not audio_file:
+            raise ValueError("No audio provided")
+        if not os.path.exists(audio_file):
+            raise ValueError("Audio file not found")
+        if not audio_file.lower().endswith((".wav", ".mp3", ".flac")):
+            raise ValueError("Supported formats: WAV, MP3, FLAC")
         audio, sr = librosa.load(audio_file, sr=16000)
+        if len(audio) < sr:
+            raise ValueError("Audio too short (minimum 1 second)")
         # Extract features
+        features = extract_health_features(audio, sr)
+        # Analyze for health indicators
+        feedback = []
+        # The two stored scores are the jitter and shimmer percentages
+        respiratory_score = features["jitter"]
+        mental_health_score = features["shimmer"]
+        # Rule-based analysis (thresholds from voice pathology studies)
+        if respiratory_score > 1.0:
+            feedback.append(f"Elevated jitter ({respiratory_score:.2f}%) suggests potential respiratory issues. Consult a doctor.")
+        # A formant_mean of 0 means extraction failed, so the rule is skipped
+        if features["formant_mean"] and (features["formant_mean"] < 500 or features["formant_mean"] > 2000):
+            feedback.append(f"Abnormal formant frequency ({features['formant_mean']:.2f} Hz) may indicate vocal tract issues.")
+        if mental_health_score > 5.0:
+            feedback.append(f"Elevated shimmer ({mental_health_score:.2f}%) suggests potential stress or emotional strain.")
+        if features["energy"] < 0.01:
+            feedback.append(f"Low vocal energy ({features['energy']:.4f}) may indicate fatigue.")
         if not feedback:
+            feedback.append("No significant health indicators detected.")
+        # Debug info
+        feedback.append("\n**Analysis Details**:")
+        feedback.append(f"Pitch: {features['pitch']:.2f} Hz")
+        feedback.append(f"Jitter: {respiratory_score:.2f}%")
+        feedback.append(f"Shimmer: {mental_health_score:.2f}%")
+        feedback.append(f"Energy: {features['energy']:.4f}")
+        feedback.append(f"Formant Mean: {features['formant_mean']:.2f} Hz")
+        feedback.append("\n**Disclaimer**: Not a diagnostic tool. Consult a healthcare provider.")
+        feedback_str = "\n".join(feedback)
         # Store in Salesforce
         if sf:
+            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features)
+        # Clean up: the input file is removed after a successful analysis; a failed
+        # delete is logged and does not affect the returned report
         try:
             os.remove(audio_file)
+            logger.info(f"Deleted audio file: {audio_file}")
         except Exception as e:
+            logger.error(f"Failed to delete audio file: {str(e)}")
+        return feedback_str
     except Exception as e:
+        logger.error(f"Audio processing failed: {str(e)}")
+        return f"Error: {str(e)}"
+def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features):
+    """Create one HealthAssessment__c record holding the analysis results.
+
+    Best-effort: any failure while building or sending the record is logged
+    and swallowed, so the caller's analysis result is unaffected.
+    """
+    # Salesforce field name -> key in `features`, in payload order
+    feature_fields = (
+        ("Pitch__c", "pitch"),
+        ("Jitter__c", "jitter"),
+        ("Shimmer__c", "shimmer"),
+        ("Energy__c", "energy"),
+        ("FormantMean__c", "formant_mean"),
+    )
     try:
+        create_record = sf.HealthAssessment__c.create
+        record = {
             "AssessmentDate__c": datetime.utcnow().isoformat(),
             "Feedback__c": feedback,
             "RespiratoryScore__c": float(respiratory_score),
             "MentalHealthScore__c": float(mental_health_score),
+            "AudioFileName__c": os.path.basename(audio_file),
+        }
+        for field_name, feature_key in feature_fields:
+            record[field_name] = float(features[feature_key])
+        create_record(record)
+        logger.info("Stored in Salesforce")
     except Exception as e:
+        logger.error(f"Salesforce storage failed: {str(e)}")
 def test_with_sample_audio():
+    """Run analyze_voice on the bundled sample, or on generated dummy audio.
+
+    When ``audio_samples/sample.wav`` is missing, a two-second synthetic tone
+    with slight frequency/amplitude variation and low-level noise is written
+    to ``audio_samples/dummy_test.wav`` and analysed instead.
+
+    Returns:
+        The string returned by analyze_voice. Note that analyze_voice deletes
+        the file it was given after a successful analysis.
+    """
     sample_audio_path = "audio_samples/sample.wav"
+    if not os.path.exists(sample_audio_path):
+        logger.warning("Sample audio not found; generating dummy audio")
+        # Generate synthetic audio: 440 Hz sine wave with variations
+        sr = 16000
+        t = np.linspace(0, 2, 2 * sr)
+        freq_mod = 440 + 10 * np.sin(2 * np.pi * 0.5 * t)  # Frequency modulation
+        amplitude_mod = 0.5 + 0.1 * np.sin(2 * np.pi * 0.3 * t)  # Amplitude modulation
+        noise = 0.01 * np.random.normal(0, 1, len(t))  # Low-level noise
+        dummy_audio = amplitude_mod * np.sin(2 * np.pi * freq_mod * t) + noise
+        sample_audio_path = "audio_samples/dummy_test.wav"
+        os.makedirs("audio_samples", exist_ok=True)
+        # Import locally under its full name: the module-level alias `sf` is rebound
+        # to the Salesforce client (or None), so `sf.write` cannot reach soundfile.
+        import soundfile
+        # soundfile.write takes (file, data, samplerate), in that order
+        soundfile.write(sample_audio_path, dummy_audio, sr)
+    return analyze_voice(sample_audio_path)
 # Gradio interface
+# analyze_voice receives the recording as a file path (type="filepath") and the
+# string it returns is shown in the results textbox.
 iface = gr.Interface(
     fn=analyze_voice,
+    inputs=gr.Audio(type="filepath", label="Record/Upload Voice (WAV, MP3, FLAC, 1+ sec)"),
+    outputs=gr.Textbox(label="Health Assessment Results"),
+    title="Voice Health Analyzer",
+    description="Analyze voice for preliminary health insights. Supports WAV, MP3, FLAC in multiple languages. Minimum 1 second."
 )
 if __name__ == "__main__":
+    logger.info("Starting Voice Health Analyzer")
+    # Run the self-test once and print its report before starting the server
     print(test_with_sample_audio())
+    # Listen on all network interfaces, port 7860
     iface.launch(server_name="0.0.0.0", server_port=7860)