geethareddy committed on
Commit
cdb9181
·
verified ·
1 Parent(s): c997088

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -70
app.py CHANGED
@@ -6,114 +6,200 @@ from transformers import Wav2Vec2Processor, Wav2Vec2Model
6
  from simple_salesforce import Salesforce
7
  import os
8
  from datetime import datetime
 
 
 
9
 
10
- # Salesforce credentials (store securely in environment variables)
11
- SF_USERNAME = os.getenv("SF_USERNAME", "your_salesforce_username")
12
- SF_PASSWORD = os.getenv("SF_PASSWORD", "your_salesforce_password")
13
- SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN", "your_salesforce_security_token")
14
- SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://your-salesforce-instance.salesforce.com")
15
 
16
- # Initialize Salesforce connection
 
 
 
 
 
 
 
17
  try:
18
- sf = Salesforce(
19
- username=SF_USERNAME,
20
- password=SF_PASSWORD,
21
- security_token=SF_SECURITY_TOKEN,
22
- instance_url=SF_INSTANCE_URL
23
- )
 
 
 
 
24
  except Exception as e:
25
- print(f"Failed to connect to Salesforce: {str(e)}")
26
- sf = None
27
 
28
- # Load Wav2Vec2 model for speech feature extraction
29
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
30
  model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def analyze_voice(audio_file):
33
  """Analyze voice for health indicators."""
34
  try:
35
- # Log audio file info
36
- print(f"Processing audio file: {audio_file}")
37
-
38
- # Load audio file
 
39
  audio, sr = librosa.load(audio_file, sr=16000)
40
- print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s")
41
-
42
- # Process audio for Wav2Vec2
43
- inputs = processor(audio, sampling_rate=16000, return_tensors="pt", padding=True)
44
- print(f"Input tensor shape: {inputs['input_values'].shape}")
45
-
46
- with torch.no_grad():
47
- outputs = model(**inputs)
48
-
49
  # Extract features
50
- features = outputs.last_hidden_state.mean(dim=1).numpy()
51
- print(f"Features shape: {features.shape}, Sample values: {features[0][:5]}")
52
-
53
- # Mock health analysis
54
- respiratory_score = np.mean(features)
55
- mental_health_score = np.std(features)
56
-
57
- # Log scores
58
- print(f"Respiratory Score: {respiratory_score:.4f}, Mental Health Score: {mental_health_score:.4f}")
59
-
60
- # Threshold-based feedback
61
- feedback = ""
62
- if respiratory_score > 0.1:
63
- feedback += f"Possible respiratory issue detected (score: {respiratory_score:.4f}); consult a doctor. "
64
- if mental_health_score > 0.1:
65
- feedback += f"Possible stress indicators detected (score: {mental_health_score:.4f}); consider professional advice. "
66
-
67
  if not feedback:
68
- feedback = "No significant health indicators detected."
69
-
70
- feedback += f"\n\n**Debug Info**: Respiratory Score = {respiratory_score:.4f}, Mental Health Score = {mental_health_score:.4f}"
71
- feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
72
-
 
 
 
 
 
 
 
 
73
  # Store in Salesforce
74
  if sf:
75
- store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score)
76
-
77
- # Clean up temporary audio file
78
  try:
79
  os.remove(audio_file)
80
- print(f"Deleted temporary audio file: {audio_file}")
81
  except Exception as e:
82
- print(f"Failed to delete audio file: {str(e)}")
83
-
84
- return feedback
85
  except Exception as e:
86
- return f"Error processing audio: {str(e)}"
 
87
 
88
- def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score):
89
- """Store analysis results in Salesforce."""
90
  try:
91
  sf.HealthAssessment__c.create({
92
  "AssessmentDate__c": datetime.utcnow().isoformat(),
93
  "Feedback__c": feedback,
94
  "RespiratoryScore__c": float(respiratory_score),
95
  "MentalHealthScore__c": float(mental_health_score),
96
- "AudioFileName__c": os.path.basename(audio_file)
 
 
 
 
 
97
  })
 
98
  except Exception as e:
99
- print(f"Failed to store in Salesforce: {str(e)}")
100
 
101
  def test_with_sample_audio():
102
- """Test the app with a sample audio file."""
103
  sample_audio_path = "audio_samples/sample.wav"
104
- if os.path.exists(sample_audio_path):
105
- return analyze_voice(sample_audio_path)
106
- return "Sample audio file not found."
 
 
 
 
 
 
 
 
 
 
107
 
108
  # Gradio interface
109
  iface = gr.Interface(
110
  fn=analyze_voice,
111
- inputs=gr.Audio(type="filepath", label="Record or Upload Voice"),
112
- outputs=gr.Textbox(label="Health Assessment Feedback"),
113
- title="Health Voice Analyzer",
114
- description="Record or upload a voice sample for preliminary health assessment. Supports English, Spanish, Hindi, Mandarin."
115
  )
116
 
117
  if __name__ == "__main__":
 
118
  print(test_with_sample_audio())
119
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
6
  from simple_salesforce import Salesforce
7
  import os
8
  from datetime import datetime
9
+ import logging
10
+ import soundfile as sf
11
+ import webrtcvad
12
 
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
15
+ logger = logging.getLogger(__name__)
 
 
16
 
17
+ # Salesforce credentials
18
+ SF_USERNAME = os.getenv("SF_USERNAME")
19
+ SF_PASSWORD = os.getenv("SF_PASSWORD")
20
+ SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
21
+ SF_INSTANCE_URL = os.getenv("SF_INSTANCE_URL", "https://login.salesforce.com")
22
+
23
+ # Initialize Salesforce
24
+ sf = None
25
  try:
26
+ if all([SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN]):
27
+ sf = Salesforce(
28
+ username=SF_USERNAME,
29
+ password=SF_PASSWORD,
30
+ security_token=SF_SECURITY_TOKEN,
31
+ instance_url=SF_INSTANCE_URL
32
+ )
33
+ logger.info("Connected to Salesforce")
34
+ else:
35
+ logger.warning("Salesforce credentials missing; skipping integration")
36
  except Exception as e:
37
+ logger.error(f"Salesforce connection failed: {str(e)}")
 
38
 
39
+ # Load Wav2Vec2 model (optional context features)
40
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
41
  model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
42
 
43
+ # Initialize VAD
44
+ vad = webrtcvad.Vad(mode=2) # Moderate mode for balanced voice detection
45
+
def _formant_frequencies(lpc_coeffs, sr, max_formants=3):
    """Estimate the lowest formant frequencies (Hz) from LPC coefficients.

    The roots of the LPC polynomial are the poles of the all-pole model.
    Each conjugate pair contributes one resonance; its angle gives the
    frequency and its radius gives the bandwidth. Poles below 90 Hz or
    wider than 400 Hz are discarded as unlikely formants (a common
    heuristic, not a calibrated clinical rule).

    Returns:
        A sorted 1-D array with at most ``max_formants`` frequencies;
        empty when no pole passes the filter.
    """
    poles = np.roots(lpc_coeffs)
    poles = poles[np.imag(poles) > 0]  # keep one pole per conjugate pair
    radii = np.abs(poles)
    nonzero = radii > 0  # avoid log(0) in the bandwidth estimate
    poles, radii = poles[nonzero], radii[nonzero]

    freqs = np.angle(poles) * sr / (2 * np.pi)
    bandwidths = -np.log(radii) * sr / np.pi
    plausible = (freqs > 90) & (bandwidths < 400)
    return np.sort(freqs[plausible])[:max_formants]


def extract_health_features(audio, sr):
    """Extract pitch, jitter, shimmer, energy and formant features.

    Args:
        audio: 1-D float waveform (it is peak-normalised here before use).
        sr: Sampling rate in Hz; it is passed unchanged to the VAD and to
            librosa.

    Returns:
        dict with keys ``pitch`` (Hz), ``jitter`` and ``shimmer``
        (percent), ``energy`` (mean RMS of the voiced audio) and
        ``formant_mean`` (Hz, 0 when formant estimation fails).

    Raises:
        ValueError: if the VAD finds no voiced 30 ms frame.
        Exception: any other failure is logged and re-raised.
    """
    try:
        # Peak-normalise; an all-zero signal is left untouched.
        peak = np.max(np.abs(audio))
        if peak != 0:
            audio = audio / peak

        # Voice Activity Detection on complete 30 ms frames only.
        frame_duration = 30  # ms
        frame_samples = int(sr * frame_duration / 1000)
        # Scale by 32767 (not 32768) and clip so a sample at +1.0 cannot
        # overflow the int16 range expected by the VAD.
        pcm = np.clip(audio * 32767, -32768, 32767).astype(np.int16)
        voiced_frames = [
            audio[start:start + frame_samples]
            for start in range(0, len(audio) - frame_samples + 1, frame_samples)
            if vad.is_speech(pcm[start:start + frame_samples].tobytes(), sr)
        ]
        if not voiced_frames:
            raise ValueError("No voiced segments detected")
        voiced_audio = np.concatenate(voiced_frames)

        # Pitch (F0): keep only bins that carry energy and a positive pitch.
        pitches, magnitudes = librosa.piptrack(y=voiced_audio, sr=sr, fmin=50, fmax=500)
        valid_pitches = pitches[(magnitudes > 0) & (pitches > 0)]
        if valid_pitches.size:
            pitch = float(np.mean(valid_pitches))
            # Relative spread of the pitch estimates, used as a jitter proxy.
            jitter = float(np.std(valid_pitches)) / pitch
        else:
            pitch = 0.0
            jitter = 0.0

        # Shimmer (relative amplitude variation) and energy share one RMS pass.
        rms = librosa.feature.rms(y=voiced_audio, frame_length=2048, hop_length=512)[0]
        energy = float(np.mean(rms))
        shimmer = float(np.std(rms)) / energy if energy else 0.0

        # Formants are best-effort: a failure here must not abort the analysis.
        try:
            lpc_coeffs = librosa.lpc(voiced_audio, order=2 * int(sr / 1000))
            formant_freqs = _formant_frequencies(lpc_coeffs, sr)
            formant_mean = float(np.mean(formant_freqs)) if formant_freqs.size else 0.0
        except Exception as e:
            logger.warning(f"Formant extraction failed: {str(e)}")
            formant_mean = 0

        return {
            "pitch": pitch,
            "jitter": jitter * 100,  # Convert to percentage
            "shimmer": shimmer * 100,  # Convert to percentage
            "energy": energy,
            "formant_mean": formant_mean,
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        raise
96
+
def analyze_voice(audio_file):
    """Analyze a voice recording and return rule-based feedback text.

    The file is loaded at 16 kHz, acoustic features are extracted, and a
    set of threshold rules is turned into a human-readable report. When a
    Salesforce connection is configured the result is also stored there.

    Side effect: after a successful analysis the input file is deleted.

    Args:
        audio_file: Path to a WAV, MP3 or FLAC file of at least one second.

    Returns:
        The feedback report, or ``"Error: <reason>"`` if anything fails.
        This function does not raise.
    """
    try:
        # Validate input. A missing path (nothing recorded or uploaded) is
        # reported explicitly instead of surfacing as a TypeError.
        if not audio_file:
            raise ValueError("No audio provided")
        if not os.path.exists(audio_file):
            raise ValueError("Audio file not found")
        if not audio_file.lower().endswith((".wav", ".mp3", ".flac")):
            raise ValueError("Supported formats: WAV, MP3, FLAC")
        audio, sr = librosa.load(audio_file, sr=16000)
        if len(audio) < sr:
            raise ValueError("Audio too short (minimum 1 second)")

        # Extract features
        features = extract_health_features(audio, sr)

        # The "scores" are simply the jitter and shimmer percentages.
        feedback = []
        respiratory_score = features["jitter"]
        mental_health_score = features["shimmer"]
        formant_mean = features["formant_mean"]

        # Rule-based analysis. The thresholds are heuristics (the original
        # author attributes them to voice pathology studies).
        if respiratory_score > 1.0:
            feedback.append(f"Elevated jitter ({respiratory_score:.2f}%) suggests potential respiratory issues. Consult a doctor.")
        # A formant mean of 0 means "not available" and is skipped.
        if formant_mean and (formant_mean < 500 or formant_mean > 2000):
            feedback.append(f"Abnormal formant frequency ({features['formant_mean']:.2f} Hz) may indicate vocal tract issues.")
        if mental_health_score > 5.0:
            feedback.append(f"Elevated shimmer ({mental_health_score:.2f}%) suggests potential stress or emotional strain.")
        if features["energy"] < 0.01:
            feedback.append(f"Low vocal energy ({features['energy']:.4f}) may indicate fatigue.")

        if not feedback:
            feedback.append("No significant health indicators detected.")

        # Raw measurements and the disclaimer are always appended.
        feedback.append("\n**Analysis Details**:")
        feedback.append(f"Pitch: {features['pitch']:.2f} Hz")
        feedback.append(f"Jitter: {respiratory_score:.2f}%")
        feedback.append(f"Shimmer: {mental_health_score:.2f}%")
        feedback.append(f"Energy: {features['energy']:.4f}")
        feedback.append(f"Formant Mean: {features['formant_mean']:.2f} Hz")
        feedback.append("\n**Disclaimer**: Not a diagnostic tool. Consult a healthcare provider.")

        feedback_str = "\n".join(feedback)

        # Store in Salesforce only when a connection was established.
        if sf:
            store_in_salesforce(audio_file, feedback_str, respiratory_score, mental_health_score, features)

        # Clean up: a failed delete is logged but does not fail the analysis.
        try:
            os.remove(audio_file)
            logger.info(f"Deleted audio file: {audio_file}")
        except Exception as e:
            logger.error(f"Failed to delete audio file: {str(e)}")

        return feedback_str
    except Exception as e:
        logger.error(f"Audio processing failed: {str(e)}")
        return f"Error: {str(e)}"
156
 
def store_in_salesforce(audio_file, feedback, respiratory_score, mental_health_score, features):
    """Persist one assessment as a ``HealthAssessment__c`` record.

    Best-effort: any failure while building or sending the record is
    logged and swallowed, so the caller's analysis result is unaffected.

    Args:
        audio_file: Path of the analysed file; only its base name is stored.
        feedback: Report text shown to the user.
        respiratory_score: Value stored in ``RespiratoryScore__c``.
        mental_health_score: Value stored in ``MentalHealthScore__c``.
        features: Mapping with ``pitch``, ``jitter``, ``shimmer``,
            ``energy`` and ``formant_mean`` entries.
    """
    # Salesforce field name -> key in ``features``.
    feature_fields = {
        "Pitch__c": "pitch",
        "Jitter__c": "jitter",
        "Shimmer__c": "shimmer",
        "Energy__c": "energy",
        "FormantMean__c": "formant_mean",
    }
    try:
        record = {
            "AssessmentDate__c": datetime.utcnow().isoformat(),
            "Feedback__c": feedback,
            "RespiratoryScore__c": float(respiratory_score),
            "MentalHealthScore__c": float(mental_health_score),
            "AudioFileName__c": os.path.basename(audio_file),
        }
        for field_name, feature_key in feature_fields.items():
            record[field_name] = float(features[feature_key])
        sf.HealthAssessment__c.create(record)
        logger.info("Stored in Salesforce")
    except Exception as e:
        logger.error(f"Salesforce storage failed: {str(e)}")
175
 
def test_with_sample_audio():
    """Run the analyzer once on the bundled sample, or on synthetic audio.

    If ``audio_samples/sample.wav`` exists, a scratch copy of it is
    analysed, because ``analyze_voice`` deletes the file it is given and
    the bundled sample must survive repeated runs. Otherwise a two-second
    modulated 440 Hz tone with low-level noise is written to
    ``audio_samples/dummy_test.wav`` and analysed.

    Returns:
        The string returned by ``analyze_voice``.
    """
    sample_audio_path = "audio_samples/sample.wav"
    if os.path.exists(sample_audio_path):
        import shutil
        import tempfile

        # The copy keeps the original base name inside a throwaway directory.
        with tempfile.TemporaryDirectory() as scratch_dir:
            return analyze_voice(shutil.copy(sample_audio_path, scratch_dir))

    # Imported under its full name: the module-level alias ``sf`` is rebound
    # to the Salesforce connection (or None) and cannot be used for audio I/O.
    import soundfile

    logger.warning("Sample audio not found; generating dummy audio")
    # Generate synthetic audio: 440 Hz sine wave with variations
    sr = 16000
    t = np.arange(2 * sr) / sr  # exactly 1/sr spacing, endpoint excluded
    freq_mod = 440 + 10 * np.sin(2 * np.pi * 0.5 * t)  # Frequency modulation
    amplitude_mod = 0.5 + 0.1 * np.sin(2 * np.pi * 0.3 * t)  # Amplitude modulation
    noise = 0.01 * np.random.normal(0, 1, len(t))  # Low-level noise
    dummy_audio = amplitude_mod * np.sin(2 * np.pi * freq_mod * t) + noise

    sample_audio_path = "audio_samples/dummy_test.wav"
    os.makedirs("audio_samples", exist_ok=True)
    # soundfile expects (file, data, samplerate) in that order.
    soundfile.write(sample_audio_path, dummy_audio, sr)
    return analyze_voice(sample_audio_path)
192
 
193
  # Gradio interface
194
  iface = gr.Interface(
195
  fn=analyze_voice,
196
+ inputs=gr.Audio(type="filepath", label="Record/Upload Voice (WAV, MP3, FLAC, 1+ sec)"),
197
+ outputs=gr.Textbox(label="Health Assessment Results"),
198
+ title="Voice Health Analyzer",
199
+ description="Analyze voice for preliminary health insights. Supports WAV, MP3, FLAC in multiple languages. Minimum 1 second."
200
  )
201
 
202
  if __name__ == "__main__":
203
+ logger.info("Starting Voice Health Analyzer")
204
  print(test_with_sample_audio())
205
  iface.launch(server_name="0.0.0.0", server_port=7860)