norhan12 committed on
Commit
0068d30
·
verified ·
1 Parent(s): ec0833e

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +15 -4
process_interview.py CHANGED
@@ -133,9 +133,11 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
133
  segment = audio[start_ms:end_ms]
134
  temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
135
  segment.export(temp_path, format="wav")
 
136
  y, sr = librosa.load(temp_path, sr=16000)
137
  pitches = librosa.piptrack(y=y, sr=sr)[0]
138
  pitches = pitches[pitches > 0]
 
139
  features = {
140
  'duration': (end_ms - start_ms) / 1000,
141
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
@@ -147,16 +149,24 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
147
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
148
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
149
  }
 
150
  os.remove(temp_path)
151
  return features
152
  except Exception as e:
153
  logger.error(f"Feature extraction failed: {str(e)}")
154
  return {
155
- 'duration': 0.0, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
156
- 'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
157
- 'intensityMax': 0.0, 'intensitySD': 0.0
 
 
 
 
 
 
158
  }
159
 
 
160
  def transcribe(audio_path: str) -> Dict:
161
  try:
162
  with open(audio_path, 'rb') as f:
@@ -172,7 +182,8 @@ def transcribe(audio_path: str) -> Dict:
172
  json={
173
  "audio_url": audio_url,
174
  "speaker_labels": True,
175
- "filter_profanity": True
 
176
  }
177
  )
178
  transcript_id = transcript_response.json()['id']
 
133
  segment = audio[start_ms:end_ms]
134
  temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
135
  segment.export(temp_path, format="wav")
136
+
137
  y, sr = librosa.load(temp_path, sr=16000)
138
  pitches = librosa.piptrack(y=y, sr=sr)[0]
139
  pitches = pitches[pitches > 0]
140
+
141
  features = {
142
  'duration': (end_ms - start_ms) / 1000,
143
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
 
149
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
150
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
151
  }
152
+
153
  os.remove(temp_path)
154
  return features
155
  except Exception as e:
156
  logger.error(f"Feature extraction failed: {str(e)}")
157
  return {
158
+ 'duration': (end_ms - start_ms) / 1000,
159
+ 'mean_pitch': 0.0,
160
+ 'min_pitch': 0.0,
161
+ 'max_pitch': 0.0,
162
+ 'pitch_sd': 0.0,
163
+ 'intensityMean': 0.0,
164
+ 'intensityMin': 0.0,
165
+ 'intensityMax': 0.0,
166
+ 'intensitySD': 0.0,
167
  }
168
 
169
+
170
  def transcribe(audio_path: str) -> Dict:
171
  try:
172
  with open(audio_path, 'rb') as f:
 
182
  json={
183
  "audio_url": audio_url,
184
  "speaker_labels": True,
185
+ "filter_profanity": True,
186
+ "speakers_expected": 2
187
  }
188
  )
189
  transcript_id = transcript_response.json()['id']