Spaces:

EvalBot
/

Audio

Sleeping

App Files Community

norhan12 committed on Jun 11, 2025

Commit

0068d30

verified ·

1 Parent(s): ec0833e

Update process_interview.py

Browse files

Files changed (1) hide show

process_interview.py +15 -4

process_interview.py CHANGED Viewed

@@ -133,9 +133,11 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
         segment = audio[start_ms:end_ms]
         temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
         segment.export(temp_path, format="wav")
         y, sr = librosa.load(temp_path, sr=16000)
         pitches = librosa.piptrack(y=y, sr=sr)[0]
         pitches = pitches[pitches > 0]
         features = {
             'duration': (end_ms - start_ms) / 1000,
             'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
@@ -147,16 +149,24 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
             'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
             'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
         }
         os.remove(temp_path)
         return features
     except Exception as e:
         logger.error(f"Feature extraction failed: {str(e)}")
         return {
-            'duration': 0.0, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
-            'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
-            'intensityMax': 0.0, 'intensitySD': 0.0
         }
 def transcribe(audio_path: str) -> Dict:
     try:
         with open(audio_path, 'rb') as f:
@@ -172,7 +182,8 @@ def transcribe(audio_path: str) -> Dict:
             json={
                 "audio_url": audio_url,
                 "speaker_labels": True,
-                "filter_profanity": True
             }
         )
         transcript_id = transcript_response.json()['id']

         segment = audio[start_ms:end_ms]
         temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
         segment.export(temp_path, format="wav")
         y, sr = librosa.load(temp_path, sr=16000)
         pitches = librosa.piptrack(y=y, sr=sr)[0]
         pitches = pitches[pitches > 0]
         features = {
             'duration': (end_ms - start_ms) / 1000,
             'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
             'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
             'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
         }
         os.remove(temp_path)
         return features
     except Exception as e:
         logger.error(f"Feature extraction failed: {str(e)}")
         return {
+            'duration': (end_ms - start_ms) / 1000,
+            'mean_pitch': 0.0,
+            'min_pitch': 0.0,
+            'max_pitch': 0.0,
+            'pitch_sd': 0.0,
+            'intensityMean': 0.0,
+            'intensityMin': 0.0,
+            'intensityMax': 0.0,
+            'intensitySD': 0.0,
         }
 def transcribe(audio_path: str) -> Dict:
     try:
         with open(audio_path, 'rb') as f:
             json={
                 "audio_url": audio_url,
                 "speaker_labels": True,
+                "filter_profanity": True,
+                "speakers_expected": 2
             }
         )
         transcript_id = transcript_response.json()['id']