Update process_interview.py
Browse files
- process_interview.py +15 -4
process_interview.py
CHANGED
|
@@ -133,9 +133,11 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
|
|
| 133 |
segment = audio[start_ms:end_ms]
|
| 134 |
temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
|
| 135 |
segment.export(temp_path, format="wav")
|
|
|
|
| 136 |
y, sr = librosa.load(temp_path, sr=16000)
|
| 137 |
pitches = librosa.piptrack(y=y, sr=sr)[0]
|
| 138 |
pitches = pitches[pitches > 0]
|
|
|
|
| 139 |
features = {
|
| 140 |
'duration': (end_ms - start_ms) / 1000,
|
| 141 |
'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
|
|
@@ -147,16 +149,24 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
|
|
| 147 |
'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
|
| 148 |
'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
|
| 149 |
}
|
|
|
|
| 150 |
os.remove(temp_path)
|
| 151 |
return features
|
| 152 |
except Exception as e:
|
| 153 |
logger.error(f"Feature extraction failed: {str(e)}")
|
| 154 |
return {
|
| 155 |
-
'duration':
|
| 156 |
-
'
|
| 157 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
}
|
| 159 |
|
|
|
|
| 160 |
def transcribe(audio_path: str) -> Dict:
|
| 161 |
try:
|
| 162 |
with open(audio_path, 'rb') as f:
|
|
@@ -172,7 +182,8 @@ def transcribe(audio_path: str) -> Dict:
|
|
| 172 |
json={
|
| 173 |
"audio_url": audio_url,
|
| 174 |
"speaker_labels": True,
|
| 175 |
-
"filter_profanity": True
|
|
|
|
| 176 |
}
|
| 177 |
)
|
| 178 |
transcript_id = transcript_response.json()['id']
|
|
|
|
| 133 |
segment = audio[start_ms:end_ms]
|
| 134 |
temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
|
| 135 |
segment.export(temp_path, format="wav")
|
| 136 |
+
|
| 137 |
y, sr = librosa.load(temp_path, sr=16000)
|
| 138 |
pitches = librosa.piptrack(y=y, sr=sr)[0]
|
| 139 |
pitches = pitches[pitches > 0]
|
| 140 |
+
|
| 141 |
features = {
|
| 142 |
'duration': (end_ms - start_ms) / 1000,
|
| 143 |
'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
|
|
|
|
| 149 |
'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
|
| 150 |
'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
|
| 151 |
}
|
| 152 |
+
|
| 153 |
os.remove(temp_path)
|
| 154 |
return features
|
| 155 |
except Exception as e:
|
| 156 |
logger.error(f"Feature extraction failed: {str(e)}")
|
| 157 |
return {
|
| 158 |
+
'duration': (end_ms - start_ms) / 1000,
|
| 159 |
+
'mean_pitch': 0.0,
|
| 160 |
+
'min_pitch': 0.0,
|
| 161 |
+
'max_pitch': 0.0,
|
| 162 |
+
'pitch_sd': 0.0,
|
| 163 |
+
'intensityMean': 0.0,
|
| 164 |
+
'intensityMin': 0.0,
|
| 165 |
+
'intensityMax': 0.0,
|
| 166 |
+
'intensitySD': 0.0,
|
| 167 |
}
|
| 168 |
|
| 169 |
+
|
| 170 |
def transcribe(audio_path: str) -> Dict:
|
| 171 |
try:
|
| 172 |
with open(audio_path, 'rb') as f:
|
|
|
|
| 182 |
json={
|
| 183 |
"audio_url": audio_url,
|
| 184 |
"speaker_labels": True,
|
| 185 |
+
"filter_profanity": True,
|
| 186 |
+
"speakers_expected": 2
|
| 187 |
}
|
| 188 |
)
|
| 189 |
transcript_id = transcript_response.json()['id']
|