Spaces:
Running
Running
fix: Lightweight baseline extraction — compute only pitch/energy/tempo inline and skip the full feature pipeline to prevent OOM on Hugging Face (HF) Spaces
Browse files
- backend/routes/auth.py +24 -6
backend/routes/auth.py
CHANGED
|
@@ -133,8 +133,8 @@ async def setup_baseline(
|
|
| 133 |
db: Session = Depends(get_db)
|
| 134 |
):
|
| 135 |
"""
|
| 136 |
-
Onboarding step: Takes a neutral reading audio clip, extracts
|
| 137 |
-
|
| 138 |
"""
|
| 139 |
suffix = ".webm" if audio.content_type == "audio/webm" else ".wav"
|
| 140 |
tmp_fd, tmp_path = tempfile.mkstemp(suffix=suffix)
|
|
@@ -146,11 +146,29 @@ async def setup_baseline(
|
|
| 146 |
|
| 147 |
try:
|
| 148 |
wav_path = convert_to_wav(tmp_path)
|
| 149 |
-
features = extract_features(wav_path)
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
except Exception as e:
|
| 155 |
print(f"[Baseline] Audio processing failed, using defaults: {e}")
|
| 156 |
current_user.baseline_pitch = 150.0
|
|
|
|
| 133 |
db: Session = Depends(get_db)
|
| 134 |
):
|
| 135 |
"""
|
| 136 |
+
Onboarding step: Takes a neutral reading audio clip, extracts ONLY the 3
|
| 137 |
+
baseline metrics (pitch, energy, speech_rate) using lightweight librosa calls.
|
| 138 |
"""
|
| 139 |
suffix = ".webm" if audio.content_type == "audio/webm" else ".wav"
|
| 140 |
tmp_fd, tmp_path = tempfile.mkstemp(suffix=suffix)
|
|
|
|
| 146 |
|
| 147 |
try:
|
| 148 |
wav_path = convert_to_wav(tmp_path)
|
|
|
|
| 149 |
|
| 150 |
+
# Lightweight inline extraction — only 3 values needed
|
| 151 |
+
import librosa
|
| 152 |
+
import numpy as np
|
| 153 |
+
y, sr = librosa.load(wav_path, sr=16000, mono=True, duration=15)
|
| 154 |
+
|
| 155 |
+
# Pitch
|
| 156 |
+
f0, voiced, _ = librosa.pyin(y, fmin=80, fmax=600, sr=sr)
|
| 157 |
+
f0_clean = f0[voiced == 1] if voiced is not None else np.array([])
|
| 158 |
+
pitch = float(np.mean(f0_clean)) if len(f0_clean) > 0 else 150.0
|
| 159 |
+
|
| 160 |
+
# Energy
|
| 161 |
+
rms = librosa.feature.rms(y=y)[0]
|
| 162 |
+
energy = float(np.sqrt(np.mean(rms ** 2)))
|
| 163 |
+
|
| 164 |
+
# Speech rate (tempo proxy)
|
| 165 |
+
tempo_arr, _ = librosa.beat.beat_track(y=y, sr=sr)
|
| 166 |
+
speech_rate = float(tempo_arr) if np.isscalar(tempo_arr) else float(tempo_arr[0])
|
| 167 |
+
|
| 168 |
+
current_user.baseline_pitch = round(pitch, 2)
|
| 169 |
+
current_user.baseline_energy = round(energy, 6)
|
| 170 |
+
current_user.baseline_speech_rate = round(speech_rate, 2)
|
| 171 |
+
|
| 172 |
except Exception as e:
|
| 173 |
print(f"[Baseline] Audio processing failed, using defaults: {e}")
|
| 174 |
current_user.baseline_pitch = 150.0
|