Spaces:
Sleeping
Sleeping
Update voice_break_api.py
Browse files- voice_break_api.py +21 -13
voice_break_api.py
CHANGED
|
@@ -15,6 +15,7 @@ import base64
|
|
| 15 |
from datetime import datetime
|
| 16 |
import json
|
| 17 |
import os
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
app = FastAPI(
|
|
@@ -61,38 +62,39 @@ class TrainingRequest(BaseModel):
|
|
| 61 |
|
| 62 |
def extract_features(audio_data: bytes) -> Dict[str, float]:
|
| 63 |
"""Estrai feature prosodiche dall'audio"""
|
|
|
|
| 64 |
try:
|
| 65 |
print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
|
| 66 |
-
print(f"[DEBUG] Audio data type: {type(audio_data)}")
|
| 67 |
|
| 68 |
-
#
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
|
| 74 |
|
| 75 |
# Feature extraction
|
| 76 |
rms = float(np.sqrt(np.mean(audio_array**2)))
|
| 77 |
-
print(f"[DEBUG] RMS calculated: {rms}")
|
| 78 |
-
|
| 79 |
zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
|
| 80 |
-
print(f"[DEBUG] ZCR calculated: {zcr}")
|
| 81 |
|
| 82 |
# Pitch (F0)
|
| 83 |
pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
|
| 84 |
pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
|
| 85 |
-
print(f"[DEBUG] Pitch calculated: {pitch}")
|
| 86 |
|
| 87 |
# Spectral features
|
| 88 |
spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
|
| 89 |
spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
|
| 90 |
-
print(f"[DEBUG] Spectral features calculated")
|
| 91 |
|
| 92 |
# MFCC
|
| 93 |
mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
|
| 94 |
mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
|
| 95 |
-
|
|
|
|
| 96 |
|
| 97 |
return {
|
| 98 |
"rms": rms,
|
|
@@ -105,11 +107,17 @@ def extract_features(audio_data: bytes) -> Dict[str, float]:
|
|
| 105 |
}
|
| 106 |
except Exception as e:
|
| 107 |
print(f"[ERROR] Feature extraction failed: {str(e)}")
|
| 108 |
-
print(f"[ERROR] Error type: {type(e)}")
|
| 109 |
import traceback
|
| 110 |
traceback.print_exc()
|
| 111 |
raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
|
| 115 |
"""
|
|
|
|
| 15 |
from datetime import datetime
|
| 16 |
import json
|
| 17 |
import os
|
| 18 |
+
import tempfile
|
| 19 |
|
| 20 |
|
| 21 |
app = FastAPI(
|
|
|
|
| 62 |
|
| 63 |
def extract_features(audio_data: bytes) -> Dict[str, float]:
|
| 64 |
"""Estrai feature prosodiche dall'audio"""
|
| 65 |
+
temp_file = None
|
| 66 |
try:
|
| 67 |
print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
|
|
|
|
| 68 |
|
| 69 |
+
# Salva temporaneamente il file con estensione
|
| 70 |
+
# Prova prima .webm (formato comune browser)
|
| 71 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
|
| 72 |
+
temp_file.write(audio_data)
|
| 73 |
+
temp_path = temp_file.name
|
| 74 |
|
| 75 |
+
print(f"[DEBUG] Saved to temp file: {temp_path}")
|
| 76 |
+
|
| 77 |
+
# Carica con librosa
|
| 78 |
+
audio_array, sr = librosa.load(temp_path, sr=22050)
|
| 79 |
print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
|
| 80 |
|
| 81 |
# Feature extraction
|
| 82 |
rms = float(np.sqrt(np.mean(audio_array**2)))
|
|
|
|
|
|
|
| 83 |
zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
|
|
|
|
| 84 |
|
| 85 |
# Pitch (F0)
|
| 86 |
pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
|
| 87 |
pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
|
|
|
|
| 88 |
|
| 89 |
# Spectral features
|
| 90 |
spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
|
| 91 |
spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
|
|
|
|
| 92 |
|
| 93 |
# MFCC
|
| 94 |
mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
|
| 95 |
mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
|
| 96 |
+
|
| 97 |
+
print(f"[DEBUG] Features extracted successfully")
|
| 98 |
|
| 99 |
return {
|
| 100 |
"rms": rms,
|
|
|
|
| 107 |
}
|
| 108 |
except Exception as e:
|
| 109 |
print(f"[ERROR] Feature extraction failed: {str(e)}")
|
|
|
|
| 110 |
import traceback
|
| 111 |
traceback.print_exc()
|
| 112 |
raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
|
| 113 |
+
finally:
|
| 114 |
+
# Pulisci file temporaneo
|
| 115 |
+
if temp_file and os.path.exists(temp_path):
|
| 116 |
+
try:
|
| 117 |
+
os.unlink(temp_path)
|
| 118 |
+
print(f"[DEBUG] Cleaned up temp file")
|
| 119 |
+
except:
|
| 120 |
+
pass
|
| 121 |
|
| 122 |
def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
|
| 123 |
"""
|