Spaces:

fabiodeluca77
/

voice-break-api

Sleeping

App Files Files Community

fabiodeluca77 committed on Oct 31, 2025

Commit

12b2aa0

verified ·

1 Parent(s): 7279214

Update voice_break_api.py

Browse files

Files changed (1) hide show

voice_break_api.py +21 -13

voice_break_api.py CHANGED Viewed

@@ -15,6 +15,7 @@ import base64
 from datetime import datetime
 import json
 import os
 app = FastAPI(
@@ -61,38 +62,39 @@ class TrainingRequest(BaseModel):
 def extract_features(audio_data: bytes) -> Dict[str, float]:
     """Estrai feature prosodiche dall'audio"""
     try:
         print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
-        print(f"[DEBUG] Audio data type: {type(audio_data)}")
-        # Converti bytes in array numpy
-        audio_bio = io.BytesIO(audio_data)
-        print(f"[DEBUG] Created BytesIO object")
-        audio_array, sr = librosa.load(audio_bio, sr=22050)
         print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
         # Feature extraction
         rms = float(np.sqrt(np.mean(audio_array**2)))
-        print(f"[DEBUG] RMS calculated: {rms}")
         zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
-        print(f"[DEBUG] ZCR calculated: {zcr}")
         # Pitch (F0)
         pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
         pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
-        print(f"[DEBUG] Pitch calculated: {pitch}")
         # Spectral features
         spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
         spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
-        print(f"[DEBUG] Spectral features calculated")
         # MFCC
         mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
         mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
-        print(f"[DEBUG] MFCC calculated")
         return {
             "rms": rms,
@@ -105,11 +107,17 @@ def extract_features(audio_data: bytes) -> Dict[str, float]:
         }
     except Exception as e:
         print(f"[ERROR] Feature extraction failed: {str(e)}")
-        print(f"[ERROR] Error type: {type(e)}")
         import traceback
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
 def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
     """

 from datetime import datetime
 import json
 import os
+import tempfile
 app = FastAPI(
 def extract_features(audio_data: bytes) -> Dict[str, float]:
     """Estrai feature prosodiche dall'audio"""
+    temp_file = None
     try:
         print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
+        # Salva temporaneamente il file con estensione
+        # Prova prima .webm (formato comune browser)
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
+            temp_file.write(audio_data)
+            temp_path = temp_file.name
+        print(f"[DEBUG] Saved to temp file: {temp_path}")
+        # Carica con librosa
+        audio_array, sr = librosa.load(temp_path, sr=22050)
         print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
         # Feature extraction
         rms = float(np.sqrt(np.mean(audio_array**2)))
         zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
         # Pitch (F0)
         pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
         pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
         # Spectral features
         spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
         spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
         # MFCC
         mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
         mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
+        print(f"[DEBUG] Features extracted successfully")
         return {
             "rms": rms,
         }
     except Exception as e:
         print(f"[ERROR] Feature extraction failed: {str(e)}")
         import traceback
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
+    finally:
+        # Pulisci file temporaneo
+        if temp_file and os.path.exists(temp_path):
+            try:
+                os.unlink(temp_path)
+                print(f"[DEBUG] Cleaned up temp file")
+            except:
+                pass
 def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
     """