fabiodeluca77 committed on
Commit
12b2aa0
·
verified ·
1 Parent(s): 7279214

Update voice_break_api.py

Browse files
Files changed (1) hide show
  1. voice_break_api.py +21 -13
voice_break_api.py CHANGED
@@ -15,6 +15,7 @@ import base64
15
  from datetime import datetime
16
  import json
17
  import os
 
18
 
19
 
20
  app = FastAPI(
@@ -61,38 +62,39 @@ class TrainingRequest(BaseModel):
61
 
62
  def extract_features(audio_data: bytes) -> Dict[str, float]:
63
  """Estrai feature prosodiche dall'audio"""
 
64
  try:
65
  print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
66
- print(f"[DEBUG] Audio data type: {type(audio_data)}")
67
 
68
- # Converti bytes in array numpy
69
- audio_bio = io.BytesIO(audio_data)
70
- print(f"[DEBUG] Created BytesIO object")
 
 
71
 
72
- audio_array, sr = librosa.load(audio_bio, sr=22050)
 
 
 
73
  print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
74
 
75
  # Feature extraction
76
  rms = float(np.sqrt(np.mean(audio_array**2)))
77
- print(f"[DEBUG] RMS calculated: {rms}")
78
-
79
  zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
80
- print(f"[DEBUG] ZCR calculated: {zcr}")
81
 
82
  # Pitch (F0)
83
  pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
84
  pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
85
- print(f"[DEBUG] Pitch calculated: {pitch}")
86
 
87
  # Spectral features
88
  spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
89
  spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
90
- print(f"[DEBUG] Spectral features calculated")
91
 
92
  # MFCC
93
  mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
94
  mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
95
- print(f"[DEBUG] MFCC calculated")
 
96
 
97
  return {
98
  "rms": rms,
@@ -105,11 +107,17 @@ def extract_features(audio_data: bytes) -> Dict[str, float]:
105
  }
106
  except Exception as e:
107
  print(f"[ERROR] Feature extraction failed: {str(e)}")
108
- print(f"[ERROR] Error type: {type(e)}")
109
  import traceback
110
  traceback.print_exc()
111
  raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
112
-
 
 
 
 
 
 
 
113
 
114
  def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
115
  """
 
15
  from datetime import datetime
16
  import json
17
  import os
18
+ import tempfile
19
 
20
 
21
  app = FastAPI(
 
62
 
63
  def extract_features(audio_data: bytes) -> Dict[str, float]:
64
  """Estrai feature prosodiche dall'audio"""
65
+ temp_file = None
66
  try:
67
  print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
 
68
 
69
+ # Salva temporaneamente il file con estensione
70
+ # Prova prima .webm (formato comune browser)
71
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
72
+ temp_file.write(audio_data)
73
+ temp_path = temp_file.name
74
 
75
+ print(f"[DEBUG] Saved to temp file: {temp_path}")
76
+
77
+ # Carica con librosa
78
+ audio_array, sr = librosa.load(temp_path, sr=22050)
79
  print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
80
 
81
  # Feature extraction
82
  rms = float(np.sqrt(np.mean(audio_array**2)))
 
 
83
  zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
 
84
 
85
  # Pitch (F0)
86
  pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
87
  pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
 
88
 
89
  # Spectral features
90
  spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
91
  spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
 
92
 
93
  # MFCC
94
  mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
95
  mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
96
+
97
+ print(f"[DEBUG] Features extracted successfully")
98
 
99
  return {
100
  "rms": rms,
 
107
  }
108
  except Exception as e:
109
  print(f"[ERROR] Feature extraction failed: {str(e)}")
 
110
  import traceback
111
  traceback.print_exc()
112
  raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
113
+ finally:
114
+ # Pulisci file temporaneo
115
+ if temp_file and os.path.exists(temp_path):
116
+ try:
117
+ os.unlink(temp_path)
118
+ print(f"[DEBUG] Cleaned up temp file")
119
+ except:
120
+ pass
121
 
122
  def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
123
  """