"""
Voice Break API - FastAPI Backend
Endpoints for voice analysis, ML classification, and dataset management
"""
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import List, Dict, Optional
import librosa
import numpy as np
import io
import base64
from datetime import datetime
import json
import os
import tempfile
app = FastAPI(
title="Voice Break API",
description="API per analisi prosodica e classificazione vocale",
version="2.0.0"
)
# CORS - Allow requests from Lovable
app.add_middleware(
CORSMiddleware,
    allow_origins=["*"],  # In production, restrict this to the Lovable domain
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# ============================================================================
# MODELS
# ============================================================================
class VoiceAnalysisResult(BaseModel):
element: str # aria, acqua, terra, fuoco
confidence: float
features: Dict[str, float]
prosody_score: Dict[str, float]
timestamp: str
class BaselineCalibration(BaseModel):
rms_baseline: float
pitch_baseline: float
duration_ms: float
success: bool
message: str
class TrainingRequest(BaseModel):
element: str
audio_base64: str
duration_ms: float
# ============================================================================
# AUDIO ANALYSIS - Core Functions
# ============================================================================
def extract_features(audio_data: bytes) -> Dict[str, float]:
"""Estrai feature prosodiche dall'audio"""
temp_file = None
try:
print(f"[DEBUG] Received audio data: {len(audio_data)} bytes")
        # Save the audio to a temporary file with an extension
        # Try .webm first (the common browser recording format)
with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
temp_file.write(audio_data)
temp_path = temp_file.name
print(f"[DEBUG] Saved to temp file: {temp_path}")
        # Load with librosa
audio_array, sr = librosa.load(temp_path, sr=22050)
print(f"[DEBUG] Loaded audio: shape={audio_array.shape}, sr={sr}")
# Feature extraction
rms = float(np.sqrt(np.mean(audio_array**2)))
zcr = float(np.mean(librosa.feature.zero_crossing_rate(audio_array)))
# Pitch (F0)
pitches, magnitudes = librosa.piptrack(y=audio_array, sr=sr)
pitch = float(np.mean(pitches[pitches > 0])) if len(pitches[pitches > 0]) > 0 else 0.0
# Spectral features
spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio_array, sr=sr)))
spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=audio_array, sr=sr)))
# MFCC
mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
mfcc_mean = [float(np.mean(mfcc)) for mfcc in mfccs]
print(f"[DEBUG] Features extracted successfully")
return {
"rms": rms,
"zcr": zcr,
"pitch": pitch,
"spectral_centroid": spectral_centroid,
"spectral_rolloff": spectral_rolloff,
"mfcc_mean": mfcc_mean[0] if mfcc_mean else 0.0,
"duration": float(len(audio_array) / sr)
}
except Exception as e:
print(f"[ERROR] Feature extraction failed: {str(e)}")
import traceback
traceback.print_exc()
raise HTTPException(status_code=500, detail=f"Feature extraction error: {str(e)}")
finally:
        # Clean up the temporary file
        if temp_path and os.path.exists(temp_path):
            try:
                os.unlink(temp_path)
                print(f"[DEBUG] Cleaned up temp file")
            except OSError:
                pass
def classify_voice_element(features: Dict[str, float], baseline: Optional[Dict] = None) -> Dict:
"""
    Classify the vocal element based on the prosodic features.
    Simplified rules - in production, use the trained ML model.
    The baseline parameter is accepted for future use but is not applied by these rules.
"""
rms = features["rms"]
pitch = features["pitch"]
zcr = features["zcr"]
    # Score each element (0-100)
scores = {
"aria": 0.0,
"acqua": 0.0,
"terra": 0.0,
"fuoco": 0.0
}
    # ARIA (air): high energy, variable pitch, medium-high ZCR
if rms > 0.05:
scores["aria"] += 30
if zcr > 0.1:
scores["aria"] += 30
if pitch > 200:
scores["aria"] += 40
    # ACQUA (water): flowing energy, medium-low pitch, smooth variations
if 0.02 < rms < 0.06:
scores["acqua"] += 40
if 150 < pitch < 250:
scores["acqua"] += 35
if zcr < 0.12:
scores["acqua"] += 25
    # TERRA (earth): stable energy, low pitch, low ZCR
if rms < 0.04:
scores["terra"] += 35
if pitch < 180:
scores["terra"] += 40
if zcr < 0.08:
scores["terra"] += 25
    # FUOCO (fire): high energy, high pitch, very high ZCR
if rms > 0.06:
scores["fuoco"] += 35
if pitch > 250:
scores["fuoco"] += 35
if zcr > 0.15:
scores["fuoco"] += 30
    # Normalize the scores
total = sum(scores.values())
if total > 0:
scores = {k: (v / total) * 100 for k, v in scores.items()}
    # Determine the dominant element
dominant_element = max(scores, key=scores.get)
confidence = scores[dominant_element] / 100.0
return {
"element": dominant_element,
"confidence": confidence,
"scores": scores
}
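# The heuristic rules above stand in for a trained model. A minimal sketch of the ML
# path mentioned in the docstring, assuming a scikit-learn classifier persisted with
# joblib (the model file name and feature order are assumptions, not part of this project):
def classify_with_model(features: Dict[str, float], model_path: str = "voice_element_model.joblib") -> Dict:
    """Hypothetical sketch: classify an element with a persisted scikit-learn model."""
    import joblib  # requires joblib / scikit-learn to be installed
    feature_order = ["rms", "zcr", "pitch", "spectral_centroid", "spectral_rolloff", "mfcc_mean"]
    model = joblib.load(model_path)
    x = np.array([[features[name] for name in feature_order]])
    probabilities = model.predict_proba(x)[0]
    best = int(np.argmax(probabilities))
    return {
        "element": str(model.classes_[best]),
        "confidence": float(probabilities[best]),
    }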
# ============================================================================
# ENDPOINTS
# ============================================================================
@app.get("/")
async def root():
    """Health check"""
    return {
        "status": "online",
        "service": "Voice Break API",
        "version": "2.0.0",
        "endpoints": [
            "/api/calibrate-baseline",
            "/api/analyze-voice",
            "/api/train-sample",
            "/api/dataset/stats",
            "/api/dataset/samples/{element}",
            "/api/retrain-model"
        ]
    }
@app.post("/api/calibrate-baseline", response_model=BaselineCalibration)
async def calibrate_baseline(audio: UploadFile = File(...)):
"""
    Endpoint 1: Baseline Calibration
    The user records their "normal" voice to create a reference
"""
try:
audio_bytes = await audio.read()
        # Extract baseline features
features = extract_features(audio_bytes)
return BaselineCalibration(
rms_baseline=features["rms"],
pitch_baseline=features["pitch"],
duration_ms=features["duration"] * 1000,
success=True,
            message="Baseline calibration completed successfully"
)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/analyze-voice", response_model=VoiceAnalysisResult)
async def analyze_voice(
audio: UploadFile = File(...),
baseline_rms: Optional[float] = Form(None),
baseline_pitch: Optional[float] = Form(None)
):
"""
    Endpoint 2: Real-time Voice Analysis
    Analyzes an audio clip and classifies the prosodic element
"""
try:
audio_bytes = await audio.read()
        # Extract features
features = extract_features(audio_bytes)
        # Build the baseline if provided
        baseline = None
        if baseline_rms is not None and baseline_pitch is not None:
baseline = {
"rms": baseline_rms,
"pitch": baseline_pitch
}
        # Classify the element
classification = classify_voice_element(features, baseline)
return VoiceAnalysisResult(
element=classification["element"],
confidence=classification["confidence"],
features=features,
prosody_score=classification["scores"],
timestamp=datetime.now().isoformat()
)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/train-sample")
async def train_sample(
audio: UploadFile = File(...),
element: str = Form(...),
user_id: Optional[str] = Form(None)
):
"""
    Endpoint 3: Add a Sample to the Dataset
    Stores a labeled sample for model training
"""
try:
        # Validate the element
valid_elements = ["aria", "acqua", "terra", "fuoco"]
if element not in valid_elements:
            raise HTTPException(status_code=400, detail=f"Invalid element. Use one of: {valid_elements}")
audio_bytes = await audio.read()
        # Extract features for validation
features = extract_features(audio_bytes)
        # In production: save to storage (S3, HF Dataset, etc.)
        # For now, just return a confirmation
        return {
            "success": True,
            "element": element,
            "features": features,
            "message": f"Sample {element} added to the dataset",
            "user_id": user_id
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
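# The endpoint above only acknowledges the sample. A minimal sketch of the "save to
# storage" step it mentions, assuming a local dataset/<element>/ directory layout
# (the layout and file naming are illustrative assumptions, not part of this project):
def save_training_sample(audio_bytes: bytes, element: str, features: Dict[str, float],
                         dataset_dir: str = "dataset") -> str:
    """Hypothetical sketch: persist an audio sample and its features for later training."""
    sample_dir = os.path.join(dataset_dir, element)
    os.makedirs(sample_dir, exist_ok=True)
    stem = datetime.now().strftime("%Y%m%dT%H%M%S%f")
    audio_path = os.path.join(sample_dir, f"{stem}.webm")
    with open(audio_path, "wb") as f:
        f.write(audio_bytes)
    # Keep the extracted features next to the audio so they can be reused for training
    with open(os.path.join(sample_dir, f"{stem}.json"), "w") as f:
        json.dump({"element": element, "features": features}, f)
    return audio_path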
@app.get("/api/dataset/stats")
async def get_dataset_stats():
"""
    Endpoint 4: Dataset Statistics
    Returns information about the training dataset
"""
    # In production: query the database/storage
return {
"total_samples": 277,
"by_element": {
"aria": 68,
"acqua": 71,
"terra": 69,
"fuoco": 69
},
"last_updated": "2025-08-13T14:20:45Z",
"model_accuracy": 0.875
}
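# The numbers above are hard-coded placeholders. A minimal sketch of computing them
# from the local dataset/<element>/ layout assumed in save_training_sample above:
def compute_dataset_stats(dataset_dir: str = "dataset") -> Dict:
    """Hypothetical sketch: count the stored samples per element."""
    by_element = {}
    for element in ["aria", "acqua", "terra", "fuoco"]:
        element_dir = os.path.join(dataset_dir, element)
        audio_files = os.listdir(element_dir) if os.path.isdir(element_dir) else []
        by_element[element] = len([name for name in audio_files if name.endswith(".webm")])
    return {"total_samples": sum(by_element.values()), "by_element": by_element}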
@app.post("/api/retrain-model")
async def retrain_model():
"""
    Endpoint 5: Retrain Model
    Triggers retraining of the ML model
    (Admin only)
"""
    # In production: trigger a training job
    return {
        "success": True,
        "message": "Retraining started",
        "estimated_time": "5 minutes",
        "status_url": "/api/training/status"
    }
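# The endpoint above only returns a confirmation. A minimal sketch of handing the work
# off without blocking the request, using FastAPI's BackgroundTasks; run_training is a
# placeholder for the real training job and is not wired into the route above:
def run_training() -> None:
    """Hypothetical sketch: load the stored samples, fit the classifier, persist it."""
    print("[INFO] Retraining job running (placeholder)")

# Example wiring (assumption): add `background_tasks: BackgroundTasks` to the
# retrain_model signature and call `background_tasks.add_task(run_training)`.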
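# The endpoint below calls load_audio_dataset(), which is not defined anywhere in this
# file. A minimal sketch assuming the samples live in an audio dataset on the Hugging
# Face Hub (the repository id below is a placeholder, not the project's real dataset):
def load_audio_dataset():
    """Hypothetical sketch: load the training audio dataset from the Hugging Face Hub."""
    from datasets import load_dataset  # requires the `datasets` package
    return load_dataset("your-username/voice-break-dataset")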
@app.get("/api/dataset/samples/{element}")
async def get_dataset_samples(element: str, limit: int = 10):
"""
    Get audio samples from the dataset for a specific element
"""
valid_elements = ["aria", "acqua", "terra", "fuoco"]
if element.lower() not in valid_elements:
        raise HTTPException(status_code=400, detail=f"Invalid element. Use one of: {valid_elements}")
try:
dataset = load_audio_dataset()
        # Filter by element
filtered = [sample for sample in dataset['train'] if sample['label'].lower() == element.lower()]
        # Limit the results
samples = filtered[:limit]
return {
"element": element,
"total_available": len(filtered),
"samples_returned": len(samples),
"samples": [
{
"audio_path": s['audio']['path'],
"duration": s['audio'].get('duration', 0),
"sample_rate": s['audio'].get('sampling_rate', 0)
}
for s in samples
]
}
except Exception as e:
        raise HTTPException(status_code=500, detail=f"Dataset loading error: {str(e)}")
# ============================================================================
# RUN
# ============================================================================
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
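# Example client call (assumption: the server is running locally on port 7860 and a
# browser recording has been saved as recording.webm; names are illustrative only):
#
#   import requests
#   with open("recording.webm", "rb") as f:
#       response = requests.post(
#           "http://localhost:7860/api/analyze-voice",
#           files={"audio": ("recording.webm", f, "audio/webm")},
#       )
#   print(response.json())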