Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py -
|
| 2 |
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
from fastapi.responses import HTMLResponse
|
|
@@ -6,6 +6,9 @@ from transformers import pipeline
|
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
import uvicorn
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Crear app FastAPI
|
| 11 |
app = FastAPI(
|
|
@@ -32,8 +35,18 @@ async def startup_event():
|
|
| 32 |
global classifier
|
| 33 |
try:
|
| 34 |
print("🔄 Cargando modelo...")
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
except Exception as e:
|
| 38 |
print(f"❌ Error cargando modelo: {e}")
|
| 39 |
classifier = None
|
|
@@ -77,13 +90,6 @@ async def root():
|
|
| 77 |
<p>Documentación interactiva de la API (Swagger)</p>
|
| 78 |
</div>
|
| 79 |
|
| 80 |
-
<h2>🔗 Links útiles:</h2>
|
| 81 |
-
<ul>
|
| 82 |
-
<li><a href="/health">Health Check</a></li>
|
| 83 |
-
<li><a href="/docs">Documentación Swagger</a></li>
|
| 84 |
-
<li><a href="/redoc">Documentación ReDoc</a></li>
|
| 85 |
-
</ul>
|
| 86 |
-
|
| 87 |
<h2>📱 Uso desde Android:</h2>
|
| 88 |
<pre style="background: #f8f8f8; padding: 15px; border-radius: 5px;">
|
| 89 |
POST https://janiopi-musical-detector-api.hf.space/detect
|
|
@@ -115,7 +121,9 @@ async def health_check():
|
|
| 115 |
"model_loaded": classifier is not None,
|
| 116 |
"message": "API funcionando correctamente",
|
| 117 |
"model_info": "Janiopi/detector_de_instrumentos_v1",
|
| 118 |
-
"supported_instruments": ["Guitar", "Piano", "Drum"]
|
|
|
|
|
|
|
| 119 |
}
|
| 120 |
|
| 121 |
@app.post("/detect")
|
|
@@ -136,42 +144,43 @@ async def detect_instrument(audio: UploadFile = File(...)):
|
|
| 136 |
content = await audio.read()
|
| 137 |
print(f"📏 Tamaño: {len(content)} bytes")
|
| 138 |
|
| 139 |
-
#
|
| 140 |
-
|
| 141 |
-
file_extension = '.3gp'
|
| 142 |
-
elif audio.filename and audio.filename.endswith('.wav'):
|
| 143 |
-
file_extension = '.wav'
|
| 144 |
-
elif audio.content_type and 'wav' in audio.content_type:
|
| 145 |
-
file_extension = '.wav'
|
| 146 |
-
else:
|
| 147 |
-
file_extension = '.wav' # Por defecto
|
| 148 |
-
|
| 149 |
-
print(f"🎵 Usando extensión: {file_extension}")
|
| 150 |
-
|
| 151 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as temp_file:
|
| 152 |
temp_file.write(content)
|
| 153 |
temp_path = temp_file.name
|
| 154 |
|
| 155 |
try:
|
| 156 |
-
print("
|
| 157 |
-
|
| 158 |
-
# Usar librosa para cargar el audio de manera más robusta
|
| 159 |
-
import librosa
|
| 160 |
|
| 161 |
# Cargar audio con librosa (maneja múltiples formatos)
|
| 162 |
-
audio_data, sample_rate = librosa.load(temp_path, sr=16000)
|
| 163 |
print(f"🔊 Audio cargado: {len(audio_data)} samples a {sample_rate}Hz")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
# Guardar como WAV temporal para el modelo
|
| 166 |
-
temp_wav_path = temp_path.replace(
|
| 167 |
-
import soundfile as sf
|
| 168 |
sf.write(temp_wav_path, audio_data, sample_rate)
|
|
|
|
| 169 |
|
| 170 |
-
|
|
|
|
| 171 |
results = classifier(temp_wav_path)
|
| 172 |
print(f"🎯 Resultados raw: {results}")
|
| 173 |
|
| 174 |
-
# Limpiar
|
| 175 |
if os.path.exists(temp_wav_path):
|
| 176 |
os.unlink(temp_wav_path)
|
| 177 |
|
|
@@ -183,6 +192,7 @@ async def detect_instrument(audio: UploadFile = File(...)):
|
|
| 183 |
"score": round(float(result["score"]), 4)
|
| 184 |
})
|
| 185 |
|
|
|
|
| 186 |
formatted_results.sort(key=lambda x: x["score"], reverse=True)
|
| 187 |
|
| 188 |
print(f"✅ Resultados formateados: {formatted_results}")
|
|
@@ -191,11 +201,11 @@ async def detect_instrument(audio: UploadFile = File(...)):
|
|
| 191 |
"success": True,
|
| 192 |
"results": formatted_results,
|
| 193 |
"filename": audio.filename,
|
| 194 |
-
"processed_size_bytes": len(content),
|
| 195 |
"audio_info": {
|
| 196 |
"samples": len(audio_data),
|
| 197 |
"sample_rate": sample_rate,
|
| 198 |
-
"duration_seconds": len(audio_data) / sample_rate
|
|
|
|
| 199 |
}
|
| 200 |
}
|
| 201 |
|
|
@@ -210,10 +220,19 @@ async def detect_instrument(audio: UploadFile = File(...)):
|
|
| 210 |
print(f"❌ Error inesperado: {e}")
|
| 211 |
import traceback
|
| 212 |
traceback.print_exc()
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
@app.get("/test")
|
| 219 |
async def test_endpoint():
|
|
|
|
| 1 |
+
# app.py - Versión corregida con padding
|
| 2 |
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
from fastapi.responses import HTMLResponse
|
|
|
|
| 6 |
import tempfile
|
| 7 |
import os
|
| 8 |
import uvicorn
|
| 9 |
+
import librosa
|
| 10 |
+
import soundfile as sf
|
| 11 |
+
import numpy as np
|
| 12 |
|
| 13 |
# Crear app FastAPI
|
| 14 |
app = FastAPI(
|
|
|
|
| 35 |
global classifier
|
| 36 |
try:
|
| 37 |
print("🔄 Cargando modelo...")
|
| 38 |
+
# Configurar pipeline con padding y truncación
|
| 39 |
+
classifier = pipeline(
|
| 40 |
+
"audio-classification",
|
| 41 |
+
model="Janiopi/detector_de_instrumentos_v1",
|
| 42 |
+
feature_extractor_kwargs={
|
| 43 |
+
"padding": True,
|
| 44 |
+
"truncation": True,
|
| 45 |
+
"max_length": 240000, # 15 segundos a 16kHz
|
| 46 |
+
"return_tensors": "pt"
|
| 47 |
+
}
|
| 48 |
+
)
|
| 49 |
+
print("✅ Modelo cargado exitosamente con configuración de padding")
|
| 50 |
except Exception as e:
|
| 51 |
print(f"❌ Error cargando modelo: {e}")
|
| 52 |
classifier = None
|
|
|
|
| 90 |
<p>Documentación interactiva de la API (Swagger)</p>
|
| 91 |
</div>
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
<h2>📱 Uso desde Android:</h2>
|
| 94 |
<pre style="background: #f8f8f8; padding: 15px; border-radius: 5px;">
|
| 95 |
POST https://janiopi-musical-detector-api.hf.space/detect
|
|
|
|
| 121 |
"model_loaded": classifier is not None,
|
| 122 |
"message": "API funcionando correctamente",
|
| 123 |
"model_info": "Janiopi/detector_de_instrumentos_v1",
|
| 124 |
+
"supported_instruments": ["Guitar", "Piano", "Drum"],
|
| 125 |
+
"max_duration_seconds": 15,
|
| 126 |
+
"sample_rate": 16000
|
| 127 |
}
|
| 128 |
|
| 129 |
@app.post("/detect")
|
|
|
|
| 144 |
content = await audio.read()
|
| 145 |
print(f"📏 Tamaño: {len(content)} bytes")
|
| 146 |
|
| 147 |
+
# Crear archivo temporal
|
| 148 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
temp_file.write(content)
|
| 150 |
temp_path = temp_file.name
|
| 151 |
|
| 152 |
try:
|
| 153 |
+
print("🎵 Cargando audio con librosa...")
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
# Cargar audio con librosa (maneja múltiples formatos)
|
| 156 |
+
audio_data, sample_rate = librosa.load(temp_path, sr=16000)
|
| 157 |
print(f"🔊 Audio cargado: {len(audio_data)} samples a {sample_rate}Hz")
|
| 158 |
+
print(f"⏱️ Duración: {len(audio_data)/sample_rate:.2f} segundos")
|
| 159 |
+
|
| 160 |
+
# Verificar duración mínima
|
| 161 |
+
if len(audio_data) < 1600: # Menos de 0.1 segundos
|
| 162 |
+
raise ValueError("Audio demasiado corto (mínimo 0.1 segundos)")
|
| 163 |
+
|
| 164 |
+
# Truncar a máximo 15 segundos
|
| 165 |
+
max_samples = 15 * 16000
|
| 166 |
+
if len(audio_data) > max_samples:
|
| 167 |
+
audio_data = audio_data[:max_samples]
|
| 168 |
+
print(f"🔄 Audio truncado a 15 segundos")
|
| 169 |
+
|
| 170 |
+
# Asegurar que el audio tenga el formato correcto
|
| 171 |
+
audio_data = np.array(audio_data, dtype=np.float32)
|
| 172 |
|
| 173 |
# Guardar como WAV temporal para el modelo
|
| 174 |
+
temp_wav_path = temp_path.replace('.wav', '_processed.wav')
|
|
|
|
| 175 |
sf.write(temp_wav_path, audio_data, sample_rate)
|
| 176 |
+
print(f"💾 Audio guardado como: {temp_wav_path}")
|
| 177 |
|
| 178 |
+
print("🤖 Ejecutando modelo...")
|
| 179 |
+
# Procesar con el modelo
|
| 180 |
results = classifier(temp_wav_path)
|
| 181 |
print(f"🎯 Resultados raw: {results}")
|
| 182 |
|
| 183 |
+
# Limpiar archivo WAV procesado
|
| 184 |
if os.path.exists(temp_wav_path):
|
| 185 |
os.unlink(temp_wav_path)
|
| 186 |
|
|
|
|
| 192 |
"score": round(float(result["score"]), 4)
|
| 193 |
})
|
| 194 |
|
| 195 |
+
# Ordenar por score descendente
|
| 196 |
formatted_results.sort(key=lambda x: x["score"], reverse=True)
|
| 197 |
|
| 198 |
print(f"✅ Resultados formateados: {formatted_results}")
|
|
|
|
| 201 |
"success": True,
|
| 202 |
"results": formatted_results,
|
| 203 |
"filename": audio.filename,
|
|
|
|
| 204 |
"audio_info": {
|
| 205 |
"samples": len(audio_data),
|
| 206 |
"sample_rate": sample_rate,
|
| 207 |
+
"duration_seconds": round(len(audio_data) / sample_rate, 2),
|
| 208 |
+
"processed_size_bytes": len(content)
|
| 209 |
}
|
| 210 |
}
|
| 211 |
|
|
|
|
| 220 |
print(f"❌ Error inesperado: {e}")
|
| 221 |
import traceback
|
| 222 |
traceback.print_exc()
|
| 223 |
+
|
| 224 |
+
# Mensajes de error más específicos
|
| 225 |
+
error_msg = str(e)
|
| 226 |
+
if "Unable to create tensor" in error_msg:
|
| 227 |
+
detail = "Error de formato de audio. Intenta con un archivo WAV de mejor calidad."
|
| 228 |
+
elif "too short" in error_msg.lower():
|
| 229 |
+
detail = "Audio demasiado corto. Graba al menos 1 segundo."
|
| 230 |
+
elif "padding" in error_msg:
|
| 231 |
+
detail = "Error de procesamiento de audio. Intenta con un archivo diferente."
|
| 232 |
+
else:
|
| 233 |
+
detail = f"Error procesando audio: {error_msg}"
|
| 234 |
+
|
| 235 |
+
raise HTTPException(status_code=500, detail=detail)
|
| 236 |
|
| 237 |
@app.get("/test")
|
| 238 |
async def test_endpoint():
|