"""Gradio UI for the Keyword Spotting API v2.0.

Sends a recorded or uploaded audio file to the API's HTTP ``/predict``
endpoint and renders the response (detected word, confidence, transcription
and alternative matches) as HTML.
"""

import html
import os

import gradio as gr
import httpx
from dotenv import load_dotenv

load_dotenv()

# ============================================================================
# CONFIGURATION
# ============================================================================

API_URL = os.getenv("API_URL", "http://localhost:8000")
API_KEY = os.getenv("API_KEY", "")
DEFAULT_KEYWORDS = "sí, no, quizás, imposible, hola, adiós, gracias, por favor"

# Optional authentication for the Gradio app itself.
GRADIO_USERNAME = os.getenv("GRADIO_USERNAME")
GRADIO_PASSWORD = os.getenv("GRADIO_PASSWORD")

# Two scores closer than this are displayed together as a tie.
_TIE_TOLERANCE = 0.01

# ============================================================================
# CSS STYLES
# ============================================================================

CSS = """
/* Contenedor principal */
.gradio-container {
    max-width: 900px !important;
    margin: 0 auto !important;
    font-family: 'Segoe UI', system-ui, sans-serif !important;
}

/* Header */
.header-container {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 12px;
    margin-bottom: 20px;
    color: white;
}

.header-container h1 {
    margin: 0;
    font-size: 2em;
}

.header-container p {
    margin: 10px 0 0 0;
    opacity: 0.9;
}

/* Resultado principal */
.result-box {
    padding: 24px;
    border-radius: 12px;
    background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
    color: white;
    text-align: center;
    margin: 16px 0;
}

.result-box.error {
    background: linear-gradient(135deg, #cb2d3e 0%, #ef473a 100%);
}

.result-word {
    font-size: 2.5em;
    font-weight: bold;
    margin: 0;
    text-transform: uppercase;
    letter-spacing: 2px;
}

.result-confidence {
    font-size: 1.2em;
    margin-top: 8px;
    opacity: 0.9;
}

/* Transcripción */
.transcription-box {
    padding: 16px;
    background: rgba(102, 126, 234, 0.15);
    border-left: 4px solid #667eea;
    border-radius: 0 8px 8px 0;
    margin: 16px 0;
}

.transcription-label {
    font-size: 0.85em;
    color: #a0a0a0;
    margin-bottom: 4px;
}

.transcription-text {
    font-size: 1.2em;
    color: #ffffff;
    font-style: italic;
}

/* Alternativas */
.alternatives-container {
    margin-top: 16px;
}

.alternatives-container > p {
    color: #ffffff !important;
}

.alternative-item {
    display: flex;
    align-items: center;
    padding: 12px 16px;
    background: rgba(255, 255, 255, 0.1);
    border-radius: 8px;
    margin-bottom: 8px;
    border: 1px solid rgba(255, 255, 255, 0.2);
}

.alternative-keyword {
    font-weight: 600;
    min-width: 120px;
    color: #ffffff;
}

.alternative-bar {
    flex: 1;
    height: 24px;
    background: #e9ecef;
    border-radius: 12px;
    overflow: hidden;
    margin: 0 12px;
}

.alternative-fill {
    height: 100%;
    background: linear-gradient(90deg, #667eea, #764ba2);
    border-radius: 12px;
    transition: width 0.3s ease;
}

.alternative-score {
    font-weight: 600;
    min-width: 60px;
    text-align: right;
    color: #a78bfa;
}

/* Botón */
.primary-btn {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    font-size: 1.1em !important;
    padding: 12px 32px !important;
}

.primary-btn:hover {
    opacity: 0.9;
    transform: translateY(-1px);
}
"""

# ============================================================================
# FUNCTIONS
# ============================================================================


def _escape(value) -> str:
    """Return *value* as text that is safe to embed in HTML."""
    return html.escape(str(value))


def _error_box(title, message) -> str:
    """Build the red result box used for every error/warning state.

    Both arguments are HTML-escaped, so API messages and exception text
    cannot inject markup into the page.
    """
    return (
        '<div class="result-box error">'
        f'<div class="result-word">{_escape(title)}</div>'
        f'<div class="result-confidence">{_escape(message)}</div>'
        "</div>"
    )


def format_result_html(result: dict) -> str:
    """Render a ``/predict`` response dict as an HTML fragment.

    Keys read from *result*: ``success``, ``message``, ``word_detected``,
    ``confidence``, ``transcription`` and ``alternatives`` (a list of dicts
    with ``keyword`` and ``score``). Missing or ``None`` values fall back to
    neutral defaults.

    Alternatives whose score is within ``_TIE_TOLERANCE`` of the main
    confidence are shown next to the main word ("a / b"); the rest are listed
    below with a score bar.

    Returns:
        The HTML string; an error box when ``success`` is falsy.
    """
    if not result.get("success", False):
        return _error_box("Error", result.get("message", "Error desconocido"))

    word = result.get("word_detected") or "—"
    confidence = result.get("confidence") or 0
    transcription = result.get("transcription") or ""
    alternatives = result.get("alternatives") or []

    # Split the alternatives into ties with the main word and the rest.
    top_words = [str(word)]
    remaining_alternatives = []
    for alt in alternatives:
        alt_keyword = str(alt.get("keyword") or "")
        alt_score = alt.get("score") or 0
        if abs(alt_score - confidence) < _TIE_TOLERANCE:
            # Skip blanks and repeats so the headline never shows "x / x".
            if alt_keyword and alt_keyword not in top_words:
                top_words.append(alt_keyword)
        else:
            remaining_alternatives.append((alt_keyword, alt_score))

    words_display = " / ".join(top_words)

    # Main result box.
    parts = [
        '<div class="result-box">'
        f'<div class="result-word">{_escape(words_display)}</div>'
        f'<div class="result-confidence">Confianza: {confidence * 100:.1f}%</div>'
        "</div>"
    ]

    # Transcription.
    if transcription:
        parts.append(
            '<div class="transcription-box">'
            '<div class="transcription-label">📝 Transcripción de Whisper:</div>'
            f'<div class="transcription-text">"{_escape(transcription)}"</div>'
            "</div>"
        )

    # Alternatives that did not tie with the main word.
    if remaining_alternatives:
        parts.append(
            '<div class="alternatives-container">'
            "<p>🔄 Otras palabras detectadas:</p>"
        )
        for keyword, score in remaining_alternatives:
            # Clamp so an out-of-range score cannot produce an invalid width.
            bar_width = max(0.0, min(score * 100, 100.0))
            parts.append(
                '<div class="alternative-item">'
                f'<span class="alternative-keyword">{_escape(keyword)}</span>'
                '<div class="alternative-bar">'
                f'<div class="alternative-fill" style="width: {bar_width:.1f}%"></div>'
                "</div>"
                f'<span class="alternative-score">{score * 100:.1f}%</span>'
                "</div>"
            )
        parts.append("</div>")

    return "\n".join(parts)


def predict_keywords(audio, keywords_text: str) -> str:
    """Send the audio file to ``/predict`` and return HTML with the outcome.

    Args:
        audio: Path of the audio file; the Audio component below is configured
            with ``type="filepath"``. ``None`` when nothing was recorded or
            uploaded.
        keywords_text: Comma-separated keywords; empty or whitespace-only
            input falls back to ``DEFAULT_KEYWORDS``.

    Returns:
        An HTML fragment: the formatted result on HTTP 200, otherwise an
        error box. This function does not raise; every failure is rendered.
    """
    if audio is None:
        return _error_box("⚠️", "Por favor, graba o sube un audio")

    if not API_KEY:
        return _error_box(
            "⚠️", "API_KEY no configurada. Configura la variable de entorno."
        )

    keywords = (keywords_text or "").strip() or DEFAULT_KEYWORDS
    url = f"{API_URL.rstrip('/')}/predict"

    try:
        with open(audio, "rb") as audio_file:
            response = httpx.post(
                url,
                headers={"x-api-key": API_KEY},
                files={
                    "audio_file": (os.path.basename(audio), audio_file, "audio/wav")
                },
                data={"keywords": keywords},
                timeout=60.0,
            )

        if response.status_code == 200:
            return format_result_html(response.json())

        if response.status_code == 401:
            return _error_box("🔐", "API Key inválida")

        # Error bodies are not guaranteed to be JSON objects (e.g. a proxy
        # error page); keep the status code visible either way.
        try:
            error_detail = response.json().get("detail", response.text)
        except (ValueError, AttributeError):
            error_detail = response.text
        return _error_box(f"Error {response.status_code}", error_detail)

    except httpx.ConnectError:
        return _error_box(
            "🔌", "No se pudo conectar al servidor. ¿Está corriendo la API?"
        )
    except Exception as exc:  # UI boundary: show the failure instead of crashing
        return _error_box("❌", str(exc))


# ============================================================================
# GRADIO INTERFACE
# ============================================================================

with gr.Blocks(title="🎯 Keyword Spotting") as demo:
    # Inject the CSS through an HTML component (the original author's note
    # says this works across Gradio versions).
    gr.HTML(f"<style>{CSS}</style>")

    # Header.
    # NOTE(review): the original header text was not recoverable; the title
    # below reuses the Blocks title. Restore the subtitle if there was one.
    gr.HTML(
        """
        <div class="header-container">
            <h1>🎯 Keyword Spotting</h1>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🎤 Audio")
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Graba o sube un audio",
            )

            gr.Markdown("### 🏷️ Keywords")
            keywords_input = gr.Textbox(
                label="Palabras a detectar (separadas por coma)",
                placeholder="sí, no, quizás, imposible...",
                value=DEFAULT_KEYWORDS,
                lines=2,
            )

            submit_btn = gr.Button(
                "🔍 Analizar Audio",
                variant="primary",
                elem_classes=["primary-btn"],
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📊 Resultados")
            # NOTE(review): placeholder wrapper/styling is a reconstruction.
            result_output = gr.HTML(
                value=(
                    '<p style="text-align: center; color: #a0a0a0;">'
                    "Los resultados aparecerán aquí"
                    "</p>"
                )
            )

    # Tips.
    gr.Markdown("---")
    gr.Markdown("### 💡 Tips")
    gr.Markdown(
        """
- **Habla claro**: Pronuncia la palabra de forma clara y pausada
- **Sin ruido**: Evita ruido de fondo para mejores resultados
- **Keywords**: Puedes personalizar las palabras a detectar
- **Formatos**: Soporta WAV, MP3, OGG y otros formatos de audio
"""
    )

    # Footer.
    gr.Markdown(
        """
---
Powered by Whisper AI • Transcription + Text Matching
"""
    )

    # Run the analysis when the button is clicked...
    submit_btn.click(
        fn=predict_keywords,
        inputs=[audio_input, keywords_input],
        outputs=result_output,
    )

    # ...and also whenever the audio input changes.
    audio_input.change(
        fn=predict_keywords,
        inputs=[audio_input, keywords_input],
        outputs=result_output,
    )


# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    # Enable login only when a password is configured.
    # NOTE(review): if GRADIO_PASSWORD is set but GRADIO_USERNAME is not, the
    # username in the tuple is None — confirm that is intended.
    auth = None
    if GRADIO_PASSWORD:
        auth = (GRADIO_USERNAME, GRADIO_PASSWORD)
        print(f"🔐 Autenticación habilitada. Usuario: {GRADIO_USERNAME}")
    else:
        print("⚠️ Sin autenticación. Configura GRADIO_PASSWORD para proteger la app.")

    # NOTE(review): share=True requests a public Gradio link even when no
    # auth is configured — confirm this exposure is wanted.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        auth=auth,
    )