Spaces:
Sleeping
Sleeping
"""
Gradio UI for the Keyword Spotting API v2.0.

Uses the HTTP /predict endpoint to analyze audio and detect keywords.
"""
import os
import httpx
import gradio as gr
from dotenv import load_dotenv

# Load variables from a .env file (if one exists) before the os.getenv
# lookups further down read the configuration.
load_dotenv()
# ============================================================================
# CONFIGURATION
# ============================================================================
# Backend endpoint and credentials; each one can be overridden through the
# process environment.
API_URL = os.environ.get("API_URL", "http://localhost:8000")
API_KEY = os.environ.get("API_KEY", "")

# Keyword list used when the user leaves the keywords box empty.
DEFAULT_KEYWORDS = "sí, no, quizás, imposible, hola, adiós, gracias, por favor"

# Optional login for the Gradio app (both are None when unset).
GRADIO_USERNAME = os.environ.get("GRADIO_USERNAME")
GRADIO_PASSWORD = os.environ.get("GRADIO_PASSWORD")
# ============================================================================
# CSS STYLES
# ============================================================================
# Stylesheet for the custom HTML fragments (header, result card, transcription
# box, alternatives list) and the primary button. It is embedded into the page
# inside a <style> tag by the UI section of this file.
CSS = """
/* Contenedor principal */
.gradio-container {
max-width: 900px !important;
margin: 0 auto !important;
font-family: 'Segoe UI', system-ui, sans-serif !important;
}
/* Header */
.header-container {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 12px;
margin-bottom: 20px;
color: white;
}
.header-container h1 {
margin: 0;
font-size: 2em;
}
.header-container p {
margin: 10px 0 0 0;
opacity: 0.9;
}
/* Resultado principal */
.result-box {
padding: 24px;
border-radius: 12px;
background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
color: white;
text-align: center;
margin: 16px 0;
}
.result-box.error {
background: linear-gradient(135deg, #cb2d3e 0%, #ef473a 100%);
}
.result-word {
font-size: 2.5em;
font-weight: bold;
margin: 0;
text-transform: uppercase;
letter-spacing: 2px;
}
.result-confidence {
font-size: 1.2em;
margin-top: 8px;
opacity: 0.9;
}
/* Transcripción */
.transcription-box {
padding: 16px;
background: rgba(102, 126, 234, 0.15);
border-left: 4px solid #667eea;
border-radius: 0 8px 8px 0;
margin: 16px 0;
}
.transcription-label {
font-size: 0.85em;
color: #a0a0a0;
margin-bottom: 4px;
}
.transcription-text {
font-size: 1.2em;
color: #ffffff;
font-style: italic;
}
/* Alternativas */
.alternatives-container {
margin-top: 16px;
}
.alternatives-container > p {
color: #ffffff !important;
}
.alternative-item {
display: flex;
align-items: center;
padding: 12px 16px;
background: rgba(255, 255, 255, 0.1);
border-radius: 8px;
margin-bottom: 8px;
border: 1px solid rgba(255, 255, 255, 0.2);
}
.alternative-keyword {
font-weight: 600;
min-width: 120px;
color: #ffffff;
}
.alternative-bar {
flex: 1;
height: 24px;
background: #e9ecef;
border-radius: 12px;
overflow: hidden;
margin: 0 12px;
}
.alternative-fill {
height: 100%;
background: linear-gradient(90deg, #667eea, #764ba2);
border-radius: 12px;
transition: width 0.3s ease;
}
.alternative-score {
font-weight: 600;
min-width: 60px;
text-align: right;
color: #a78bfa;
}
/* Botón */
.primary-btn {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
font-size: 1.1em !important;
padding: 12px 32px !important;
}
.primary-btn:hover {
opacity: 0.9;
transform: translateY(-1px);
}
"""
# ============================================================================
# FUNCTIONS
# ============================================================================
def format_result_html(result: dict) -> str:
    """Render a prediction result as an HTML fragment.

    Args:
        result: Decoded JSON body of the prediction response. Keys read here:
            ``success``, ``message``, ``word_detected``, ``confidence``,
            ``transcription`` and ``alternatives`` (a list of dicts with
            ``keyword`` and ``score``). Missing or ``None`` values fall back
            to neutral defaults instead of raising.

    Returns:
        An error card when ``success`` is falsy; otherwise a result card,
        followed by the transcription box (if any) and one bar per
        alternative whose score differs from the main confidence.
    """
    # Local import: the name ``html`` is kept out of module scope on purpose.
    from html import escape

    if not result.get("success", False):
        # The message comes from the API, so it is escaped before embedding.
        message = escape(str(result.get("message") or "Error desconocido"))
        return f"""
    <div class="result-box error">
        <p class="result-word">Error</p>
        <p class="result-confidence">{message}</p>
    </div>
    """

    word = str(result.get("word_detected") or "—")
    confidence = result.get("confidence") or 0
    transcription = result.get("transcription") or ""
    alternatives = result.get("alternatives") or []

    # Alternatives tied with the main score (within 0.01) are shown next to
    # the detected word; everything else goes to the bar list.
    top_words = [word]
    remaining = []
    for alt in alternatives:
        keyword = str(alt.get("keyword") or "")
        score = alt.get("score") or 0
        if abs(score - confidence) < 0.01:
            # Skip blanks and repeats so the headline never reads "sí / sí".
            if keyword and keyword not in top_words:
                top_words.append(keyword)
        else:
            remaining.append((keyword, score))

    words_display = escape(" / ".join(top_words))

    # Main result card.
    parts = [
        f"""
    <div class="result-box">
        <p class="result-word">{words_display}</p>
        <p class="result-confidence">Confianza: {confidence * 100:.1f}%</p>
    </div>
    """
    ]

    # Transcription box.
    if transcription:
        parts.append(
            f"""
    <div class="transcription-box">
        <div class="transcription-label">📝 Transcripción de Whisper:</div>
        <div class="transcription-text">"{escape(str(transcription))}"</div>
    </div>
    """
        )

    # Remaining alternatives (those not already shown in the headline).
    if remaining:
        parts.append(
            '<div class="alternatives-container"><p style="font-weight: 600; margin-bottom: 12px;">🔄 Otras palabras detectadas:</p>'
        )
        for keyword, score in remaining:
            # Clamp so an out-of-range score cannot produce an invalid width.
            bar_width = min(max(score * 100, 0.0), 100.0)
            parts.append(
                f"""
        <div class="alternative-item">
            <span class="alternative-keyword">{escape(keyword)}</span>
            <div class="alternative-bar">
                <div class="alternative-fill" style="width: {bar_width:.1f}%"></div>
            </div>
            <span class="alternative-score">{score * 100:.1f}%</span>
        </div>
        """
            )
        parts.append('</div>')

    return "".join(parts)
def _error_box(title: str, message: str) -> str:
    """Build the red error card; *message* is HTML-escaped, *title* is trusted."""
    from html import escape

    return (
        f'<div class="result-box error"><p class="result-word">{title}</p>'
        f'<p class="result-confidence">{escape(str(message))}</p></div>'
    )


def predict_keywords(audio, keywords_text: str) -> str:
    """Send the audio file to the ``/predict`` endpoint and return result HTML.

    Args:
        audio: Path of the audio file to upload, or ``None`` when nothing was
            recorded or uploaded (the Audio component in this file is
            configured with ``type="filepath"``).
        keywords_text: Comma-separated keywords typed by the user. Empty or
            whitespace-only text falls back to ``DEFAULT_KEYWORDS``.

    Returns:
        An HTML fragment: the formatted prediction on HTTP 200, otherwise an
        error card. No exception is propagated to the caller; every failure
        is rendered as an error card.
    """
    if audio is None:
        return _error_box("⚠️", "Por favor, graba o sube un audio")
    if not API_KEY:
        return _error_box(
            "⚠️", "API_KEY no configurada. Configura la variable de entorno."
        )

    # Whitespace-only input must not be sent as an empty keyword list.
    keywords = (keywords_text or "").strip() or DEFAULT_KEYWORDS

    try:
        audio_path = audio
        # Tolerate a trailing slash in API_URL.
        url = f"{API_URL.rstrip('/')}/predict"
        headers = {"x-api-key": API_KEY}

        with open(audio_path, "rb") as f:
            # NOTE(review): the content type is fixed to audio/wav even for
            # MP3/OGG uploads — confirm the server does not rely on it.
            files = {"audio_file": (os.path.basename(audio_path), f, "audio/wav")}
            data = {"keywords": keywords}
            response = httpx.post(
                url,
                headers=headers,
                files=files,
                data=data,
                timeout=60.0,
            )

        if response.status_code == 200:
            return format_result_html(response.json())
        if response.status_code == 401:
            return _error_box("🔐", "API Key inválida")

        # Error bodies are not guaranteed to be JSON (e.g. a proxy error
        # page); fall back to the raw text so the status code is still shown.
        try:
            payload = response.json()
        except ValueError:
            payload = None
        if isinstance(payload, dict):
            error_detail = payload.get("detail", response.text)
        else:
            error_detail = response.text
        return _error_box(f"Error {response.status_code}", error_detail)

    except httpx.ConnectError:
        return _error_box(
            "🔌", "No se pudo conectar al servidor. ¿Está corriendo la API?"
        )
    except httpx.TimeoutException:
        return _error_box(
            "⏱️", "El servidor tardó demasiado en responder. Inténtalo de nuevo."
        )
    except Exception as e:
        # UI boundary: show the failure to the user instead of raising.
        return _error_box("❌", str(e))
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
# Two-column layout: audio and keyword inputs on the left, results on the
# right, followed by a tips section and a footer.
with gr.Blocks(title="🎯 Keyword Spotting") as demo:
    # Inject the CSS as an HTML <style> tag (compatible with all Gradio versions)
    gr.HTML(f"<style>{CSS}</style>")
    # Header
    gr.HTML("""
<div class="header-container">
<h1>🎯 Keyword Spotting</h1>
<p>Detecta palabras clave en audio usando Whisper AI</p>
</div>
""")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🎤 Audio")
            # type="filepath" makes the handler receive a path to the audio file.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Graba o sube un audio"
            )
            gr.Markdown("### 🏷️ Keywords")
            keywords_input = gr.Textbox(
                label="Palabras a detectar (separadas por coma)",
                placeholder="sí, no, quizás, imposible...",
                value=DEFAULT_KEYWORDS,
                lines=2
            )
            submit_btn = gr.Button(
                "🔍 Analizar Audio",
                variant="primary",
                elem_classes=["primary-btn"]
            )
        with gr.Column(scale=1):
            gr.Markdown("### 📊 Resultados")
            # Placeholder shown until the first analysis completes.
            result_output = gr.HTML(
                value='<div style="padding: 40px; text-align: center; color: #999;">Los resultados aparecerán aquí</div>'
            )
    # Usage tips
    gr.Markdown("---")
    gr.Markdown("### 💡 Tips")
    gr.Markdown("""
- **Habla claro**: Pronuncia la palabra de forma clara y pausada
- **Sin ruido**: Evita ruido de fondo para mejores resultados
- **Keywords**: Puedes personalizar las palabras a detectar
- **Formatos**: Soporta WAV, MP3, OGG y otros formatos de audio
""")
    # Footer
    gr.Markdown("""
---
<center style="color: #999; font-size: 0.9em;">
Powered by <b>Whisper AI</b> • Transcription + Text Matching
</center>
""")
    # Event handler: run the analysis when the button is clicked
    submit_btn.click(
        fn=predict_keywords,
        inputs=[audio_input, keywords_input],
        outputs=result_output
    )
    # Also run the analysis whenever the audio input changes
    audio_input.change(
        fn=predict_keywords,
        inputs=[audio_input, keywords_input],
        outputs=result_output
    )
# ============================================================================
# MAIN
# ============================================================================
if __name__ == "__main__":
    # Enable login only when a password is configured.
    auth = None
    if GRADIO_PASSWORD:
        # A password without a user name used to yield (None, password), a
        # pair no submitted login can match; fall back to a fixed user name.
        username = GRADIO_USERNAME or "admin"
        auth = (username, GRADIO_PASSWORD)
        print(f"🔐 Autenticación habilitada. Usuario: {username}")
    else:
        print("⚠️ Sin autenticación. Configura GRADIO_PASSWORD para proteger la app.")

    # The public share link stays on by default (previous behavior) but can
    # now be disabled with GRADIO_SHARE=0/false/no/off.
    share = os.getenv("GRADIO_SHARE", "true").strip().lower() not in {
        "0",
        "false",
        "no",
        "off",
    }

    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=share,
        auth=auth,
    )