Spaces:

marcosremar2
/

ufpalign

Build error

App Files Files Community

marcosremar2 committed on Jun 7, 2025

Commit

317700f

1 Parent(s): 97280ac

Fix pandas version compatibility and add comprehensive API test script

Browse files

Files changed (2) hide show

Dockerfile +5 -3
test_api.py +269 -0

Dockerfile CHANGED Viewed

@@ -8,7 +8,7 @@ ENV DEBIAN_FRONTEND=noninteractive
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
-# Install system dependencies
 RUN apt-get update && \
     apt-get install -y \
     python3 \
@@ -21,9 +21,11 @@ RUN apt-get update && \
     curl \
     git \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
-# Install Python packages
 RUN pip3 install --no-cache-dir \
     montreal-forced-aligner==2.2.17 \
     fastapi==0.104.1 \
@@ -31,7 +33,7 @@ RUN pip3 install --no-cache-dir \
     python-multipart==0.0.6 \
     pydantic==2.5.0 \
     textgrid==1.5 \
-    pandas==2.1.4 \
     numpy==1.24.3
 # Create workspace

 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
+# Install system dependencies and update CA certificates
 RUN apt-get update && \
     apt-get install -y \
     python3 \
     curl \
     git \
     build-essential \
+    ca-certificates \
+    && update-ca-certificates \
     && rm -rf /var/lib/apt/lists/*
+# Install Python packages with compatible versions for Python 3.8
 RUN pip3 install --no-cache-dir \
     montreal-forced-aligner==2.2.17 \
     fastapi==0.104.1 \
     python-multipart==0.0.6 \
     pydantic==2.5.0 \
     textgrid==1.5 \
+    pandas==2.0.3 \
     numpy==1.24.3
 # Create workspace

test_api.py ADDED Viewed

	@@ -0,0 +1,269 @@

+#!/usr/bin/env python3
+"""
+Script de teste para a API do Montreal Forced Aligner Portuguese
+Testa todos os endpoints e funcionalidades da API
+"""
+import requests
+import json
+import time
+import os
+from pathlib import Path
+import wave
+import numpy as np
+from io import BytesIO
+# API configuration
+API_BASE_URL = "http://localhost:7860"  # For local testing
+# API_BASE_URL = "https://marcosremar2-ufpalign.hf.space"  # For Hugging Face Spaces
+def create_test_audio(filename="test_audio.wav", duration=3, sample_rate=16000):
+    """Write a mono, 16-bit WAV file containing a pure 440 Hz sine tone.
+
+    This is only a placeholder signal; for a meaningful alignment test use a
+    WAV recording of Portuguese speech instead.
+
+    Args:
+        filename: Path of the WAV file to create (overwritten if present).
+        duration: Length of the tone in seconds.
+        sample_rate: Sampling rate in Hz.
+
+    Returns:
+        The ``filename`` that was written.
+    """
+    n_samples = int(sample_rate * duration)
+    timeline = np.linspace(0, duration, n_samples, False)
+    # 440 Hz (the note A) scaled to 30% of full amplitude.
+    tone = np.sin(2 * np.pi * 440 * timeline) * 0.3
+    # Scale to the signed 16-bit range expected by the 2-byte sample width.
+    pcm = (tone * 32767).astype(np.int16)
+    with wave.open(filename, 'w') as out:
+        out.setframerate(sample_rate)
+        out.setsampwidth(2)  # 16-bit
+        out.setnchannels(1)  # Mono
+        out.writeframes(pcm.tobytes())
+    print(f"✅ Arquivo de áudio de teste criado: {filename}")
+    return filename
+def test_health_check():
+    """Probe the ``/health`` endpoint of the API.
+
+    Returns:
+        True when the endpoint answers HTTP 200 with a JSON-decodable body;
+        False for any other status code or when any exception occurs (the
+        exception is printed, not raised).
+    """
+    print("\n🔍 Testando Health Check...")
+    try:
+        reply = requests.get(f"{API_BASE_URL}/health", timeout=10)
+        if reply.status_code != 200:
+            print(f"❌ Health Check falhou: {reply.status_code}")
+            return False
+        payload = reply.json()
+        print(f"✅ Health Check OK: {payload}")
+        return True
+    except Exception as exc:
+        print(f"❌ Erro no Health Check: {exc}")
+        return False
+def test_models_list():
+    """Query the ``/models`` endpoint and print a preview of what it lists.
+
+    Only the first three entries of the ``acoustic_models`` and
+    ``g2p_models`` keys are printed; a missing key is shown as an empty list.
+
+    Returns:
+        True when the endpoint answers HTTP 200 and the preview could be
+        printed; False for any other status code or on any exception.
+    """
+    print("\n🔍 Testando listagem de modelos...")
+    try:
+        reply = requests.get(f"{API_BASE_URL}/models", timeout=30)
+        if reply.status_code != 200:
+            print(f"❌ Falha ao listar modelos: {reply.status_code}")
+            return False
+        listing = reply.json()
+        print("✅ Modelos disponíveis:")
+        acoustic_preview = listing.get('acoustic_models', [])[:3]
+        print(f"   Acústicos: {acoustic_preview}...")
+        g2p_preview = listing.get('g2p_models', [])[:3]
+        print(f"   G2P: {g2p_preview}...")
+        return True
+    except Exception as exc:
+        print(f"❌ Erro ao listar modelos: {exc}")
+        return False
+def test_alignment(audio_file=None, text="Esta é uma frase de teste em português"):
+    """Upload audio plus transcript to ``POST /align`` and print a summary.
+
+    Args:
+        audio_file: Path of the WAV file to upload. When None, a synthetic
+            tone is generated with ``create_test_audio()`` and removed again
+            before returning.
+        text: Transcript sent alongside the audio in the ``text`` form field.
+
+    Returns:
+        The decoded JSON response (a dict with ``filename``, ``duration`` and
+        ``tiers``) on HTTP 200; False when the audio file is missing, the
+        server answers with another status, or any exception occurs.
+    """
+    print("\n🔍 Testando alinhamento forçado...")
+    # Track ownership explicitly so cleanup never deletes a caller-supplied
+    # file, even one that happens to be named "test_audio.wav".
+    created_here = audio_file is None
+    if created_here:
+        audio_file = create_test_audio()
+    if not os.path.exists(audio_file):
+        print(f"❌ Arquivo de áudio não encontrado: {audio_file}")
+        return False
+    try:
+        print(f"📝 Texto: {text}")
+        print(f"🎵 Áudio: {audio_file}")
+        print("⏳ Enviando para alinhamento...")
+        # The context manager closes the upload handle even if the request
+        # raises (timeout, connection error, ...).
+        with open(audio_file, 'rb') as audio_handle:
+            response = requests.post(
+                f"{API_BASE_URL}/align",
+                files={'audio': audio_handle},
+                data={'text': text},
+                timeout=300  # 5 minutes
+            )
+        if response.status_code == 200:
+            result = response.json()
+            print(f"✅ Alinhamento concluído!")
+            print(f"   Arquivo: {result['filename']}")
+            print(f"   Duração: {result['duration']:.2f}s")
+            print(f"   Tiers: {len(result['tiers'])}")
+            # Show how many intervals each tier contains.
+            for tier in result['tiers']:
+                print(f"   - {tier['name']}: {len(tier['intervals'])} intervalos")
+            return result
+        # Substring match so "application/json; charset=utf-8" is still
+        # recognised as a JSON error body.
+        content_type = response.headers.get('content-type') or ''
+        if 'application/json' in content_type:
+            error_data = response.json()
+        else:
+            error_data = response.text
+        print(f"❌ Falha no alinhamento: {response.status_code}")
+        print(f"   Erro: {error_data}")
+        return False
+    except Exception as e:
+        print(f"❌ Erro no alinhamento: {e}")
+        return False
+    finally:
+        # Remove the synthetic audio only if this call generated it.
+        if created_here and os.path.exists(audio_file):
+            os.remove(audio_file)
+def test_download(filename):
+    """Download a TextGrid from ``/download/<name>`` and save it locally.
+
+    Args:
+        filename: Name reported by the alignment endpoint, with or without a
+            trailing ``.TextGrid`` extension.
+
+    Returns:
+        True when the server answers HTTP 200 and the payload was written to
+        ``downloaded_<name>.TextGrid`` in the current directory; False for
+        any other status code or on any exception.
+    """
+    print(f"\n🔍 Testando download do TextGrid: {filename}")
+    try:
+        # Strip the extension only when it is a suffix; str.replace would
+        # also remove ".TextGrid" occurring in the middle of the name.
+        suffix = '.TextGrid'
+        if filename.endswith(suffix):
+            base_filename = filename[:-len(suffix)]
+        else:
+            base_filename = filename
+        response = requests.get(f"{API_BASE_URL}/download/{base_filename}", timeout=30)
+        if response.status_code == 200:
+            print(f"✅ Download concluído!")
+            print(f"   Tamanho: {len(response.content)} bytes")
+            print(f"   Tipo: {response.headers.get('content-type', 'unknown')}")
+            # Save the raw bytes: response.text is decoded with a guessed
+            # charset, which can corrupt non-ASCII labels when re-encoded.
+            download_filename = f"downloaded_{base_filename}.TextGrid"
+            with open(download_filename, 'wb') as f:
+                f.write(response.content)
+            print(f"   Salvo como: {download_filename}")
+            return True
+        else:
+            print(f"❌ Falha no download: {response.status_code}")
+            return False
+    except Exception as e:
+        print(f"❌ Erro no download: {e}")
+        return False
+def test_web_interface():
+    """Check that the root URL serves the expected web page.
+
+    Returns:
+        True only when the root URL answers HTTP 200 and the body contains
+        the marker text "MFA Portuguese Alignment"; False for an unexpected
+        body, any other status code, or any exception.
+    """
+    print("\n🔍 Testando interface web...")
+    try:
+        page = requests.get(API_BASE_URL, timeout=10)
+        if page.status_code != 200:
+            print(f"❌ Interface web inacessível: {page.status_code}")
+            return False
+        if "MFA Portuguese Alignment" not in page.text:
+            print("⚠️ Interface web acessível mas conteúdo inesperado")
+            return False
+        print("✅ Interface web acessível e funcionando")
+        return True
+    except Exception as exc:
+        print(f"❌ Erro na interface web: {exc}")
+        return False
+def run_full_test(audio_file=None, text=None):
+    """Run every API check in sequence and print a pass/fail report.
+
+    The checks run in this order: health, web interface, model listing,
+    alignment, and finally download (only attempted when alignment returned
+    a non-empty dict).
+
+    Args:
+        audio_file: Optional path of the audio to align; forwarded to
+            ``test_alignment``.
+        text: Optional transcript; a default Portuguese sentence is used
+            when None.
+
+    Returns:
+        True when every check passed, False otherwise.
+    """
+    separator = "=" * 60
+    print("🚀 Iniciando testes completos da API MFA Portuguese")
+    print(f"🌐 URL base: {API_BASE_URL}")
+    print(separator)
+    # Default Portuguese sentence when the caller supplied none.
+    if text is None:
+        text = "Olá, este é um teste de alinhamento forçado para português brasileiro."
+    outcome = {}
+    outcome['health'] = test_health_check()
+    outcome['web'] = test_web_interface()
+    outcome['models'] = test_models_list()
+    aligned = test_alignment(audio_file, text)
+    outcome['alignment'] = aligned is not False
+    # The download check needs the filename reported by a successful alignment.
+    if isinstance(aligned, dict) and aligned:
+        outcome['download'] = test_download(aligned['filename'])
+    else:
+        outcome['download'] = False
+        print("\n⏭️ Pulando teste de download (alinhamento falhou)")
+    # Final report.
+    print("\n" + separator)
+    print("📊 RELATÓRIO FINAL DOS TESTES")
+    print(separator)
+    total_tests = len(outcome)
+    passed_tests = len([ok for ok in outcome.values() if ok])
+    for label, ok in outcome.items():
+        verdict = "✅ PASSOU" if ok else "❌ FALHOU"
+        print(f"{label.upper():12} | {verdict}")
+    print("-" * 60)
+    print(f"RESUMO: {passed_tests}/{total_tests} testes passaram")
+    if passed_tests != total_tests:
+        print("⚠️ Alguns testes falharam. Verifique os logs acima.")
+        return False
+    print("🎉 Todos os testes passaram! API funcionando perfeitamente.")
+    return True
+def test_with_real_audio():
+    """Run the full suite against a real recording when one is available.
+
+    Looks for ``exemplo.wav`` in the current directory; replace the path and
+    the transcript below with your own material. When the file is absent the
+    suite falls back to the synthetic test audio.
+
+    Returns:
+        The result of ``run_full_test``.
+    """
+    sample_path = "exemplo.wav"  # Replace with your own file
+    transcript = "Transcrição exata do que está sendo falado no áudio"
+    if not os.path.exists(sample_path):
+        print(f"⚠️ Arquivo {sample_path} não encontrado. Usando áudio de teste.")
+        return run_full_test()
+    print(f"\n🎯 Testando com áudio real: {sample_path}")
+    return run_full_test(sample_path, transcript)
+if __name__ == "__main__":
+    # Command-line entry point: parse options, run the selected suite and
+    # exit with status 0 on success or 1 on failure.
+    import argparse
+    import sys
+    parser = argparse.ArgumentParser(description="Testa a API do MFA Portuguese")
+    parser.add_argument("--url", default=API_BASE_URL, help="URL base da API")
+    parser.add_argument("--audio", help="Arquivo de áudio para teste")
+    parser.add_argument("--text", help="Texto para alinhamento")
+    parser.add_argument("--real", action="store_true", help="Tenta usar áudio real")
+    args = parser.parse_args()
+    # This block runs at module scope, so rebinding the name here changes the
+    # global that the test functions read.
+    API_BASE_URL = args.url
+    if args.real:
+        success = test_with_real_audio()
+    else:
+        success = run_full_test(args.audio, args.text)
+    # sys.exit instead of the site-provided exit() helper, which is missing
+    # under "python -S" and in frozen/embedded interpreters.
+    sys.exit(0 if success else 1)