Spaces:

marcosremar2
/

ufpalign

Build error

App Files Files Community

marcosremar2 committed on Jun 7, 2025

Commit

1e3ff26

1 Parent(s): 3722cb7

Implementar UFPAlign com segmentação de sílabas - Substituir MFA por UFPAlign para alinhamento específico do português brasileiro - Adicionar interface para mostrar início e fim de cada sílaba - Exibir informações detalhadas de fonemas, palavras e sílabas - Usar repositório oficial do UFPAlign da UFPA - Interface otimizada para visualização de segmentação silábica

Browse files

Files changed (2) hide show

Dockerfile +73 -20
app.py +162 -71

Dockerfile CHANGED Viewed

@@ -1,9 +1,53 @@
-# Montreal Forced Aligner with Portuguese Models
-FROM mmcauliffe/montreal-forced-aligner:latest
-LABEL maintainer="MFA Portuguese Alignment - Hugging Face Spaces"
-# Install additional Python packages for FastAPI (Python 3.12 compatible versions)
 RUN pip install --no-cache-dir \
     fastapi \
     uvicorn \
@@ -11,28 +55,37 @@ RUN pip install --no-cache-dir \
     pydantic \
     textgrid \
     pandas \
-    numpy
-# Create workspace
-WORKDIR /app
-# Download Portuguese models during build
-RUN mfa model download dictionary portuguese_mfa && \
-    mfa model download acoustic portuguese_mfa && \
-    mfa model download g2p portuguese_brazil_mfa
 # Copy application files
-COPY app.py /app/
-COPY README.md /app/
-# Create uploads directory
-RUN mkdir -p /app/uploads /app/output
 # Expose port
 EXPOSE 7860
-# Set environment variable for Gradio
-ENV GRADIO_SERVER_NAME="0.0.0.0"
-# Run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+# UFPAlign Brazilian Portuguese Speech Alignment Container
+FROM kaldiasr/kaldi:latest
+LABEL maintainer="UFPAlign Hugging Face Space"
+LABEL description="UFPAlign - Brazilian Portuguese Forced Phonetic Alignment Tool"
+LABEL version="1.0"
+# Set environment variables
+ENV UFPALIGN_DIR=/opt/UFPAlign
+ENV KALDI_ROOT=/opt/kaldi
+ENV LC_ALL=pt_BR.UTF-8
+ENV LANG=pt_BR.UTF-8
+ENV PYTHONPATH=/opt/UFPAlign:$PYTHONPATH
+# Update system and install dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    sudo \
+    curl \
+    wget \
+    openjdk-8-jdk \
+    locales \
+    python3-pip \
+    python3-dev \
+    python3-setuptools \
+    build-essential \
+    sox \
+    ffmpeg \
+    git \
+    unzip && \
+    # Configure locale for Portuguese (Brazil)
+    sed -i '/pt_BR.UTF-8/s/^# //g' /etc/locale.gen && \
+    locale-gen && \
+    # Upgrade pip
+    python3 -m pip install --upgrade pip && \
+    # Cleanup
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Create UFPAlign directory
+RUN mkdir -p ${UFPALIGN_DIR}
+# Clone UFPAlign repository
+RUN cd /opt && \
+    git clone https://github.com/falabrasil/ufpalign.git UFPAlign && \
+    cd UFPAlign && \
+    # Make the shell script executable
+    chmod +x ufpalign.sh
+# Install Python dependencies for UFPAlign and FastAPI
 RUN pip install --no-cache-dir \
     fastapi \
     uvicorn \
     pydantic \
     textgrid \
     pandas \
+    numpy \
+    scikit-learn \
+    scipy \
+    matplotlib
+# Install additional UFPAlign Python requirements if they exist
+RUN cd ${UFPALIGN_DIR} && \
+    if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 # Copy application files
+COPY app.py /app/app.py
+# Create necessary directories
+RUN mkdir -p /app/uploads /app/output /app/logs
+# Set working directory
+WORKDIR /app
+# List the demo directory (if present) and make every UFPAlign shell script executable
+RUN cd ${UFPALIGN_DIR} && \
+    # Check if demo files exist and are accessible
+    ls -la demo/ || echo "Demo directory not found" && \
+    # Ensure scripts are executable
+    find . -name "*.sh" -exec chmod +x {} \;
 # Expose port
 EXPOSE 7860
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+# Start the application
+CMD ["python3", "app.py"]

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
 """
-Montreal Forced Aligner - Portuguese Brazilian Speech Alignment
 FastAPI Application for Hugging Face Spaces
-Uses the Montreal Forced Aligner (MFA) with pre-trained Portuguese models
-for high-quality forced speech alignment.
 """
 import os
@@ -27,8 +27,8 @@ logger = logging.getLogger(__name__)
 # Initialize FastAPI app
 app = FastAPI(
-    title="MFA Portuguese Alignment",
-    description="Portuguese Brazilian speech alignment using Montreal Forced Aligner",
     version="1.0.0"
 )
@@ -38,9 +38,9 @@ OUTPUT_DIR = Path("/app/output")
 UPLOAD_DIR.mkdir(exist_ok=True)
 OUTPUT_DIR.mkdir(exist_ok=True)
-def run_mfa_alignment(audio_path: str, text_content: str, output_dir: str) -> tuple[bool, str, str]:
     """
-    Run Montreal Forced Aligner on the input audio and text.
     Args:
         audio_path: Path to the audio file
@@ -51,59 +51,64 @@ def run_mfa_alignment(audio_path: str, text_content: str, output_dir: str) -> tu
         Tuple of (success, textgrid_path, error_message)
     """
     try:
-        # Create temporary directory for MFA processing
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = Path(temp_dir)
-            # Create MFA corpus structure
-            corpus_dir = temp_path / "corpus"
-            corpus_dir.mkdir()
             # Copy audio file
             audio_name = Path(audio_path).stem
-            shutil.copy2(audio_path, corpus_dir / f"{audio_name}.wav")
             # Create text file
-            text_file = corpus_dir / f"{audio_name}.txt"
             with open(text_file, 'w', encoding='utf-8') as f:
                 f.write(text_content.strip())
             # Create output directory
-            alignment_dir = temp_path / "alignment"
             alignment_dir.mkdir()
-            logger.info(f"🎯 Running MFA alignment for: {audio_name}")
             logger.info(f"📝 Text: {text_content[:100]}...")
-            # Run MFA alignment
             cmd = [
-                "mfa", "align",
-                str(corpus_dir),
-                "portuguese_mfa",  # Use pre-downloaded Portuguese dictionary model
-                "portuguese_mfa",  # Use pre-downloaded Portuguese acoustic model
-                str(alignment_dir),
-                "--clean"
             ]
-            logger.info(f"🚀 MFA Command: {' '.join(cmd)}")
             result = subprocess.run(
                 cmd,
                 capture_output=True,
                 text=True,
-                timeout=300  # 5 minute timeout
             )
             if result.returncode != 0:
-                error_msg = f"MFA alignment failed: {result.stderr}"
                 logger.error(error_msg)
                 return False, "", error_msg
-            # Find the generated TextGrid file
             textgrid_file = alignment_dir / f"{audio_name}.TextGrid"
             if not textgrid_file.exists():
-                error_msg = f"TextGrid file not found: {textgrid_file}"
                 logger.error(error_msg)
                 return False, "", error_msg
@@ -111,24 +116,24 @@ def run_mfa_alignment(audio_path: str, text_content: str, output_dir: str) -> tu
             output_path = Path(output_dir) / f"{audio_name}.TextGrid"
             shutil.copy2(textgrid_file, output_path)
-            logger.info(f"✅ Alignment completed: {output_path}")
             return True, str(output_path), ""
     except subprocess.TimeoutExpired:
-        return False, "", "MFA alignment timed out after 5 minutes"
     except Exception as e:
-        logger.error(f"MFA alignment error: {str(e)}")
         return False, "", f"Alignment error: {str(e)}"
-def parse_textgrid_to_json(textgrid_path: str) -> Dict:
     """
-    Parse TextGrid file and extract alignment information.
     Args:
         textgrid_path: Path to the TextGrid file
     Returns:
-        Dictionary with alignment data
     """
     try:
         # Load TextGrid
@@ -137,9 +142,13 @@ def parse_textgrid_to_json(textgrid_path: str) -> Dict:
         result = {
             "filename": Path(textgrid_path).name,
             "duration": tg.maxTime,
             "tiers": []
         }
         for tier in tg:
             tier_data = {
                 "name": tier.name,
@@ -149,12 +158,39 @@ def parse_textgrid_to_json(textgrid_path: str) -> Dict:
             if hasattr(tier, 'intervals'):
                 for interval in tier:
-                    tier_data["intervals"].append({
                         "start": round(interval.minTime, 3),
                         "end": round(interval.maxTime, 3),
                         "duration": round(interval.maxTime - interval.minTime, 3),
                         "text": interval.mark
-                    })
             result["tiers"].append(tier_data)
@@ -171,10 +207,10 @@ async def main_interface():
     <!DOCTYPE html>
     <html>
     <head>
-        <title>MFA Portuguese Alignment</title>
         <meta charset="UTF-8">
         <style>
-            body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
             .header { text-align: center; margin-bottom: 30px; }
             .form-group { margin-bottom: 20px; }
             label { display: block; margin-bottom: 5px; font-weight: bold; }
@@ -185,20 +221,25 @@ async def main_interface():
             .info { background: #f8f9fa; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
             .result { background: #d4edda; padding: 15px; border-radius: 4px; margin-top: 20px; }
             .error { background: #f8d7da; padding: 15px; border-radius: 4px; margin-top: 20px; }
         </style>
     </head>
     <body>
         <div class="header">
-            <h1>🎯 MFA Portuguese Alignment</h1>
-            <p>Montreal Forced Aligner com modelos portugueses pré-treinados</p>
         </div>
         <div class="info">
-            <h3>📋 Instruções:</h3>
             <ul>
-                <li><strong>Áudio:</strong> Arquivo WAV, preferível 16kHz mono</li>
-                <li><strong>Texto:</strong> Transcrição exata do áudio em português</li>
-                <li><strong>Resultado:</strong> Arquivo TextGrid com alinhamento fonético</li>
             </ul>
         </div>
@@ -213,7 +254,7 @@ async def main_interface():
                 <textarea id="text" name="text" placeholder="Digite aqui o texto exato que está sendo falado no áudio..." required></textarea>
             </div>
-            <button type="submit">🚀 Executar Alinhamento</button>
         </form>
         <div id="result"></div>
@@ -235,7 +276,7 @@ async def main_interface():
                 formData.append('text', text);
                 const resultDiv = document.getElementById('result');
-                resultDiv.innerHTML = '<div class="info">⏳ Processando alinhamento... Isso pode levar alguns minutos.</div>';
                 try {
                     const response = await fetch('/align', {
@@ -246,18 +287,71 @@ async def main_interface():
                     const result = await response.json();
                     if (response.ok) {
                         resultDiv.innerHTML = `
                             <div class="result">
-                                <h3>✅ Alinhamento Concluído!</h3>
                                 <p><strong>Arquivo:</strong> ${result.filename}</p>
-                                <p><strong>Duração:</strong> ${result.duration.toFixed(2)}s</p>
-                                <p><strong>Tiers encontradas:</strong> ${result.tiers.length}</p>
                                 <a href="/download/${result.filename.replace('.TextGrid', '')}" target="_blank">
                                     📥 Download TextGrid
                                 </a>
-                                <details style="margin-top: 15px;">
-                                    <summary>📊 Visualizar Dados de Alinhamento</summary>
-                                    <pre style="background: #f8f9fa; padding: 10px; overflow-x: auto;">${JSON.stringify(result, null, 2)}</pre>
                                 </details>
                             </div>
                         `;
@@ -289,7 +383,7 @@ async def align_audio(
     text: str = Form(...)
 ):
     """
-    Perform forced alignment on uploaded audio and text.
     """
     try:
         # Validate file type
@@ -309,8 +403,8 @@ async def align_audio(
         logger.info(f"📁 Arquivo salvo: {audio_path}")
         logger.info(f"📝 Texto: {text[:100]}...")
-        # Run alignment
-        success, textgrid_path, error_msg = run_mfa_alignment(
             str(audio_path),
             text,
             str(OUTPUT_DIR)
@@ -319,8 +413,8 @@ async def align_audio(
         if not success:
             raise HTTPException(status_code=500, detail=error_msg)
-        # Parse TextGrid and return results
-        result = parse_textgrid_to_json(textgrid_path)
         # Cleanup uploaded file
         try:
@@ -360,29 +454,26 @@ async def download_textgrid(filename: str):
 @app.get("/health")
 async def health_check():
     """Health check endpoint."""
-    return {"status": "healthy", "aligner": "Montreal Forced Aligner", "language": "Portuguese"}
 @app.get("/models")
-async def list_models():
-    """List available MFA models."""
     try:
-        # Check acoustic models
-        acoustic_result = subprocess.run(
-            ["mfa", "model", "list", "acoustic"],
-            capture_output=True,
-            text=True
-        )
-        # Check G2P models
-        g2p_result = subprocess.run(
-            ["mfa", "model", "list", "g2p"],
             capture_output=True,
             text=True
         )
         return {
-            "acoustic_models": acoustic_result.stdout.split('\n') if acoustic_result.returncode == 0 else [],
-            "g2p_models": g2p_result.stdout.split('\n') if g2p_result.returncode == 0 else []
         }
     except Exception as e:
         return {"error": str(e)}

 #!/usr/bin/env python3
 """
+UFPAlign - Brazilian Portuguese Speech Alignment
 FastAPI Application for Hugging Face Spaces
+Uses UFPAlign (Universidade Federal do Pará) for high-quality forced speech alignment
+specifically designed for Brazilian Portuguese, with detailed syllable information.
 """
 import os
 # Initialize FastAPI app
 app = FastAPI(
+    title="UFPAlign Portuguese Syllable Alignment",
+    description="Brazilian Portuguese speech alignment using UFPAlign with detailed syllable segmentation",
     version="1.0.0"
 )
 UPLOAD_DIR.mkdir(exist_ok=True)
 OUTPUT_DIR.mkdir(exist_ok=True)
+def run_ufpalign_alignment(audio_path: str, text_content: str, output_dir: str) -> tuple[bool, str, str]:
     """
+    Run UFPAlign on the input audio and text.
     Args:
         audio_path: Path to the audio file
         Tuple of (success, textgrid_path, error_message)
     """
     try:
+        # Create temporary directory for UFPAlign processing
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = Path(temp_dir)
+            # Create UFPAlign input structure
+            input_dir = temp_path / "input"
+            input_dir.mkdir()
             # Copy audio file
             audio_name = Path(audio_path).stem
+            audio_input = input_dir / f"{audio_name}.wav"
+            shutil.copy2(audio_path, audio_input)
             # Create text file
+            text_file = input_dir / f"{audio_name}.txt"
             with open(text_file, 'w', encoding='utf-8') as f:
                 f.write(text_content.strip())
             # Create output directory
+            alignment_dir = temp_path / "output"
             alignment_dir.mkdir()
+            logger.info(f"🎯 Running UFPAlign for: {audio_name}")
             logger.info(f"📝 Text: {text_content[:100]}...")
+            # Run UFPAlign using the shell script
             cmd = [
+                "/opt/UFPAlign/ufpalign.sh",
+                str(audio_input),
+                str(text_file),
+                str(alignment_dir)
             ]
+            logger.info(f"🚀 UFPAlign Command: {' '.join(cmd)}")
             result = subprocess.run(
                 cmd,
                 capture_output=True,
                 text=True,
+                timeout=600,  # 10 minute timeout
+                cwd="/opt/UFPAlign"
             )
             if result.returncode != 0:
+                error_msg = f"UFPAlign failed: {result.stderr}\nSTDOUT: {result.stdout}"
                 logger.error(error_msg)
                 return False, "", error_msg
+            # Look for the generated TextGrid file
             textgrid_file = alignment_dir / f"{audio_name}.TextGrid"
+            # UFPAlign may name its output differently, so prefer the first *.TextGrid found in the output directory
+            textgrid_files = list(alignment_dir.glob("*.TextGrid"))
+            if textgrid_files:
+                textgrid_file = textgrid_files[0]
             if not textgrid_file.exists():
+                error_msg = f"TextGrid file not found in: {alignment_dir}. Available files: {list(alignment_dir.iterdir())}"
                 logger.error(error_msg)
                 return False, "", error_msg
             output_path = Path(output_dir) / f"{audio_name}.TextGrid"
             shutil.copy2(textgrid_file, output_path)
+            logger.info(f"✅ UFPAlign completed: {output_path}")
             return True, str(output_path), ""
     except subprocess.TimeoutExpired:
+        return False, "", "UFPAlign timed out after 10 minutes"
     except Exception as e:
+        logger.error(f"UFPAlign error: {str(e)}")
         return False, "", f"Alignment error: {str(e)}"
+def parse_textgrid_to_syllable_info(textgrid_path: str) -> Dict:
     """
+    Parse TextGrid file and extract detailed syllable information from UFPAlign.
     Args:
         textgrid_path: Path to the TextGrid file
     Returns:
+        Dictionary with detailed syllable alignment data
     """
     try:
         # Load TextGrid
         result = {
             "filename": Path(textgrid_path).name,
             "duration": tg.maxTime,
+            "syllables": [],
+            "phonemes": [],
+            "words": [],
             "tiers": []
         }
+        # Process each tier
         for tier in tg:
             tier_data = {
                 "name": tier.name,
             if hasattr(tier, 'intervals'):
                 for interval in tier:
+                    interval_data = {
                         "start": round(interval.minTime, 3),
                         "end": round(interval.maxTime, 3),
                         "duration": round(interval.maxTime - interval.minTime, 3),
                         "text": interval.mark
+                    }
+                    tier_data["intervals"].append(interval_data)
+                    # Categorize based on tier name
+                    if "sil" in tier.name.lower() or "syllable" in tier.name.lower():
+                        if interval.mark.strip() and interval.mark.strip() != "":
+                            result["syllables"].append({
+                                "syllable": interval.mark,
+                                "start_time": round(interval.minTime, 3),
+                                "end_time": round(interval.maxTime, 3),
+                                "duration": round(interval.maxTime - interval.minTime, 3)
+                            })
+                    elif "phone" in tier.name.lower() or "fone" in tier.name.lower():
+                        if interval.mark.strip() and interval.mark.strip() != "":
+                            result["phonemes"].append({
+                                "phoneme": interval.mark,
+                                "start_time": round(interval.minTime, 3),
+                                "end_time": round(interval.maxTime, 3),
+                                "duration": round(interval.maxTime - interval.minTime, 3)
+                            })
+                    elif "word" in tier.name.lower() or "palavra" in tier.name.lower():
+                        if interval.mark.strip() and interval.mark.strip() != "":
+                            result["words"].append({
+                                "word": interval.mark,
+                                "start_time": round(interval.minTime, 3),
+                                "end_time": round(interval.maxTime, 3),
+                                "duration": round(interval.maxTime - interval.minTime, 3)
+                            })
             result["tiers"].append(tier_data)
     <!DOCTYPE html>
     <html>
     <head>
+        <title>UFPAlign - Segmentação de Sílabas</title>
         <meta charset="UTF-8">
         <style>
+            body { font-family: Arial, sans-serif; max-width: 1000px; margin: 0 auto; padding: 20px; }
             .header { text-align: center; margin-bottom: 30px; }
             .form-group { margin-bottom: 20px; }
             label { display: block; margin-bottom: 5px; font-weight: bold; }
             .info { background: #f8f9fa; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
             .result { background: #d4edda; padding: 15px; border-radius: 4px; margin-top: 20px; }
             .error { background: #f8d7da; padding: 15px; border-radius: 4px; margin-top: 20px; }
+            .syllable-item { background: #e7f3ff; padding: 10px; margin: 5px 0; border-radius: 4px; border-left: 4px solid #007bff; }
+            .syllable-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 10px; margin-top: 15px; }
+            .tier-section { margin-bottom: 20px; }
+            .tier-title { font-weight: bold; color: #333; margin-bottom: 10px; }
         </style>
     </head>
     <body>
         <div class="header">
+            <h1>🎯 UFPAlign - Segmentação de Sílabas</h1>
+            <p>Alinhamento fonético brasileiro com informações detalhadas de sílabas</p>
         </div>
         <div class="info">
+            <h3>📋 Sobre o UFPAlign:</h3>
             <ul>
+                <li><strong>Desenvolvido pela UFPA:</strong> Especificamente para português brasileiro</li>
+                <li><strong>Sílabas:</strong> Mostra início e fim de cada sílaba com precisão</li>
+                <li><strong>Multi-camadas:</strong> Fonemas, sílabas, palavras e transcrições</li>
+                <li><strong>Áudio:</strong> Arquivo WAV, preferencialmente 16kHz mono</li>
             </ul>
         </div>
                 <textarea id="text" name="text" placeholder="Digite aqui o texto exato que está sendo falado no áudio..." required></textarea>
             </div>
+            <button type="submit">🚀 Executar Segmentação de Sílabas</button>
         </form>
         <div id="result"></div>
                 formData.append('text', text);
                 const resultDiv = document.getElementById('result');
+                resultDiv.innerHTML = '<div class="info">⏳ Processando com UFPAlign... Isso pode levar alguns minutos.</div>';
                 try {
                     const response = await fetch('/align', {
                     const result = await response.json();
                     if (response.ok) {
+                        let syllableHtml = '';
+                        if (result.syllables && result.syllables.length > 0) {
+                            syllableHtml = '<div class="tier-section"><div class="tier-title">🔤 Sílabas Identificadas:</div><div class="syllable-grid">';
+                            result.syllables.forEach((syl, index) => {
+                                syllableHtml += `
+                                    <div class="syllable-item">
+                                        <strong>Sílaba ${index + 1}:</strong> "${syl.syllable}"<br>
+                                        <strong>Início:</strong> ${syl.start_time}s<br>
+                                        <strong>Fim:</strong> ${syl.end_time}s<br>
+                                        <strong>Duração:</strong> ${syl.duration}s
+                                    </div>
+                                `;
+                            });
+                            syllableHtml += '</div></div>';
+                        }
+                        let phonemeHtml = '';
+                        if (result.phonemes && result.phonemes.length > 0) {
+                            phonemeHtml = '<div class="tier-section"><div class="tier-title">🔊 Fonemas:</div><div class="syllable-grid">';
+                            result.phonemes.forEach((ph, index) => {
+                                phonemeHtml += `
+                                    <div class="syllable-item" style="background: #fff3cd;">
+                                        <strong>Fonema ${index + 1}:</strong> "${ph.phoneme}"<br>
+                                        <strong>Início:</strong> ${ph.start_time}s<br>
+                                        <strong>Fim:</strong> ${ph.end_time}s<br>
+                                        <strong>Duração:</strong> ${ph.duration}s
+                                    </div>
+                                `;
+                            });
+                            phonemeHtml += '</div></div>';
+                        }
+                        let wordHtml = '';
+                        if (result.words && result.words.length > 0) {
+                            wordHtml = '<div class="tier-section"><div class="tier-title">📝 Palavras:</div><div class="syllable-grid">';
+                            result.words.forEach((word, index) => {
+                                wordHtml += `
+                                    <div class="syllable-item" style="background: #d1ecf1;">
+                                        <strong>Palavra ${index + 1}:</strong> "${word.word}"<br>
+                                        <strong>Início:</strong> ${word.start_time}s<br>
+                                        <strong>Fim:</strong> ${word.end_time}s<br>
+                                        <strong>Duração:</strong> ${word.duration}s
+                                    </div>
+                                `;
+                            });
+                            wordHtml += '</div></div>';
+                        }
                         resultDiv.innerHTML = `
                             <div class="result">
+                                <h3>✅ Segmentação Concluída com UFPAlign!</h3>
                                 <p><strong>Arquivo:</strong> ${result.filename}</p>
+                                <p><strong>Duração Total:</strong> ${result.duration.toFixed(2)}s</p>
+                                <p><strong>Camadas encontradas:</strong> ${result.tiers.length}</p>
                                 <a href="/download/${result.filename.replace('.TextGrid', '')}" target="_blank">
                                     📥 Download TextGrid
                                 </a>
+                                ${syllableHtml}
+                                ${wordHtml}
+                                ${phonemeHtml}
+                                <details style="margin-top: 20px;">
+                                    <summary>📊 Dados Completos de Alinhamento</summary>
+                                    <pre style="background: #f8f9fa; padding: 10px; overflow-x: auto; max-height: 400px;">${JSON.stringify(result, null, 2)}</pre>
                                 </details>
                             </div>
                         `;
     text: str = Form(...)
 ):
     """
+    Perform forced alignment on uploaded audio and text using UFPAlign.
     """
     try:
         # Validate file type
         logger.info(f"📁 Arquivo salvo: {audio_path}")
         logger.info(f"📝 Texto: {text[:100]}...")
+        # Run UFPAlign alignment
+        success, textgrid_path, error_msg = run_ufpalign_alignment(
             str(audio_path),
             text,
             str(OUTPUT_DIR)
         if not success:
             raise HTTPException(status_code=500, detail=error_msg)
+        # Parse TextGrid and return results with syllable information
+        result = parse_textgrid_to_syllable_info(textgrid_path)
         # Cleanup uploaded file
         try:
 @app.get("/health")
 async def health_check():
     """Health check endpoint."""
+    return {"status": "healthy", "aligner": "UFPAlign", "language": "Brazilian Portuguese"}
 @app.get("/models")
+async def list_ufpalign_info():
+    """List UFPAlign information."""
     try:
+        # Check if UFPAlign is available
+        result = subprocess.run(
+            ["ls", "/opt/UFPAlign/"],
             capture_output=True,
             text=True
         )
         return {
+            "aligner": "UFPAlign",
+            "version": "Kaldi-based",
+            "language": "Brazilian Portuguese",
+            "features": ["Syllable segmentation", "Phoneme alignment", "Word boundaries"],
+            "available": result.returncode == 0,
+            "ufpalign_files": result.stdout.split('\n') if result.returncode == 0 else []
         }
     except Exception as e:
         return {"error": str(e)}