# ufpalign / Dockerfile.ufpalign
# marcosremar2's picture
# Add UFPAlign Docker implementation - Fix MFA Portuguese model names in original Dockerfile - Add comprehensive UFPAlign Docker setup with Kaldi toolkit - Include Docker Compose configuration for multi-container deployment - Add Makefile with convenient commands for container management - Support for interactive shell, API service, and batch processing - Complete documentation for UFPAlign Brazilian Portuguese alignment
# a80c875
# Kaldi base image. The tag is exposed as a build argument so a build can be
# pinned to a specific tag for reproducibility:
#   docker build --build-arg KALDI_IMAGE_TAG=<tag> ...
# The default keeps the previous behaviour of tracking "latest".
ARG KALDI_IMAGE_TAG=latest
FROM kaldiasr/kaldi:${KALDI_IMAGE_TAG}

# Image metadata, grouped into a single LABEL instruction.
LABEL maintainer="UFPAlign Docker Implementation" \
      description="UFPAlign - Brazilian Portuguese Forced Phonetic Alignment Tool" \
      version="1.0"
# Set environment variables
#   UFPALIGN_DIR - directory that holds the UFPAlign checkout inside the image
#   KALDI_ROOT   - Kaldi location; presumably provided by the base image
#                  (a later step only links it if the directory exists)
#   LC_ALL/LANG  - Brazilian Portuguese UTF-8 locale
ENV UFPALIGN_DIR=/opt/UFPAlign \
    KALDI_ROOT=/opt/kaldi \
    LC_ALL=pt_BR.UTF-8 \
    LANG=pt_BR.UTF-8
# Put the UFPAlign directory first on PYTHONPATH. The ':' separator and the
# previous value are appended only when PYTHONPATH is already set; a plain
# "...:$PYTHONPATH" would leave a trailing ':' (an empty path entry) when the
# base image does not define the variable.
ENV PYTHONPATH=/opt/UFPAlign${PYTHONPATH:+:${PYTHONPATH}}
# Update system and install dependencies.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration can
#   never block the build on an interactive prompt, and the setting does not
#   leak into the runtime environment.
# - Packages are listed one per line in alphabetical order for easy diffing.
# - The apt lists are removed in this same layer so they never reach the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        locales \
        openjdk-8-jdk \
        python3-dev \
        python3-pip \
        python3-setuptools \
        sox \
        sudo \
        wget && \
    # Configure locale for Portuguese (Brazil): uncomment its entry, then generate it
    sed -i '/pt_BR.UTF-8/s/^# //g' /etc/locale.gen && \
    locale-gen && \
    # Upgrade pip (--no-cache-dir keeps pip's download cache out of the layer)
    pip3 install --no-cache-dir --upgrade pip && \
    # Create UFPAlign directory
    mkdir -p "$UFPALIGN_DIR" && \
    # Clean up apt cache
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Python packages, installed without keeping pip's download cache.
# First line: packages pinned to an exact version.
# Second line: packages installed at whatever version pip resolves.
RUN pip3 install --no-cache-dir \
        gdown==5.0.0 numpy==1.26.3 pandas==2.2.2 TextGrid==1.5 Unidecode==1.3.8 \
        aiofiles fastapi pydantic python-multipart uvicorn
# Clone UFPAlign repository straight into its target directory (no `cd` chain).
# --depth 1 fetches only the most recent commit, so the repository's full
# history is not stored in the image layer.
RUN git clone --depth 1 https://github.com/falabrasil/ufpalign.git /opt/UFPAlign && \
    # Make the main entry script executable
    chmod +x /opt/UFPAlign/ufpalign.sh

# Set working directory
WORKDIR $UFPALIGN_DIR
# Copy application files if they exist in build context.
# COPY is not executed by a shell, so redirections and `|| true` cannot be used
# to make a source optional (the extra words would be parsed as paths).
# Instead each name is written as a wildcard: "app.p[y]" can only match
# "app.py" and "requirements.tx[t]" can only match "requirements.txt".
# NOTE(review): a wildcard that matches nothing is accepted by BuildKit; the
# legacy builder is expected to still fail if neither file is present - confirm
# against the builder used in CI.
COPY app.p[y] requirements.tx[t] /opt/UFPAlign/
# Working directories created ahead of time: three under the UFPAlign
# directory (uploads, output, textgrid) plus /root/logs.
RUN mkdir -p /root/logs \
    "${UFPALIGN_DIR}/uploads" "${UFPALIGN_DIR}/output" "${UFPALIGN_DIR}/textgrid"
# Download models during build (optional - can be done at runtime)
# RUN utils/download_model.sh "data" $UFPALIGN_DIR && \
# utils/download_model.sh "mono" $UFPALIGN_DIR
# Make sure Kaldi is properly linked.
# When the Kaldi directory exists, expose the wsj/s5 recipe's `steps`, `utils`
# (as `utils_kaldi`) and `path.sh` inside the UFPAlign directory via symlinks.
# The links are chained with `&&` so a failure in any of them fails the build;
# with `;` only the status of the last `ln` was taken into account.
# If the Kaldi directory is absent the step is skipped, as before.
RUN if [ -d "$KALDI_ROOT" ]; then \
        ln -sf "$KALDI_ROOT/egs/wsj/s5/steps" "$UFPALIGN_DIR/steps" && \
        ln -sf "$KALDI_ROOT/egs/wsj/s5/utils" "$UFPALIGN_DIR/utils_kaldi" && \
        ln -sf "$KALDI_ROOT/egs/wsj/s5/path.sh" "$UFPALIGN_DIR/path.sh"; \
    fi
# Ownership and mode for the whole UFPAlign tree: owned by root:root,
# mode 755 applied recursively.
RUN chown -R root:root "${UFPALIGN_DIR}" \
    && chmod -R 755 "${UFPALIGN_DIR}"
# Port documented for the optional API (EXPOSE does not publish it by itself).
EXPOSE 7860

# Health probe: passes while the ufpalign.sh script is present in the
# UFPAlign directory, otherwise exits with status 1.
HEALTHCHECK --retries=3 --start-period=60s --timeout=30s --interval=30s \
    CMD test -f "${UFPALIGN_DIR}/ufpalign.sh" && echo "UFPAlign is ready" || exit 1

# With no command given, the container starts a bash shell.
CMD ["/bin/bash"]