File size: 933 Bytes
f187c31
125b07b
0b7b535
125b07b
d17c108
8cb66fd
 
d17c108
 
 
 
292dcba
eab1485
733fb2c
b399523
eab1485
5807e15
9bac561
d17c108
9bac561
d17c108
 
 
733fb2c
d17c108
733fb2c
 
eab1485
d17c108
733fb2c
9ac41e6
d17c108
2d07114
125b07b
d17c108
79672fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Base image: slim variant of the official CPython 3.11 image.
# The prebuilt llama-cpp-python wheel installed further down is tagged cp311,
# so the interpreter's minor version here has to stay in step with that wheel.
FROM python:3.11-slim

# Working directory for the rest of the build and for the running container;
# the relative script path in CMD is resolved against it.
WORKDIR /app

# Runtime environment, declared in one instruction:
#   PYTHONUNBUFFERED - keep Python from buffering stdout/stderr, so log lines
#                      show up immediately
#   HF_HOME          - root directory for Hugging Face data/cache, placed under
#                      /data (intended as persistent storage; the mount itself
#                      is provided by the runtime, not by this file)
#   HF_HUB_CACHE     - hub download cache, kept inside HF_HOME
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/data/.huggingface \
    HF_HUB_CACHE=/data/.huggingface/hub

# Install OS-level runtime libraries:
#   ffmpeg       - audio/video tooling (presumably needed by the app's media
#                  handling; confirm against app.py)
#   libopenblas0 - OpenBLAS shared library; the llama-cpp-python wheel installed
#                  below is an OpenBLAS build
# --no-install-recommends keeps apt from pulling in optional "recommended"
# packages, which only add image size. The package index is refreshed and
# removed again inside this same layer so it never persists in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

# Install llama-cpp-python.
# First choice is a prebuilt wheel (version 0.3.16, CPU build with AVX512 and
# OpenBLAS, CPython 3.11, manylinux x86_64). If that install fails for any
# reason, fall back to whatever llama-cpp-python pip resolves from its default
# index. The step fails only if the fallback fails too.
RUN if ! pip install --no-cache-dir \
        "https://huggingface.co/datasets/AIencoder/llama-cpp-wheels/resolve/main/llama_cpp_python-0.3.16%2Bcpuavx512_openblas-cp311-cp311-manylinux_2_31_x86_64.whl"; \
    then \
        pip install --no-cache-dir llama-cpp-python; \
    fi

# Install the application's Python dependencies.
# The gradio requirement MUST stay quoted: this is a shell-form RUN, and an
# unquoted `gradio>=5.0.0` is parsed by /bin/sh as `gradio` plus a redirect of
# stdout into a file named `=5.0.0`, so the version constraint would be
# silently dropped (and a stray file left in the working directory).
RUN pip install --no-cache-dir \
    faster-whisper \
    "gradio>=5.0.0" \
    huggingface_hub

# Add the application entry script to the image as /app/app.py. It is copied
# after all dependency layers, so editing app.py does not invalidate them.
COPY app.py /app/

# Document the service port (EXPOSE does not publish it by itself).
# Presumably the port app.py's web UI listens on; confirm against app.py.
EXPOSE 7860

# Start the app with the Python interpreter directly (exec form, no shell wrapper).
CMD ["python", "app.py"]